# Program to extract any number of links per line from an HTML file.
# NOTE(review): backslashes were lost in transcription; restored below as raw
# Windows paths — confirm against the actual directory layout.
target = r'B:\Python Course\Ch18_Working_with_files\Ch18_Ex2\html.txt'
extract = r'B:\Python Course\Ch18_Working_with_files\Ch18_Ex2\extract.txt'


def url_finder(link):
    """Return every '"'-delimited piece of *link* that ends in ".com".

    Returns an empty list (instead of an implicit None) when the line
    contains no href, so callers can iterate the result unconditionally.
    """
    if "href" not in link:
        return []
    return [part for part in link.split('"') if part[-4:].lower() == ".com"]


def transfer(target, extract):
    """Write every URL url_finder() spots in *target* into *extract*.

    Both parameters are file paths; *extract* is truncated and rewritten.
    """
    # 'with' closes both files automatically — no explicit close() needed.
    with open(target) as rf:
        with open(extract, 'w') as wf:
            for line in rf:
                for extracted_link in url_finder(line):
                    # Leading newline so each link starts on its own line.
                    wf.write(f"\n> {extracted_link}")
Dear sir, please help. I thought this should work, but it doesn't. Please tell me if there is an error. It returns the same result as the previous code.
# Extract every "www" link from Webpage.htm into output.txt, one per line.
# NOTE(review): the transcription fused escapes — split(""") restored to
# split('"') and the missing "\n" escape restored.
with open('Webpage.htm', 'r') as rf:
    with open('output.txt', 'w') as wf:
        for line in rf:
            if 'a href' in line:
                # href values are delimited by double quotes.
                for piece in line.split('"'):
                    if 'www' in piece:
                        wf.write(f"{piece}\n")
# Append every "www" link from myTest.html to new2.html, one per line.
# NOTE(review): backslashes were lost in transcription; paths restored as
# .\html\... — confirm against the actual directory layout.
with open(r'.\html\myTest.html') as rf:
    with open(r'.\html\new2.html', 'a') as wf:
        for data in rf:
            # 'in' is the idiomatic spelling of data.__contains__(...).
            if '<a href' in data:
                # href values are delimited by double quotes.
                for link in data.split('"'):
                    if "www" in link:
                        wf.write(link + "\n")
We can also use count and a for loop to simplify the previous program!
We can also do it a better way:
# Scan file.html and append every href URL found to output_file.txt.
with open("file.html", "r") as html_file:
    with open("output_file.txt", "a") as output_file:
        for line in html_file:
            position = line.find("<a href=")
            # One iteration per anchor tag present on this line.
            for _ in range(line.count("<a href=")):
                first_quote = line.find('"', position)
                second_quote = line.find('"', first_quote + 1)
                # The URL sits between the two quotes.
                url = line[first_quote + 1:second_quote]
                # "\n" restored — the transcription dropped the backslash.
                output_file.write("==> " + url + "\n")
                # Resume the search just past the closing quote.
                position = second_quote + 1
Note: this is 100% working — try it.
Hello sir!! Here May I use page.seek(second_quote) instead of page=page[second_quote: ] ?
# Extract every quoted ".com" URL from helpEx1 and append it to helpEx2.
# NOTE(review): the transcription fused escapes — line.find(""", ...) restored
# to line.find('"', ...) and the missing "\n" escape restored.
with open("helpEx1", 'r') as f1:
    with open("helpEx2", 'a') as f2:
        for line in f1:
            f_quote = -1
            # Hoisted the duplicate find() call: compute the next ".com"
            # position once per iteration instead of twice.
            pos_com = line.find(".com")
            while pos_com != -1:
                # Opening quote that precedes this ".com".
                f_quote = line.find('"', f_quote + 1)
                url = line[f_quote + 1:pos_com + 4]
                f2.write(f"{url}\n")
                # Jump past this URL's closing quote before the next search.
                f_quote = pos_com + 5
                pos_com = line.find(".com", pos_com + 1)
Sir Check me out ..
More enhanced way
# Program to extract any number of links per line from an HTML file.
# NOTE(review): backslashes were lost in transcription; restored below as raw
# Windows paths — confirm against the actual directory layout.
target = r'B:\Python Course\Ch18_Working_with_files\Ch18_Ex2\html.txt'
extract = r'B:\Python Course\Ch18_Working_with_files\Ch18_Ex2\extract.txt'
def url_finder(link):
    """Return every '"'-delimited piece of *link* that ends in ".com".

    The original fell through to an implicit None when the line had no
    "href", forcing every caller to None-check; returning an empty list
    is backward compatible (truthiness and iteration both still work)
    and lets callers loop unconditionally.
    """
    if "href" not in link:
        return []  # no anchor on this line
    quote_split = link.split('"')
    # Case-insensitive match on the final four characters.
    return [each for each in quote_split if each[-4:].lower() == ".com"]
def transfer(target, extract):
    """Write every URL url_finder() spots in *target* into *extract*.

    Both parameters are file paths; *extract* is truncated and rewritten.
    """
    # 'with' closes both files automatically, so the original's explicit
    # close() calls were redundant and have been dropped.
    with open(target) as rf:
        with open(extract, 'w') as wf:
            # Iterate the file directly instead of materializing readlines().
            for line in rf:
                contain_url = url_finder(line)
                # url_finder may return None when the line has no href —
                # guard with the idiomatic 'is not None' before looping.
                if contain_url is not None:
                    for extracted_link in contain_url:
                        # "\n" restored — the transcription dropped the
                        # backslash; each link starts on its own line.
                        wf.write(f"\n> {extracted_link}")
# Run the extraction using the module-level paths configured above.
transfer(target, extract)
Dear sir, please help. I thought this should work, but it doesn't. Please tell me if there is an error. It returns the same result as the previous code.
# Extract every "www" link from Webpage.htm into output.txt, one per line.
# NOTE(review): the transcription fused escapes — split(""") restored to
# split('"') and the missing "\n" escape restored.
with open('Webpage.htm', 'r') as rf:
    with open('output.txt', 'w') as wf:
        for line in rf:
            if 'a href' in line:
                # href values are delimited by double quotes.
                for piece in line.split('"'):
                    if 'www' in piece:
                        wf.write(f"{piece}\n")
How about this way?
import re

# Pull every http/www URL (up to a closing double-quote) out of data.txt
# and print each one on its own line.
with open('./data.txt') as webpage:
    page_text = webpage.read()

url_pattern = re.compile(r'(http[^"]+|www[^"]+)')
for found_url in url_pattern.findall(page_text):
    print(found_url)
awesome
you are great sir
nice video ! can you please upload the Source Code Link ?
and how to import numpy in visual studio code ?
Sir ji Java vi sikhye na.
Sir g, you are best. Love from Pakistan. I have a query. You have described in your playlist description GUI with Tkinter. I want to ask why not PyQt?
Sir are you complete the writing of python book?
es series m aap hame numpy or pandas k bare m sikhayege??????????????????
What a wonderful
When will the full course be uploaded, sir?
appreciate all time
You are great sir
Nice video sir
How to count video duration time in asp.net
Nice video sir…jaldi se course khtm kr do
Aur video kab tak upload hoge
Sir u r great