python - Struggling with 'crawling' several webpages for one specific term -
i have made program in python asks search term , webpage searched. thought add functionality 'crawl' many webpages. did creating class, way know of can inherit something. here problem arises. class function gets called correctly if put in. need do not know place call class function need things can functions aren't available in other functions.
import urllib.request html.parser import htmlparser class trawler(htmlparser): def __init__(self): pass def handle_starttag(self,webpage,term): tag in webpage: if tag == 'a': if attrs == 'href': value in attrs: search_webpage_2(term, value) trawl = trawler() def main(): print('please enter search term:') searchterm = input('> ') print('please enter website searched:') searchwebsite = input('> ') search_webpage(searchterm, searchwebsite) def open_webpage(url): request = urllib.request.request(url) response = urllib.request.urlopen(request) webpage = response.read() return webpage.decode() def open_webpage_2(url): request = urllib.request.request(url) response = urllib.request.urlopen(request) webpage = response.read() return webpage.decode() def search_webpage(term, url): if term in open_webpage(url): print ('word:',term,'was found in webpage:',url) else: print ('no matches in website:',url) def search_webpage_2(term, url): if term in open_webpage_2(url): print ('word:',term,'was found in webpage:',url) else: print ('no matches in website:',url) main()
trawl.handle_starttag takes webpage, raw html of webpage , search term.
any appreciated
Comments
Post a Comment