#Feeling Lucky #In Unit 6, we implemented a page ranking algorithm, but didn't finish the final #step of using it to improve our search results. For this question, you will use #the page rankings to produce the best output for a given query. #Define a procedure, lucky_search, that takes as input an index, a ranks #dictionary (the result of compute_ranks), and a keyword, and returns the one #URL most likely to be the best site for that keyword. If the keyword does not #appear in the index, lucky_search should return None. def lucky_search(index, ranks, keyword): if lookup(index,keyword): limit_ranks=[] for e in index[keyword]: limit_ranks.append(ranks[e]) for p in ranks: if ranks[p]==max(limit_ranks): return p cache = { 'http://udacity.com/cs101x/urank/index.html': """<html> <body> <h1>Dave's Cooking Algorithms</h1> <p> Here are my favorite recipies: <ul> <li> <a href="http://udacity.com/cs101x/urank/hummus.html">Hummus Recipe</a> <li> <a href="http://udacity.com/cs101x/urank/arsenic.html">World's Best Hummus</a> <li> <a href="http://udacity.com/cs101x/urank/kathleen.html">Kathleen's Hummus Recipe</a> </ul> For more expert opinions, check out the <a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a> and <a href="http://udacity.com/cs101x/urank/zinc.html">Zinc Chef</a>. </body> </html> """, 'http://udacity.com/cs101x/urank/zinc.html': """<html> <body> <h1>The Zinc Chef</h1> <p> I learned everything I know from <a href="http://udacity.com/cs101x/urank/nickel.html">the Nickel Chef</a>. </p> <p> For great hummus, try <a href="http://udacity.com/cs101x/urank/arsenic.html">this recipe</a>. </body> </html> """, 'http://udacity.com/cs101x/urank/nickel.html': """<html> <body> <h1>The Nickel Chef</h1> <p> This is the <a href="http://udacity.com/cs101x/urank/kathleen.html"> best Hummus recipe! </a> </body> </html> """, 'http://udacity.com/cs101x/urank/kathleen.html': """<html> <body> <h1> Kathleen's Hummus Recipe </h1> <p> <ol> <li> Open a can of garbonzo beans. <li> Crush them in a blender. <li> Add 3 tablesppons of tahini sauce. <li> Squeeze in one lemon. <li> Add salt, pepper, and buttercream frosting to taste. </ol> </body> </html> """, 'http://udacity.com/cs101x/urank/arsenic.html': """<html> <body> <h1> The Arsenic Chef's World Famous Hummus Recipe </h1> <p> <ol> <li> Kidnap the <a href="http://udacity.com/cs101x/urank/nickel.html">Nickel Chef</a>. <li> Force her to make hummus for you. </ol> </body> </html> """, 'http://udacity.com/cs101x/urank/hummus.html': """<html> <body> <h1> Hummus Recipe </h1> <p> <ol> <li> Go to the store and buy a container of hummus. <li> Open it. </ol> </body> </html> """, } def get_page(url): if url in cache: return cache[url] return "" def get_next_target(page): start_link = page.find('<a href=') if start_link == -1: return None, 0 start_quote = page.find('"', start_link) end_quote = page.find('"', start_quote + 1) url = page[start_quote + 1:end_quote] return url, end_quote def get_all_links(page): links = [] while True: url, endpos = get_next_target(page) if url: links.append(url) page = page[endpos:] else: break return links def union(a, b): for e in b: if e not in a: a.append(e) def add_page_to_index(index, url, content): words = content.split() for word in words: add_to_index(index, word, url) def add_to_index(index, keyword, url): if keyword in index: index[keyword].append(url) else: index[keyword] = [url] def lookup(index, keyword): if keyword in index: return index[keyword] else: return None def crawl_web(seed): # returns index, graph of inlinks tocrawl = [seed] crawled = [] graph = {} # <url>, [list of pages it links to] index = {} while tocrawl: page = tocrawl.pop() if page not in crawled: content = get_page(page) add_page_to_index(index, page, content) outlinks = get_all_links(content) graph[page] = outlinks union(tocrawl, outlinks) crawled.append(page) return index, graph def compute_ranks(graph): d = 0.8 # damping factor numloops = 10 ranks = {} npages = len(graph) for page in graph: ranks[page] = 1.0 / npages for i in range(0, numloops): newranks = {} for page in graph: newrank = (1 - d) / npages for node in graph: if page in graph[node]: newrank = newrank + d * (ranks[node] / len(graph[node])) newranks[page] = newrank ranks = newranks return ranks #Here's an example of how your procedure should work on the test site: index, graph = crawl_web('http://udacity.com/cs101x/urank/index.html') ranks = compute_ranks(graph) print lucky_search(index, ranks, 'Hummus') #>>> http://udacity.com/cs101x/urank/kathleen.html print lucky_search(index, ranks, 'the') #>>> http://udacity.com/cs101x/urank/nickel.html print lucky_search(index, ranks, 'babaganoush') #>>> None
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question