# see http://stackoverflow.com/questions/6722985/fastest-way-in-python-to-find-a-startswith-substring-in-a-long-sorted-list-of-s ## http://pastebin.com/sCd4424k import random import time import string from bisect import bisect_left WORDLIST_FILENAME = "words.txt" def load_words(): """ Returns a list of valid words. Words are strings of lowercase letters. Depending on the size of the word list, this function may take a while to finish. """ print "Loading word list from file..." # inFile: file inFile = open(WORDLIST_FILENAME, 'r', 0) # wordlist: list of strings wordlist = [] for line in inFile: wordlist.append(line.strip().lower()) print " ", len(wordlist), "words loaded." return wordlist def frag_wordexists_Peter_simple(wordlist, word_fragment): return any(w.startswith(word_fragment) for w in wordlist) def frag_wordexists_Peter_bisect_left(wordlist, word_fragment): try: return wordlist[bisect_left(wordlist, word_fragment)].startswith(word_fragment) except IndexError: return False # word_fragment is greater than all entries in wordlist def trim_list_comprehension(wordlist, word_fragment): """returns the wordlist containing only words starting with 'word_fragment'""" wordlist = [w for w in wordlist if w.startswith(word_fragment)] return wordlist def trim_bisect(wordlist, word_fragment): """returns the wordlist containing only words starting with 'word_fragment'""" wordlist[bisect_left(wordlist, word_fragment): bisect_left(wordlist, word_fragment[:-1] + chr(ord(word_fragment[-1])+1))] return wordlist # Actually load the dictionary of words and point to it with # the wordlist variable so that it can be accessed from anywhere # in the program. wordlist = load_words() trimwordlist_comprehension = wordlist[:] trimwordlist_bisect = wordlist[:] peter_simple_total = peter_bisect_total = trim_comp_total = trim_bisect_total = 0 for word_fragment in ("p", "py", "pyt", "pyth", "pytho"): print "Testing '" + word_fragment + "' with Peter's simple test" start = time.time() if frag_wordexists_Peter_simple(wordlist, word_fragment): runtime = time.time() - start print runtime peter_simple_total += runtime print "Testing '" + word_fragment + "' with Peter's bisect left test" start = time.time() if frag_wordexists_Peter_bisect_left(wordlist, word_fragment): runtime = time.time() - start print runtime peter_bisect_total += runtime print "Testing '" + word_fragment + "' with trimming the list using a list comprehension" start = time.time() trimwordlist_comprehension = trim_list_comprehension(trimwordlist_comprehension, word_fragment) if len(trimwordlist_comprehension): runtime = time.time() - start print runtime trim_comp_total += runtime print "Testing '" + word_fragment + "' with trimming the list using Neil G's bisect" start = time.time() trimwordlist_bisect = trim_bisect(trimwordlist_bisect, word_fragment) if len(trimwordlist_bisect): runtime = time.time() - start print runtime trim_bisect_total += runtime print "" print 'The totals for all strings "p", "py", "pyt", "pyth", "pytho" are as follows:' print "In total, Peter's simple test took " + str(peter_simple_total) print "In total, Peter's bisect left test took " + str(peter_bisect_total) print "In total, the list comprehension took " + str(trim_comp_total) print "In total, Neil G's bisect took " + str(trim_bisect_total)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question