'''
20cw - twenty most common words

Usage: python 20cw.py shakespeare.txt

When no file name is given on the command line, the built-in ``test_data``
sample text is analysed instead.
'''
import sys


class CommonWords:
    ''' Incrementally track the ``nwords`` most frequent words seen so far.

    Attributes:
        nwords:   how many top words to keep.
        mincount: count of the least frequent word currently in maxwords.
        words:    every word seen, mapped to its occurrence count.
        maxwords: the (at most nwords) most common words and their counts.
    '''

    def __init__(self, nwords=20):
        self.nwords = nwords    # number of words to find
        self.mincount = 0       # least common of our most common set
        self.words = {}         # words and their counts
        self.maxwords = {}      # the twenty (nwords) most common words

    def add_word(self, word):
        ''' Record one occurrence of word and keep maxwords up to date. '''
        n = self.words.get(word, 0) + 1     # increment count
        self.words[word] = n

        if self.nwords <= 0:
            # Nothing can ever be kept; avoid min() on an empty dict below.
            return

        # A word belongs in maxwords when there is still a free slot, or when
        # it now beats the weakest tracked word.  Gating on mincount alone
        # would lock out every count-1 word once the first word was stored.
        if n > self.mincount or len(self.maxwords) < self.nwords:
            self.maxwords[word] = n
            while len(self.maxwords) > self.nwords:     # trim as needed
                del self.maxwords[self.lfmw()]
            self.mincount = self.maxwords[self.lfmw()]

    def lfmw(self):
        ''' Return the least frequent word in maxwords.

        Raises ValueError if maxwords is empty.
        '''
        return min(self.maxwords, key=self.maxwords.get)

    def __str__(self):
        ''' Return one "count word" line per tracked word, most frequent first. '''
        # sorted() is stable, so equally frequent words keep insertion order.
        ranked = sorted(self.maxwords.items(), key=lambda item: -item[1])
        return '\n'.join('%d %s' % (count, word) for word, count in ranked)


test_data = """ Parallel UniverseThe Parallel Universe Blog May 01, 2014 Not Your Father's Java: An Opinionated Guide to Modern Java Development, Part 1 More working, useful code has been written in the Java programming language than in any other in history, with the possible exceptions of C and COBOL. When Java was released almost 20 years ago, it took the software world by storm. It was a simpler, safer, alternative to C++, and some time later its performance caught up, too (depending on the exact usage, a large Java program can be slightly slower, as fast, or a little faster than a comparable C++ codebase). It offered truly tremendous productivity benefits over C++, while sacrificing very little (if anything at all) in return. Java is a blue-collar language the working persons trusty tool adopting only tried and true idioms, and adding features only if they solve major pain points. 
Whether or not Java has stayed true to its mission or not is an open question, but it certainly tries not to let current fashions sway it too far off course. Java has been used to write code for anything from smart-cards, through embedded devices, and all the way to mainframes. It is even being used to write mission- and safety-critical hard realtime software. """


if __name__ == '__main__':
    cw = CommonWords(20)                    # for twenty most frequent words
    if len(sys.argv) > 1:
        # first arg is name of file to read
        with open(sys.argv[1]) as source:
            lines = source.readlines()
    else:
        lines = test_data.split('\n')       # fall back to the sample text
    for line in lines:
        for word in line.split():           # TODO handle punctuation
            cw.add_word(word)               # increment frequency count
    print(cw)                               # invokes __str__ to get string conversion
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question