import segment


# dictValueTotal: {any -> number} -> number
# Compute the sum total of all values in a dictionary with numerical
# values. An empty dictionary totals 0.
def dictValueTotal(theDict):
    return sum(theDict.values())


# fileToDict: string -> {string -> int}
# Open the file with the given filename and parse its contents into a
# dictionary. Each non-blank line must hold exactly two whitespace-separated
# fields: the key (kept as a string) and the value (converted with int()).
# Blank lines are skipped. A line with any other number of fields, or with a
# non-integer second field, raises ValueError. If a key appears more than
# once, the last occurrence wins.
def fileToDict(filename):
    myDict = {}

    with open(filename, 'r') as inputFile:
        for line in inputFile:
            # split() with no arguments already discards leading/trailing
            # whitespace, including the newline.
            fields = line.split()
            if not fields:
                # A blank line (e.g. a trailing empty line) carries no entry;
                # skip it instead of failing on the two-way unpacking below.
                continue

            key, value = fields
            myDict[key] = int(value)

    return myDict


# createProbabilityFunction: {string: int} -> (string -> float)
# Output a function which accepts as input a word and produces as output a
# float representing the probability that this word occurs at random.
#
# A word present in wordFreqDict gets its count divided by the total of all
# counts. A word that is absent gets 1 / (total * 10**(len(word) - 2)), so
# each extra character makes an unseen word ten times less likely.
#
# Note: if the counts total 0 (e.g. an empty dictionary), calling the
# returned function raises ZeroDivisionError.
def createProbabilityFunction(wordFreqDict):
    # Compute the total once, outside the inner function, so it is not
    # recomputed on every probability lookup.
    n = dictValueTotal(wordFreqDict)

    def probFunction(word):
        if word not in wordFreqDict:
            return 1.0 / (n * 10 ** (len(word) - 2))

        return float(wordFreqDict[word]) / n

    return probFunction


# segmentLines: string, string -> [string]
# Use the segment function in the segment library to segment each line of
# the file named wordSegmentFilename, using word probabilities built from
# the counts in the file named wordFreqFilename. Each line is stripped of
# surrounding whitespace before segmenting, and the pieces returned by
# segment.segment are joined with single spaces. Returns one string per
# input line, in file order.
def segmentLines(wordFreqFilename, wordSegmentFilename):
    freqDict = fileToDict(wordFreqFilename)
    p = createProbabilityFunction(freqDict)

    with open(wordSegmentFilename, 'r') as inputFile:
        return [' '.join(segment.segment(line.strip(), p))
                for line in inputFile]


if __name__ == "__main__":
    # The os module is needed for removing the temporary test files.
    import os

    try:
        from unittest import test
    except ImportError:
        # The standard-library unittest module has no `test` function;
        # presumably a project-local unittest module supplies
        # test(expected, actual) -- TODO confirm. Fall back to a minimal
        # equality check so the self-tests can still run without it.
        def test(expected, actual):
            if expected != actual:
                raise AssertionError(
                    "Expected %r but got %r" % (expected, actual))

    # Every file the self-tests create, so all of them get cleaned up.
    tempFilenames = ['test.txt', 'counts-test.txt', 'strings-test.txt']

    try:
        # test fileToDict
        with open('test.txt', 'w') as temp:
            temp.write("example\t15\n")

        testDict = fileToDict('test.txt')
        test({'example': 15}, testDict)

        # test dictValueTotal
        test(100, dictValueTotal({'a': 50, 'b': 25, 'c': 25}))
        test(0, dictValueTotal({}))  # test on an empty dictionary

        # test createProbabilityFunction
        test(1.0, createProbabilityFunction({'example': 15})('example'))

        D = {'example': 15, 'other': 85}
        test(0.15, createProbabilityFunction(D)('example'))
        test(0.85, createProbabilityFunction(D)('other'))
        # unseen 5-letter word: 1 / (100 * 10**(5 - 2))
        test(1.0 / (100 * 10**3), createProbabilityFunction(D)('crap!'))

        # test segmentLines end to end
        with open('counts-test.txt', 'w') as temp:
            temp.write("hello\t10\nthere\t10\n")

        with open('strings-test.txt', 'w') as temp:
            temp.write("hellothere\n")

        test(['hello there'],
             segmentLines('counts-test.txt', 'strings-test.txt'))
    finally:
        # cleanup: remove the temporary files even when a check fails
        for tempFilename in tempFilenames:
            if os.path.exists(tempFilename):
                os.remove(tempFilename)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question