"""Train a word-translation table from two sentence-aligned text files.

The two corpora are read from the paths given as the first and second
command-line arguments (called "est" and "eng" throughout the code), one
sentence per line with tokens separated by single spaces.  A table
``params[est_word][eng_word]`` is initialised to 1.0 for every co-occurring
word pair, refined with a few rounds of expectation-maximisation, and the
best-scoring "eng" word for every "est" word is printed.
"""
__author__ = 'Hans'

from collections import defaultdict
import time
import sys
import operator


def mdd(n, type):
    """Return a nested ``defaultdict`` with ``n - 1`` dict levels.

    The innermost missing value is produced by calling ``type()``; for
    ``n <= 1`` that value itself is returned.  For example
    ``mdd(3, float)["a"]["b"]`` evaluates to ``0.0``.

    The parameter name ``type`` shadows the builtin; it is kept so that
    existing keyword callers continue to work.
    """
    if n <= 1:
        return type()
    return defaultdict(lambda: mdd(n - 1, type))


def readfiles():
    """Read the two corpora named by ``sys.argv[1]`` and ``sys.argv[2]``.

    Each line is stripped and split on single spaces, so an empty line
    yields ``['']``.

    Returns:
        A tuple ``(estSent, engSent)`` of lists of token lists.
    """
    # ``with`` guarantees both handles are closed even if reading fails.
    with open(sys.argv[1], encoding='utf8') as f1, \
            open(sys.argv[2], encoding='utf8') as f2:
        estSent = [line.strip().split(" ") for line in f1]
        engSent = [line.strip().split(" ") for line in f2]
    return (estSent, engSent)


def makeDict(estSents, engSents):
    """Build the initial table with 1.0 for every co-occurring word pair.

    Sentences are paired positionally; pairing stops at the end of the
    shorter corpus.

    Returns:
        A two-level ``defaultdict`` ``params[estWord][engWord] -> float``.
    """
    params = mdd(3, float)
    for estSent, engSent in zip(estSents, engSents):
        for estWord in estSent:
            for engWord in engSent:
                params[estWord][engWord] = 1.0
    return params


def train(estSents, engSents, params, loop=5):
    """Refine ``params`` in place with ``loop`` rounds of EM and return it.

    This is the EM update of an IBM-Model-1-style lexical translation table
    (without a NULL source word).

    E-step: for every target word of a sentence pair, the current
    ``params[src][tgt]`` values of the sentence's source words are normalised
    to sum to one and added to the expected counts.
    M-step: every existing ``params[src][tgt]`` entry is replaced by its
    expected count divided by the total expected count of ``src``.

    Args:
        estSents: source-side sentences (lists of tokens).
        engSents: target-side sentences, paired positionally with
            ``estSents``; pairing stops at the end of the shorter corpus.
        params: two-level mapping ``params[src][tgt] -> float``.
        loop: number of EM iterations.

    Returns:
        The same ``params`` object, updated in place.
    """
    for _ in range(loop):
        counts = mdd(3, float)
        # Per-source totals live in their own table so that a real target
        # token spelled "_total" cannot collide with the normaliser.
        totals = defaultdict(float)

        for estSent, engSent in zip(estSents, engSents):
            for tgtWord in engSent:
                srcNormSum = 0.0
                for srcWord in estSent:
                    srcNormSum += params[srcWord][tgtWord]
                if not srcNormSum:
                    # No probability mass to distribute for this target
                    # word; skipping avoids a division by zero.
                    continue
                for srcWord in estSent:
                    aliPointProb = params[srcWord][tgtWord] / srcNormSum
                    counts[srcWord][tgtWord] += aliPointProb
                    totals[srcWord] += aliPointProb

        for srcWord, row in params.items():
            total = totals.get(srcWord, 0.0)
            if not total:
                # The word collected no expected counts in this round (e.g.
                # it does not occur in the given sentences): keep its row
                # unchanged instead of dividing by zero.
                continue
            srcCounts = counts[srcWord]
            for tgtWord in row:
                row[tgtWord] = srcCounts.get(tgtWord, 0.0) / total
    return params


def output(estSents, engSents, params, skip_last=2000):
    """Print each sentence pair and the best target word per source word.

    Args:
        estSents: source-side sentences.
        engSents: target-side sentences, paired positionally.
        params: table ``params[src][tgt] -> float``.
        skip_last: number of sentence pairs at the end of the corpus that
            are not printed.  The default of 2000 matches the previously
            hard-coded value; nothing is printed when the corpus has no more
            than ``skip_last`` sentences.
    """
    limit = max(0, len(estSents) - skip_last)
    for estSent, engSent in zip(estSents[:limit], engSents):
        print("----------------------------")
        print(estSent)
        print(engSent)
        for estWord in estSent:
            candidates = params[estWord]
            if not candidates:
                # Nothing is known about this word, so there is no best
                # translation to report.
                continue
            # max() returns the first maximal item, which is the same entry
            # a stable descending sort would put first.
            engWord = max(candidates.items(), key=operator.itemgetter(1))[0]
            print(f"{estWord} - {engWord}")


def main():
    """Read both corpora, train the table, print alignments and run time."""
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <est_file> <eng_file>")
    start = time.time()
    estSents, engSents = readfiles()
    params = makeDict(estSents, engSents)
    params = train(estSents, engSents, params)
    output(estSents, engSents, params)
    print(time.time() - start)


# Only run when executed as a script, so the functions can be imported.
if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question