"""Count digrams of adjacent nouns in a corpus, with a bonus for genitives.

A digram is a pair of neighbouring words on the same corpus line, joined by
a single space. Every digram made of two nouns scores 1 per occurrence and
an additional 3 per occurrence when both words are also genitives.
"""

__author__ = 'Dorota'

import collections
import re


def find_all_digrams(corps, set_of_nouns, set_of_genitives, dict_of_digrams):
    """Score every pair of adjacent nouns found in the corpus.

    Args:
        corps: sequence of lines, each line being a list of words.
        set_of_nouns: container of words treated as nouns.
        set_of_genitives: container of words treated as genitives.
        dict_of_digrams: dict mapping "first last" to its score; it is
            updated in place.

    Prints the index of each processed line and, at the end, the whole
    score dictionary.
    """
    # NOTE(review): the original indentation was lost; this assumes the line
    # index is printed once per corpus line and the dictionary once after
    # the whole corpus has been processed -- confirm.
    for line, words in enumerate(corps):
        # Pair each word with its successor; a line with fewer than two
        # words yields no pairs.
        for word_first, word_last in zip(words, words[1:]):
            if word_first in set_of_nouns and word_last in set_of_nouns:
                key = word_first + " " + word_last
                common_bonus(key, dict_of_digrams)
                is_genitive_bonus(key, set_of_genitives, dict_of_digrams)
        print(line)
    print(dict_of_digrams)


def common_bonus(digram, dict_of_digrams):
    """Add 1 to the score of ``digram``, creating the entry if needed."""
    # ``dict.has_key`` exists only on Python 2; ``get`` works everywhere.
    dict_of_digrams[digram] = dict_of_digrams.get(digram, 0) + 1


def is_genitive_bonus(digram, set_of_genitives, dict_of_digrams):
    """Add 3 to the score of ``digram`` when both of its words are genitives.

    ``digram`` is expected to be two words separated by a single space.
    """
    pair = digram.split(" ")
    if pair[0] in set_of_genitives and pair[1] in set_of_genitives:
        # ``get`` keeps this safe even if the digram was not counted yet.
        dict_of_digrams[digram] = dict_of_digrams.get(digram, 0) + 3


def _read_corpus(path):
    """Return the file at ``path`` as a list of word lists, one per line."""
    with open(path, 'r') as handle:
        # ``split()`` drops the trailing newline and empty tokens, so the
        # last word of a line can match the word sets.
        return [raw_line.split() for raw_line in handle]


def _read_word_set(path):
    """Return the set of non-empty, stripped lines of the file at ``path``."""
    with open(path) as handle:
        entries = (raw_line.strip() for raw_line in handle)
        # Skip blank lines so the empty string never counts as a word.
        return set(entry for entry in entries if entry)


def main():
    """Load the corpus and word lists, then score and print all digrams."""
    all_words_in_corps = _read_corpus('korpus_tm1.txt')
    set_of_nouns = _read_word_set('lista_rzeczownikow.txt')
    set_of_genitives = _read_word_set('lista_dopelniaczy.txt')
    dict_of_digrams = {}
    # The functions live in this file, so they are called directly instead
    # of through the never-imported ``funkcje_digramy`` module.
    find_all_digrams(all_words_in_corps, set_of_nouns, set_of_genitives,
                     dict_of_digrams)


if __name__ == '__main__':
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question