# -*- coding: utf-8 -*-
"""Tiny inverted-index search over the text files in a corpus directory.

Every visible file in the corpus directory is normalised and split into
words, an inverted index (word -> documents) is built, the user is asked
for one of the queries listed in the query file, and the matching
documents are printed together with their hit counts.

The code runs on Python 2 (the original target) as well as Python 3.
"""
import os
from collections import defaultdict

try:  # Python 2
    _read_input = raw_input
except NameError:  # Python 3
    _read_input = input


def mylistdir(directory):
    """Return the entries of *directory*, skipping dot-files (.DS_Store)."""
    return [name for name in os.listdir(directory)
            if not name.startswith('.')]


def index(text):
    """Normalise *text* for indexing.

    Newlines, periods and commas are each replaced by a space and the
    result is lower-cased.
    """
    for separator in ('\n', '.', ','):
        text = text.replace(separator, ' ')
    return text.lower()


def step1(corpus_dir='korpus'):
    """Read every visible file in *corpus_dir* and split it into words.

    Returns a dict mapping each file name to the list of normalised words
    in that file, in document order (repeated words are kept).
    """
    indexed_docs = {}
    for document in mylistdir(corpus_dir):
        # Join the path instead of chdir-ing into the corpus, so the
        # process working directory is never modified.
        with open(os.path.join(corpus_dir, document)) as handle:
            indexed_docs[document] = index(handle.read()).split()
    return indexed_docs


def invert(cleaned_content):
    """Return a dict mapping each word to ONE document that contains it.

    When a word occurs in several documents, whichever document is
    iterated last wins. Kept for compatibility; the search itself uses
    build_inverted_index(), which keeps every occurrence.
    """
    return dict((word, document)
                for document in cleaned_content
                for word in cleaned_content[document])


def build_inverted_index(cleaned_content):
    """Return a dict mapping each word to the documents that contain it.

    A document is listed once per occurrence of the word; frequency()
    relies on these repeats to count hits per document.
    """
    postings = defaultdict(list)
    for document, words in cleaned_content.items():
        for word in words:
            postings[word].append(document)
    # Hand back a plain dict so a lookup of an unknown word does not
    # silently create an entry.
    return dict(postings)


def load_queries(path='foresporgsler.txt'):
    """Read the query file at *path* and return its normalised words."""
    with open(path) as handle:
        return index(handle.read()).split()


def prompt_for_query(queries, read=None):
    """Ask the user for a query until it is one of *queries*.

    The input is stripped and lower-cased so it is compared in the same
    form index() produces. *read* is the prompt function; it defaults to
    the interpreter's line-input builtin and exists so callers can inject
    a replacement. If *queries* is empty there is nothing to validate
    against, so the first input is returned as-is (normalised).
    """
    if read is None:
        read = _read_input
    user_query = read('Indtast foresporgsel\n').strip().lower()
    while queries and user_query not in queries:
        print('Proev igen')
        user_query = read('Indtast foresporgsel\n').strip().lower()
    return user_query


def searcher(user_query, inverted_index):
    """Return the documents indexed under *user_query* ([] if unknown)."""
    return inverted_index.get(user_query, [])


def frequency(results):
    """Print and return the distinct documents in *results* with counts.

    A document that appears n > 1 times is rendered as 'name(n)'; a
    document that appears once is rendered as just 'name'. Documents keep
    the order in which they first appear in *results*.
    """
    counts = {}
    first_seen = []
    for doc in results:
        if doc not in counts:
            counts[doc] = 0
            first_seen.append(doc)
        counts[doc] += 1

    results_freq = []
    for doc in first_seen:
        if counts[doc] > 1:
            results_freq.append('%s(%d)' % (doc, counts[doc]))
        else:
            results_freq.append(doc)
    print(results_freq)
    return results_freq


def main():
    """Build the index, prompt for a query and print the matching docs."""
    cleaned_content = step1()
    inverted_index = build_inverted_index(cleaned_content)
    queries = load_queries()
    # Show the accepted queries before prompting.
    print(queries)
    user_query = prompt_for_query(queries)
    results = searcher(user_query, inverted_index)
    frequency(results)


if __name__ == '__main__':
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question