# Or use URL?  main() accepts either a local file path or an http(s) URL.
from contextlib import closing

try:  # Python 2 module name
    import urllib2
except ImportError:  # Python 3: same urlopen() entry point, new location
    import urllib.request as urllib2

# import the HTMLParser module
try:  # Python 2 module name
    from HTMLParser import HTMLParser
except ImportError:  # Python 3
    from html.parser import HTMLParser

# NOTE(review): TOKENIZER, SPECIAL_CHARS, nltk, syllables_en and
# count_complex_words are used below but are neither defined nor imported
# in this file; they must be supplied before the text-analysis functions
# can run.  They are deliberately not guessed at here.


def handle_data(self, data):
    """Print a chunk of parsed character data and where it was found.

    Kept at module level for backward compatibility; MyHTMLParser delegates
    to it.

    Args:
        self: a parser object exposing ``getpos()``.
        data: the text found between tags.
    """
    print("Encountered some data: %s" % (data,))
    pos = self.getpos()
    print("At line:  %s  position  %s" % (pos[0], pos[1]))


class MyHTMLParser(HTMLParser):
    """HTML parser that reports every text chunk and remembers it.

    Attributes:
        text_chunks: list of the data strings seen so far, in document order.
    """

    def __init__(self):
        # Explicit base call: HTMLParser is an old-style class on Python 2,
        # so super() is not usable there.
        HTMLParser.__init__(self)
        self.text_chunks = []

    def handle_data(self, data):
        """Report ``data`` via the module-level handler and store it."""
        # Inside a method body the bare name resolves to the module-level
        # function, not to this method.
        handle_data(self, data)
        self.text_chunks.append(data)


def _read_source(source):
    """Return the contents of ``source`` (http(s) URL or file path) as text."""
    if source.startswith(("http://", "https://")):
        with closing(urllib2.urlopen(source)) as response:
            contents = response.read()
    else:
        with open(source) as handle:
            contents = handle.read()
    if not isinstance(contents, str):
        # Python 3 urlopen() yields bytes; the parser needs text.
        contents = contents.decode("utf-8", "replace")
    return contents


def get_char_count(words):
    """Return the total number of characters in ``words``.

    Byte strings are decoded as UTF-8 so multi-byte characters count once;
    text (unicode) strings are measured as they are.
    """
    characters = 0
    for word in words:
        if isinstance(word, bytes):
            word = word.decode("utf-8")
        characters += len(word)
    return characters


def get_words(text=''):
    """Tokenize ``text`` and return its words with , . ! ? removed.

    Tokens that are listed in SPECIAL_CHARS, or that are a single space,
    are dropped entirely.
    """
    filtered_words = []
    for word in TOKENIZER.tokenize(text):
        if word in SPECIAL_CHARS or word == " ":
            continue
        for mark in (",", ".", "!", "?"):
            word = word.replace(mark, "")
        filtered_words.append(word)
    return filtered_words


def get_sentences(text=''):
    """Split ``text`` into sentences with NLTK's English punkt tokenizer."""
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    return tokenizer.tokenize(text)


def count_syllables(words):
    """Return the sum of ``syllables_en.count(word)`` over ``words``."""
    return sum(syllables_en.count(word) for word in words)


class Readability(object):
    """Text statistics and Flesch readability scores for one text.

    Attributes:
        analyzedVars: dict filled by analyze_text() with the keys 'words',
            'char_cnt', 'word_cnt', 'sentence_cnt', 'syllable_cnt',
            'complex_word_cnt' and 'avg_words_p_sentence'.  All values
            except 'words' are floats.
    """

    # Class-level default kept for compatibility; each instance replaces it
    # with its own dict in analyze_text().
    analyzedVars = {}

    def __init__(self, text):
        self.analyze_text(text)

    def analyze_text(self, text):
        """Compute the statistics for ``text`` and store them on the instance."""
        words = get_words(text)
        char_count = get_char_count(words)
        word_count = len(words)
        sentence_count = len(get_sentences(text))
        syllable_count = count_syllables(words)
        complexwords_count = count_complex_words(text)
        # True division, and no ZeroDivisionError when no sentence is found.
        if sentence_count:
            avg_words_p_sentence = float(word_count) / sentence_count
        else:
            avg_words_p_sentence = 0.0

        self.analyzedVars = {
            'words': words,
            'char_cnt': float(char_count),
            'word_cnt': float(word_count),
            'sentence_cnt': float(sentence_count),
            'syllable_cnt': float(syllable_count),
            'complex_word_cnt': float(complexwords_count),
            'avg_words_p_sentence': float(avg_words_p_sentence),
        }

    def FleschReadingEase(self):
        """Return the Flesch Reading Ease score rounded to 4 decimals.

        Computed as 206.835 - 1.015 * (words per sentence)
        - 84.6 * (syllables per word); 0.0 when there are no words.
        """
        stats = self.analyzedVars
        score = 0.0
        if stats['word_cnt'] > 0.0:
            syllables_per_word = stats['syllable_cnt'] / stats['word_cnt']
            score = (206.835
                     - (1.015 * stats['avg_words_p_sentence'])
                     - (84.6 * syllables_per_word))
        return round(score, 4)

    def FleschKincaidGradeLevel(self):
        """Return the Flesch-Kincaid Grade Level rounded to 4 decimals.

        Computed as 0.39 * (words per sentence)
        + 11.8 * (syllables per word) - 15.59; 0.0 when there are no words.
        """
        stats = self.analyzedVars
        score = 0.0
        if stats['word_cnt'] > 0.0:
            syllables_per_word = stats['syllable_cnt'] / stats['word_cnt']
            score = (0.39 * stats['avg_words_p_sentence']
                     + 11.8 * syllables_per_word
                     - 15.59)
        return round(score, 4)


def main(source="www.bbc.co.uk"):
    """Parse the HTML at ``source`` and print its readability scores.

    Args:
        source: a local file path, or a URL starting with http:// or
            https://.  The default is the path the original script opened.
    """
    contents = _read_source(source)

    parser = MyHTMLParser()
    parser.feed(contents)
    parser.close()  # flush any text the parser is still buffering

    # NOTE(review): every data chunk is included, which also covers the
    # contents of <script>/<style> elements; filter if that matters.
    text = " ".join(parser.text_chunks)

    rd = Readability(text)
    print('Test text:')
    print('"%s"\n' % text)
    print('FleschReadingEase:  %s' % rd.FleschReadingEase())
    print('FleschKincaidGradeLevel:  %s' % rd.FleschKincaidGradeLevel())


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question