"""Get top ten domains from a webserver log file. Get top ten domains from a webserver log file. The file is at /tmp/log.txt and one sample line looks like this: foo\twww.bar.com/baz\t1234\n We want a listing of domains and their frequency in the log file like this: 234 bar.com 123 foo.com … up to ten. """ def TopN(log_file, n): domain_map = {} top_n = [] lines = log_file.split('\n') for line in lines: values = line.split('\t') if len(values) < 3: continue url = values[1] if url.find('.com') > 0: parts = url.split('/') url = parts[0] url = url.lstrip('www.') if url in domain_map: domain_map[url] += 1 else: domain_map[url] = 1 temp = [] for key, value in domain_map.iteritems(): temp.append((value, key)) sorted_list = sorted(temp, reverse=True) for i in range(n): count = sorted_list[i][0] domain = sorted_list[i][1] top_n.append((domain, count)) return top_n twodomains = """ badline\n foo\twww.bar.com/baz\t1234\n bar\twww.foo.com/baz\t2345\n bar\twww.foo.com/baz\t2345\n bar\tfoo.com\t2345\n bar\tsky.com\t2345\n bar\tbadcom\n """ print TopN(twodomains, 2)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question