"""Trace the structure of an HTML snippet with the standard HTMLParser.

Every start tag (with its enclosing parent), end tag and text node is
printed as it is encountered.  ``mallard_interpretation`` collects a
bracketed form of start tags in the module-level ``output`` list.
"""

try:  # Python 3 location of the parser
    from html.parser import HTMLParser
except ImportError:  # Python 2 fallback
    from HTMLParser import HTMLParser

# Fragments collected by mallard_interpretation(); printed at the end.
output = []

# Elements that never have an end tag.  They must not be recorded as open,
# otherwise every later tag would be reported as their child.
_VOID_ELEMENTS = frozenset([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'param', 'source', 'track', 'wbr',
])


# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    """HTMLParser subclass that prints each parse event.

    The parser hands tags to the handlers as plain strings, which carry no
    parent information, so the chain of currently open elements is kept in
    ``self.open_tags`` (outermost first).
    """

    def __init__(self, *args, **kwargs):
        # Explicit base-class call instead of super(): the Python 2
        # HTMLParser is an old-style class.
        HTMLParser.__init__(self, *args, **kwargs)
        self.open_tags = []

    def handle_starttag(self, tag, attrs):
        """Print the start tag and its parent, then record it as open.

        tag   -- lower-cased element name supplied by HTMLParser.
        attrs -- list of (name, value) pairs; not used here.
        """
        # The innermost open element is the parent; None at the root.
        parent = self.open_tags[-1] if self.open_tags else None
        print('STARTING TAG:  %s' % tag)
        print('PARENT TAG:  %s' % parent)
        if tag not in _VOID_ELEMENTS:
            self.open_tags.append(tag)
        # mallard_interpretation('start_tag', tag)

    def handle_endtag(self, tag):
        """Print the end tag and close the matching open element."""
        print('END TAG:  %s' % tag)
        # Close through the nearest matching open tag so that unclosed
        # children are dropped with it; a stray end tag matches nothing
        # and leaves the stack untouched.
        for index in range(len(self.open_tags) - 1, -1, -1):
            if self.open_tags[index] == tag:
                del self.open_tags[index:]
                break
        # mallard_interpretation('end_tag', tag)

    def handle_data(self, data):
        """Print a run of text found between tags."""
        print('DATA:  %s' % data)


def mallard_interpretation(data_type, data):
    """Append the bracketed form of a start tag to ``output``.

    data_type -- event name; only 'start_tag' is acted on, any other
                 value leaves ``output`` unchanged.
    data      -- the tag name (a string).
    """
    if data_type == 'start_tag':
        output.append('[' + data + ']')


# instantiate the parser and feed it some HTML
parser = MyHTMLParser()
parser.feed('<html><head><title>Test</title></head>'
            '<body><h1>Parse me!</h1><div>Sample Div<p>Sample paragraph</p>End of sample div</div><p>Sample para 2</p></body></html>')
print(output)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question