import re

# IDs for the different context_type values
HTML_TEXT = 1
HTML_ATTRIBUTE = 2
JS_STRING = 3

# Dictionary containing the regex for the different contexts.
# Every match of the selected pattern is deleted from the input.
# NOTE(review): these patterns only delete "<...>" runs (plus double quotes
# for HTML_ATTRIBUTE). Single quotes, backslashes, "&" and an unterminated
# "<" are left untouched, so this is presumably not sufficient on its own
# for untrusted input -- confirm against the intended use before relying on
# it for output escaping.
REGEX = {
    HTML_TEXT: r"<[^>]*>",
    HTML_ATTRIBUTE: r"(<[^>]*>)|\"",
    JS_STRING: r"<[^>]*>",
}


def sanitize(input, context_type):
    """Clean up a string for output by deleting context-specific matches.

    Args:
        input: The text to clean. (The name shadows the ``input`` builtin but
            is kept because it is part of the public signature.)
        context_type: One of HTML_TEXT, HTML_ATTRIBUTE or JS_STRING; selects
            which pattern from REGEX is applied.

    Returns:
        ``input`` with every match of the selected pattern removed.

    Raises:
        ValueError: If ``context_type`` is not a key of REGEX. ValueError is
            a subclass of Exception, so callers catching Exception still
            handle it.
    """
    # Keep the try body limited to the lookup so that only an unknown
    # context_type is reported as a bad argument.
    try:
        pattern = REGEX[context_type]
    except KeyError:
        # No regex corresponding to context_type
        raise ValueError("Wrong argument for context type.")
    return re.sub(pattern, "", input)


if __name__ == "__main__":
    # Testing a little bit. Guarded so that importing this module has no
    # side effects; print(...) with one argument works on Python 2 and 3.
    html = u'<ul class="class"><li/>Lorem ipsum dolor "sit amet, consectetuer adipiscing elit.</li><li class="class">Aliquam tincidunt mauris eu risus.</li><li>Vestibulum auctor dapibus neque.</li></ul>'
    print(sanitize(html, HTML_TEXT))
    print(sanitize(html, HTML_ATTRIBUTE))
    print(sanitize(html, JS_STRING))
    # Deliberately passes an unknown context type: this call raises.
    print(sanitize(html, ""))
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question