"""Regex-based helpers that clean a string for a given output context."""
import re

# IDs for the different context_type values
HTML_TEXT = 1
HTML_ATTRIBUTE = 2
JS_STRING = 3

# Anything shaped like a tag: "<", any run of characters other than ">", ">".
_TAG_RE = re.compile(r"<[^>]*>")
# Tag-shaped runs plus both quote characters, removed in a single pass.
_TAG_OR_QUOTE_RE = re.compile(r"(<[^>]*>)|\"|'")
# Characters that must be backslash-escaped inside a quoted JS string.
_JS_SPECIAL_RE = re.compile(r"[\\'\"]")


def _backslash_escape(match):
    """Return the matched character prefixed with a single backslash."""
    return "\\" + match.group(0)


def sanitize(input, context_type):
    """Clean up a string for output in the given context.

    The parameter name ``input`` shadows the builtin; it is kept so that
    existing keyword callers continue to work.

    Args:
        input: The text to clean.
        context_type: One of HTML_TEXT, HTML_ATTRIBUTE or JS_STRING.

    Returns:
        HTML_TEXT: ``input`` with every ``<...>`` run removed.
        HTML_ATTRIBUTE: ``input`` with every ``<...>`` run and every single
            or double quote removed.
        JS_STRING: ``input`` with every ``<...>`` run removed and every
            backslash, single quote and double quote preceded by a
            backslash. Line terminators are left untouched.

    Raises:
        ValueError: If ``context_type`` is not one of the known IDs.
    """
    if context_type == HTML_TEXT:
        return _TAG_RE.sub("", input)

    if context_type == HTML_ATTRIBUTE:
        return _TAG_OR_QUOTE_RE.sub("", input)

    if context_type == JS_STRING:
        # Backslashes are escaped in the same pass as the quotes. Escaping
        # only the quotes would turn the input \' into \\' -- an escaped
        # backslash followed by a bare quote that ends the string early.
        return _JS_SPECIAL_RE.sub(_backslash_escape, _TAG_RE.sub("", input))

    # No regex corresponding for context_type
    raise ValueError("Wrong argument for context type.")


def _demo():
    """Print the sample markup cleaned for each context, then the error case."""
    html = u'<ul class="class"><li/>Lorem\'ipsum dolor "sit amet, consectetuer adipiscing elit.</li><li class="class">Aliquam tincidunt mauris eu risus.</li><li>Vestibulum auctor dapibus neque.</li></ul>'
    print(sanitize(html, HTML_TEXT))
    print(sanitize(html, HTML_ATTRIBUTE))
    print(sanitize(html, JS_STRING))
    # An unknown context type is rejected; show the message instead of a
    # traceback so the demo runs to completion.
    try:
        sanitize(html, "")
    except ValueError as error:
        print(error)


# Testing a little bit
if __name__ == "__main__":
    _demo()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question