"""Tag a token sequence by greedy longest-match lookup in a small phrase table.

At each position the matcher tries the longest candidate phrase first (up to
MAX_PATTERN_LENGTH tokens) and falls back to shorter ones; a single token that
is not in the table is tagged 'O'.

The code runs unchanged on Python 2 and Python 3.
"""

import timeit

# Longest phrase, in tokens, that the matcher tries at any position.
MAX_PATTERN_LENGTH = 3

# Known phrases, keyed by their exact token tuple, mapped to their tag.
_PATTERN_TAGS = {
    ("Nexium",): "MEDICINE",
    ("pain",): "SYMPTOM",
    ("Barium", "Swallow"): "INTERVENTION",
    ("Swallow", "Test"): "INTERVENTION",
    ("pain", "in", "stomach"): "SYMPTOM",
}


def test(tokens):
    """Return the tag for the exact phrase *tokens*, or None if it is unknown.

    *tokens* is a sequence of strings; the whole sequence must equal one of
    the phrases in the table for a tag to be returned.
    """
    return _PATTERN_TAGS.get(tuple(tokens))


# The public name starts with "test", so test runners such as pytest would
# try to collect it as a test case when it is imported into a test module.
test.__test__ = False


def _evaluate(tokens):
    """Return (tokens, tag) for a candidate phrase, or None if it is rejected.

    A known phrase gets its table tag.  An unknown single token is accepted
    with the tag 'O'.  An unknown multi-token phrase is rejected.
    """
    tag = test(tokens)
    if tag:
        return (tokens, tag)
    if len(tokens) == 1:
        return (tokens, 'O')
    return None


def sequential_pattern_match(tokens):
    """Split *tokens* into consecutive tagged phrases, longest match first.

    Returns a list of (phrase_tokens, tag) pairs that together cover the
    input in order.  An empty input yields an empty list.

    Raises ValueError if MAX_PATTERN_LENGTH is smaller than 1, because then
    no candidate phrase can be formed.
    """
    tagged = []
    total = len(tokens)
    start = 0
    # A single token is always accepted by _evaluate, so the first accepted
    # candidate at each position is final and no backtracking is needed.
    # Scanning iteratively keeps long inputs clear of the recursion limit.
    while start < total:
        longest = min(total - start, MAX_PATTERN_LENGTH)
        for width in range(longest, 0, -1):
            result = _evaluate(tokens[start:start + width])
            if result:
                tagged.append(result)
                start += width
                break
        else:
            # Without this guard a non-positive limit would loop forever.
            raise ValueError("MAX_PATTERN_LENGTH must be at least 1")
    return tagged


def main():
    """Tag a sample sentence, check the result, and print benchmark timings."""
    sample = (
        "I went to a clinic to do a Barium Swallow Test "
        "because I had pain in stomach after taking Nexium"
    ).split()
    output = sequential_pattern_match(sample)
    slash_tags = ' '.join(
        word + '/' + tag for phrase, tag in output for word in phrase
    )
    print(slash_tags)
    assert slash_tags == (
        "I/O went/O to/O a/O clinic/O to/O do/O a/O "
        "Barium/INTERVENTION Swallow/INTERVENTION Test/O "
        "because/O I/O had/O pain/SYMPTOM in/SYMPTOM stomach/SYMPTOM "
        "after/O taking/O Nexium/MEDICINE"
    )

    timer = timeit.Timer(
        'sequential_pattern_match("I went to a clinic to do a Barium Swallow Test '
        'because I had pain in stomach after taking Nexium".split())',
        'from __main__ import sequential_pattern_match',
    )
    print(timer.repeat(3, 10000))


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question