#!/usr/bin/env python2
"""Computes and prints statistics on a 'sea' of nucleotides (poor pun intended -
don't judge me).

Usage:
    python nucleo.py [path ...]

Each path names a text file holding one nucleotide sequence per line. When no
path is given, a small built-in sample set is analysed instead.
"""

from collections import Counter
import json
from pprint import pprint
import sys


# Demo data analysed by main() when no input file is supplied.
_SAMPLE_SEQUENCES = (
    'GATAGGAGTAGTGAGT',
    'GTAGTAGAGTATGATAGTGTA',
    'GATTAGATGATGATG',
    'GATATATAGATATATTAGAT',
    'GATAGATAGT',
    'GATTAAGATATGATAGTAG',
    'GATTAGATAGTAGTAGT',
    'GTATAGATAGTAGTAGTGATGA',
    'GTAGATGATGATAGTAGTAGT',
    'GAAGTAGTGATGAGTAG',
)


class NucleoSea(object):
    """Computes and prints statistics on a 'sea' of nucleotides.

    Nucleotides can be added to a NucleoSea one sequence at a time or an
    entire file at a time. All symbols are stored in a dictionary whose key is
    the position of the given symbol. The first position of a symbol in a
    nucleotide is one (not zero).

    Attributes:
        pos_map: mappings of 1-indexed positions to the list of symbols seen
            in that position (one entry per sequence long enough to reach it)
        dec_precision: decimal place precision of output
    """

    def __init__(self):
        """Initializes an empty instance with two-decimal output precision."""
        self.pos_map = {}
        self.dec_precision = 2

    def add_nucleotide_file(self, path):
        """Reads nucleotide sequences from a file into memory.

        Every line of the file is treated as one sequence; the line
        terminator ('\\n' or '\\r\\n') is removed first so it is never counted
        as a symbol. Empty lines contribute nothing.

        A file that cannot be opened or read is reported on standard output
        and otherwise ignored; the IOError is not propagated. Sequences read
        before a mid-file failure remain stored.

        Args:
            path: Name of file containing nucleotide sequences.
        """
        try:
            with open(path, 'r') as f:
                for line in f:
                    self.add_tide(line.rstrip('\r\n'))
        except IOError:
            # Only I/O failures are best-effort; anything else is a real bug
            # and must surface instead of being mislabelled as a read error.
            print('Could not read file: ' + path)

    def add_tide(self, seq):
        """Stores the symbols of a single nucleotide sequence in the
        position/symbol map.

        Args:
            seq: Nucleotide sequence.
        """
        # Positions are 1-indexed by design, hence the start value.
        for pos, symbol in enumerate(seq, 1):
            self.pos_map.setdefault(pos, []).append(symbol)

    def print_occurrences(self):
        """Prints all occurrences of symbols at each position (for testing)."""
        pprint(self.pos_map)

    def compute_percentages(self):
        """Computes the occurrence ratio of every symbol at every position.

        Returns:
            A dict mapping each 1-indexed position to a dict that maps each
            symbol seen at that position to its share of the symbols at that
            position, rounded to dec_precision decimal places.
        """
        output_map = {}
        for pos, symbols in self.pos_map.items():
            # float() keeps the division true division on Python 2 as well.
            total = float(len(symbols))
            output_map[pos] = {
                symbol: round(count / total, self.dec_precision)
                for symbol, count in Counter(symbols).items()
            }
        return output_map

    def print_percentages(self):
        """Prints position occurrence ratios for all combinations of positions
        and symbols (in JSON format)."""
        print(json.dumps(self.compute_percentages(), sort_keys=True, indent=4,
                         separators=(',', ' : ')))


def main(argv=None):
    """Adds nucleotides to a NucleoSea instance and prints position occurrence
    ratios.

    Args:
        argv: Paths of sequence files to analyse. Defaults to the command-line
            arguments (sys.argv[1:]). When empty, the built-in sample
            sequences are used.
    """
    if argv is None:
        argv = sys.argv[1:]

    sea = NucleoSea()
    if argv:
        for path in argv:
            sea.add_nucleotide_file(path)
    else:
        for seq in _SAMPLE_SEQUENCES:
            sea.add_tide(seq)
    sea.print_percentages()


if __name__ == '__main__':
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question