''' Created on Jul 7, 2015 @author: olivier ''' import sys def naive(p, t): occurrences = [] for i in range(len(t) - len(p) + 1): # loop over alignments match = True for j in range(len(p)): # loop over characters if t[i+j] != p[j]: # compare characters match = False break if match: occurrences.append(i) # all chars matched; record return occurrences def naive_2mm(p, t): occurrences = [] for i in range(len(t) - len(p) + 1): # loop over alignments match = True mm_count = 0 for j in range(len(p)): # loop over characters if t[i+j] != p[j]: mm_count += 1 if mm_count > 2: # compare characters match = False break if match: occurrences.append(i) # all chars matched; record return occurrences def naive_with_rc(p, t): occurrences = naive(p,t) q = reverseComplement(p) if p != q: occurrences_r = naive(q,t) else: occurrences_r = '' # else: # print p+" is a palindrome of "+q return occurrences,occurrences_r def reverseComplement(s): complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'} t = '' for base in s: t = complement[base] + t return t def readGenome(filename): genome = '' with open(filename, 'r') as f: for line in f: # ignore header line with genome information if not line[0] == '>': genome += line.rstrip() return genome def readFastq(filename): sequences = [] qualities = [] with open(filename) as fh: while True: fh.readline() # skip name line seq = fh.readline().rstrip() # read base sequence fh.readline() # skip placeholder line qual = fh.readline().rstrip() # base quality line if len(seq) == 0: break sequences.append(seq) qualities.append(qual) return sequences, qualities def phread33ToQ(qual): return ord(qual)-33 def computeMean(seqArray): tot = 0 for seq in seqArray: tot += seq return tot/len(seqArray) def check_qualities(quals): qPerSeq = {} for qual in quals: for idx in range(len(qual)): if not idx in qPerSeq: qPerSeq[idx] = [] qPerSeq[idx].append(phread33ToQ(qual[idx])) qPerSeqMean = {} for key in qPerSeq.keys(): qPerSeqMean[key] = computeMean(qPerSeq[key]) minKey = -1 minVal = sys.maxint for key in qPerSeqMean.keys(): if qPerSeqMean[key] < minVal: minKey = key minVal = qPerSeqMean[key] print "Minimum value is "+str(minVal)+" at key "+str(minKey) """ Main """ # t = readGenome('lambda_virus.fa') # occurrences = naive_2mm('AGGAGGTT', t) # print str(len(occurrences))+" "+str(occurrences) # # print str(len(occurrences_r))+" "+str(occurrences_r) t = 'AGGTAGGTAGGT' occur = naive('AGGT',t) print str(occur)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question