"""Enumerate DNA n-mers and filter out problematic sequences.

Starting from every sequence of length ``n`` over A/C/G/T, the script
successively removes:

1. sequences containing a run of 4 identical bases,
2. complementary palindromes (first half equals the reverse complement
   of the second half; the middle base is ignored when ``n`` is odd),
3. sequences sharing a ``k``-long region with the reverse complement of
   an earlier candidate.

It prints the number of sequences remaining after each stage.  The code
runs unchanged on Python 2.7 and Python 3.
"""
import itertools

bases = ['A', 'C', 'G', 'T']
complements = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}

# n = length of sequences required
n = 6
# k = length of complementary region on which to exclude sequences
k = 4


def all_nmers(length):
    """Return every sequence of ``length`` bases, in lexicographic order."""
    return [''.join(p) for p in itertools.product(bases, repeat=length)]


def reverse_complement(sequence):
    """Return the reverse complement of ``sequence`` (e.g. AACG -> CGTT)."""
    return ''.join(complements[base] for base in reversed(sequence))


def has_homopolymer_run(sequence, run_length=4):
    """Return True if ``sequence`` has ``run_length`` identical bases in a row.

    Sequences shorter than ``run_length`` never match.
    """
    return any(
        len(set(sequence[i:i + run_length])) == 1
        for i in range(len(sequence) - run_length + 1)
    )


def is_complementary_palindrome(sequence):
    """Return True if the first half equals the second half's reverse complement.

    For odd lengths the middle base is skipped, so both halves have
    ``len(sequence) // 2`` bases.
    """
    # Floor division keeps the slice indices integral on Python 3 as well.
    half = len(sequence) // 2
    first_half = sequence[:half]
    second_half = sequence[len(sequence) - half:]
    return first_half == reverse_complement(second_half)


def unique_kmers(sequence, size):
    """Return the set of distinct substrings of length ``size`` in ``sequence``."""
    return {sequence[i:i + size] for i in range(len(sequence) - size + 1)}


def remove_complementary(sequences, size):
    """Drop sequences that are complementary to an earlier one over ``size`` bases.

    A sequence is dropped when the reverse complement of any of its k-mers
    occurs as a k-mer of a sequence that precedes it in ``sequences``.
    The input is expected to contain distinct sequences; the relative order
    of the survivors is preserved.

    Keeping one running set of all k-mers seen so far gives the same result
    as comparing every pair of sequences, in a single pass.
    """
    # NOTE(review): earlier sequences contribute their k-mers even when they
    # were themselves dropped, so a sequence can be excluded because of a
    # candidate that is not in the final result.  This mirrors the original
    # pairwise loop exactly; confirm whether only kept sequences should count.
    seen_forward = set()
    kept = []
    for sequence in sequences:
        forward = unique_kmers(sequence, size)
        reverse = {reverse_complement(kmer) for kmer in forward}
        if seen_forward.isdisjoint(reverse):
            kept.append(sequence)
        seen_forward |= forward
    return kept


def _report(label, count):
    """Print ``label`` and ``count`` separated by a single space."""
    # A single pre-formatted string prints identically on Python 2 and 3.
    print("%s %d" % (label, count))


def main():
    """Run the filtering pipeline for the module-level ``n`` and ``k``."""
    # find all sequences of length n
    nmers = all_nmers(n)
    _report("Total number of nmers: ", len(nmers))

    # remove those with runs of 4 of the same base
    no_repeats = [seq for seq in nmers if not has_homopolymer_run(seq)]
    _report("Nmers with no runs of 4 identical bases: ", len(no_repeats))

    # remove complementary palindromes
    no_palindromes = [
        seq for seq in no_repeats if not is_complementary_palindrome(seq)
    ]
    _report("Nmers excluding palindromes: ", len(no_palindromes))

    # remove those which are complementary to earlier sequences
    no_complements = remove_complementary(no_palindromes, k)
    print(len(no_complements))


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question