import numpy as np import sys #import Bio.PDB #from scipy import linalg #import os from time import clock ######################################################### # Cython Stuff # ######################################################### cimport numpy as np cimport cython from libc.math cimport sqrt from libc.math cimport M_E DTYPE = np.float64 ctypedef np.float64_t DTYPE_t ######################################################### # Global Variables # ######################################################### cdef float cutoff= .03 cdef np.ndarray derivativeEpsilonList= np.zeros(9, dtype=DTYPE) cdef np.ndarray epsilonList= np.zeros(3, dtype=DTYPE) cdef np.ndarray noiseFailure= np.empty(0, dtype=DTYPE) ######################################################### # Functions # ######################################################### @cython.boundscheck(False) cdef calcSimilarityMatrix(N, atomMode): cdef np.ndarray RMSD=np.zeros((N,N), dtype=DTYPE) for i in range(N): for j in range(i): RMSD[i][j]=leastRMSD(i, j, atomMode) RMSD[j][i]=RMSD[i][j] return RMSD @cython.boundscheck(False) cdef DTYPE_t leastRMSD(i, j, atomMode): ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Calculates least RMSD between two structures using BioPython ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ##biopython omitted return np.random.random()*3 @cython.boundscheck(False) cdef DTYPE_t createEpsilonLists(np.ndarray[DTYPE_t, ndim=2] RMSD): cdef DTYPE_t maxEpsilon=np.amax(RMSD) print maxEpsilon cdef DTYPE_t a=(3./7.)*maxEpsilon cdef DTYPE_t b= (1./2.)*maxEpsilon cdef DTYPE_t c=(4./7.)*maxEpsilon epsilonList[0]= a epsilonList[1]= b epsilonList[2]= c print(epsilonList) for i,v in enumerate(epsilonList): derivativeEpsilonList[i*3]= v-.001 derivativeEpsilonList[i*3+1]= v derivativeEpsilonList[i*3+2]= v+.001 return maxEpsilon @cython.boundscheck(False) cdef np.ndarray calcEpsilonArray(np.ndarray[DTYPE_t, ndim=2] RMSD): ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' 1. Uses RMSD to get length of the available epsilons 2. Creates epsilon list and calculates optimal epsilon value using getEpsilon() ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' cdef unsigned int N=RMSD.shape[0] cdef np.ndarray epsilonArray=np.zeros(N, dtype=DTYPE) cdef unsigned int xi for xi in range(N): epsilonArray[xi]=getEpsilon(xi, RMSD, derivativeEpsilonList, N) return epsilonArray @cython.boundscheck(False) cdef DTYPE_t getEpsilon(unsigned int xi, np.ndarray[DTYPE_t, ndim=2] RMSD, np.ndarray[DTYPE_t, ndim=1] derivativeEpsilonList, unsigned int N): ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' 1. Calculates sorted eigenvalues for three different epsilons using SVD method 2. Inputs eigenvalue matrix into getIntrinsicScaleAndDim() 3. Returns optimal epsilon value ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' cdef np.ndarray eigenvalues=np.zeros((len(derivativeEpsilonList),N), dtype= DTYPE) cdef np.ndarray neighbors = np.empty(0, dtype=np.int) cdef np.ndarray neighborsMatrix cdef np.ndarray A cdef unsigned int i, j, x, y cdef DTYPE_t e, k cdef DTYPE_t eps for i,e in enumerate(derivativeEpsilonList): neighbors = np.zeros(N) #find all models which are within RMSD dist e of model xi for j in xrange(N): if(RMSD[xi,j]<=e+.01): np.insert(neighbors, 0, j) #print(neighbors) if len(neighbors)>1: neighborsLength=len(neighbors) neighborsMatrix=np.zeros((neighborsLength,neighborsLength)) #creates matrix of neighbors for x in range(neighborsLength): for y in range(neighborsLength): neighborsMatrix[x][y]=RMSD[x,neighbors[y]] A = np.linalg.svd(neighborsMatrix, compute_uv=False) for j,k in enumerate(A): eigenvalues[i][j]=k*k else: print "maxEpsilon too small! Try again with a larger maxEpsilon value." #epsilonFailure.append(str(xi)+str(e)) return derivativeEpsilonList[i+3] #quit() eps= getIntrinsicScaleAndDim(xi,eigenvalues) return eps @cython.boundscheck(False) cdef DTYPE_t getIntrinsicScaleAndDim(xi, np.ndarray[DTYPE_t, ndim=2] eigenvalues): cdef np.ndarray statusVectors=np.zeros((len(epsilonList), eigenvalues.shape[1] ), dtype = DTYPE) cdef np.ndarray dStatusVectors=np.zeros((len(epsilonList), eigenvalues.shape[1]), dtype = DTYPE) cdef np.ndarray derivativeArray=np.zeros((len(epsilonList),len(eigenvalues[0])), dtype=DTYPE) cdef unsigned int cols= eigenvalues.shape[1] cdef unsigned int i, j, k cdef int maxStart #calculating the status vectors for the three epsilon values. #the eigenvalues matrix has 9 rows with one value slightly above and one slightly below each of 3 epsilons to calculate the derivative. #the status vector matrix only has 3 rows, one for each epsilon #b*3+1 scales the 0-2 range to get the indicies 1, 4, 7 to get the eigenvalues corresponding to the correct epsilon #each element N in the status vector for an epsilon e is calculated by subtrating the Nth eigenvalue for that epsilon from the N+1th eigenvalue for i in xrange(epsilonList.shape[0]): for j in range(cols-1): statusVectors[i][j]=eigenvalues[i*3+1][j+1]-eigenvalues[i*3+1][j] #converting the status vector to discrete 1's and 0's for i in xrange(epsilonList.shape[0]): for j in xrange(cols-1): if statusVectors[i][j]>2*statusVectors[i][j+1] and statusVectors[i][j]>2*statusVectors[i][j+2] and statusVectors[i][j]>2*statusVectors[i][j+3] and statusVectors[i][j]>2*statusVectors[i][j+4] and statusVectors[i][j]>2*statusVectors[i][j+5]: dStatusVectors[i][j]=1 else: dStatusVectors[i][j]=0 start=np.zeros(len(epsilonList),dtype=np.int) #figure out seperation between noise and non-noise eigenvalues for i in xrange(epsilonList.shape[0]): for j in xrange(len(dStatusVectors[0])-2): if(dStatusVectors[i][j]==1 and dStatusVectors[i][j+1]==1 and dStatusVectors[i][j+2]==0 and dStatusVectors[i][j+3]==0 and dStatusVectors[i][j+4]==0): start[i]=(j+2) #print "start row %s = %s"%(i, start) break if start[i]==0: #noiseFailure.append("atom "+str(xi)+" epsilon "+str(epsilonList[i])) return epsilonList[0] maxStart=max(start) #calc derivative array for i in range(maxStart, len(eigenvalues[0])): for k in range(0,len(eigenvalues)/3): slicedEigenValueArray=eigenvalues[:,i] slicedEigenValueArray=slicedEigenValueArray[k*3:k*3+3] slicedEpsilonList=derivativeEpsilonList[k*3:k*3+3] derivativeArray[k][i-maxStart]=calculateDerivative(slicedEigenValueArray,slicedEpsilonList) #determine optimal epsilon value for k in range(derivativeArray.shape[1]): for i in range(derivativeArray.shape[0]): convergence=True for j in range(k,derivativeArray.shape[1]): if derivativeArray[i][j]==0: convergence=False if convergence==True: return epsilonList[i] #print "ERROR" #derivativeFailure.append("atom"+str(xi)) #derivativeFailure.append("atom"+str(xi)) @cython.boundscheck(False) cdef np.ndarray calcTransitionMatrix(np.ndarray[DTYPE_t, ndim=2] RMSD, np.ndarray[DTYPE_t, ndim=1] epsilonArray): ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Constructs a Markov matrix P with three steps 1. Uses RMSD matrix and epsilon values to calculate kernal matrix, K 2. Compute D using K matrix (summation), compute Ktilda by using the formula Ktilda/sqrt(D*D), create Dtilda matrix by computing summation 3. P = Ktilda/Dtilda ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' #getting data from input file cdef int N = len(RMSD) cdef np.ndarray K=np.zeros((N,N), dtype=DTYPE) cdef np.ndarray D=np.zeros(N, dtype=DTYPE) #making sure matricies are the right size #calculating K and D for i in range(N): for j in range(N): K[i][j] = M_E**((-RMSD[i][j]**2)/(2*epsilonArray[i]*epsilonArray[j])) D[i] += K[i][j] #creating Ktilda matrix, Dtilda array, and outfile cdef np.ndarray Ktilda=np.zeros((N,N), dtype=DTYPE) cdef np.ndarray Dtilda=np.zeros(N, dtype=DTYPE) #calculating Ktilda and Dtilda for i in xrange(N): for j in xrange(N): Ktilda[i][j]=K[i][j]/sqrt(D[i]*D[j]) Dtilda[i]+=Ktilda[i][j] #creating P matrix and writing P to outfile cdef np.ndarray P=np.zeros((N,N), dtype=DTYPE) for i in xrange(N): for j in xrange(N): P[i][j]=Ktilda[i][j]/Dtilda[i] return P ######################################################### # Mathematical Functions # ######################################################### @cython.boundscheck(False) cdef unsigned int calculateDerivative(np.ndarray[DTYPE_t, ndim=1] eigenValueArray, np.ndarray[DTYPE_t, ndim=1] epsilonArray): derivative=(eigenValueArray[2]-eigenValueArray[0])/(epsilonArray[2]-epsilonArray[0]) if derivative<cutoff: return 1 else: return 0 @cython.boundscheck(False) cdef np.ndarray getProj(np.ndarray[DTYPE_t, ndim=2] P, np.ndarray[DTYPE_t, ndim=2] e_vecs): cdef unsigned int i,j projMatrix=np.zeros((P.shape[1],e_vecs.shape[0])) for i in range(projMatrix.shape[0]): for j in range(projMatrix.shape[1]): projMatrix[i][j]=dotProd(P[i],e_vecs[j]) return projMatrix @cython.boundscheck(False) cdef np.ndarray dotProd(np.ndarray[DTYPE_t, ndim=2] Pi, np.ndarray[DTYPE_t, ndim=2] EVi): EVi=np.transpose(EVi) return np.dot(Pi, EVi) @cython.boundscheck(False) cdef np.ndarray calculateAccumulatedVariance(np.ndarray[DTYPE_t, ndim=1] eigenvalues): #accumulate variances accVars = eigenvalues.copy() nevals = accVars.shape[0] for i in range(1, nevals): previous = accVars[i-1] accVars[i] += previous for i in range(0, nevals): accVars[i] /= accVars[nevals-1] accVars[i] *= 100.0 return accVars @cython.boundscheck(False) cdef np.ndarray removeNegatives(np.ndarray[DTYPE_t, ndim=1] evals_sorted): evals_sorted_positive=np.zeros(len(evals_sorted)) for i,v in enumerate(evals_sorted): if v>0: evals_sorted_positive[i]=v return evals_sorted_positive @cython.boundscheck(False) cdef eigDecomposeTransMatrix(np.ndarray[DTYPE_t, ndim=2] P, order): ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' Calculates eigenvalues and eigenvectors, and sorts them Order=0 for ascending, Order = 1 for descending ''''''''''''''''''''''''''''''''''''''''''''''''''''''''' cdef np.ndarray eigenvalues,eigenvectors eigenvalues, eigenvectors= np.linalg.eig(P) cdef np.ndarray evals_sorted = np.zeros(eigenvalues.size, dtype=DTYPE) cdef np.ndarray evecs_sorted = np.zeros((eigenvectors.shape[0], eigenvectors.shape[1]), dtype=DTYPE) if order == 0: sorted_evals_idx = eigenvalues.argsort() evals_sorted = eigenvalues[sorted_evals_idx] evecs_sorted = eigenvectors[sorted_evals_idx] if order == 1: sorted_evals_idx = eigenvalues.argsort()[::-1] evals_sorted = eigenvalues[sorted_evals_idx] evecs_sorted = eigenvectors[sorted_evals_idx] return [evals_sorted,evecs_sorted] ######################################################### # Main # ######################################################### def main(inFileName, atomMode, cutoff): startTime=clock() cdef np.ndarray RMSD=calcSimilarityMatrix(50, atomMode) maxEpsilon=createEpsilonLists(RMSD) print "calculating epsilon array" cdef np.ndarray epsilonArray=calcEpsilonArray(RMSD) print "calculating transition matrix" cdef np.ndarray P=calcTransitionMatrix(RMSD, epsilonArray) print "eigen decomposition" l=eigDecomposeTransMatrix(P,1) cdef np.ndarray evals_sorted, evecs_sorted evals_sorted=l[0] evecs_sorted=l[1] cdef np.ndarray evals_sorted_positive =removeNegatives(evals_sorted) print "calculating accumulated variance" cdef np.ndarray accumVar= calculateAccumulatedVariance(evals_sorted_positive) endTime=clock() elapsedTime= endTime - startTime print "Elapsed time: %s seconds"%(elapsedTime)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question