Progress batman
From CSBLwiki
(Difference between revisions)
(→How to parse domains from the correlation matrix) |
(→How to parse domains from the correlation matrix) |
||
Line 69: | Line 69: | ||
===How to parse domains from the correlation matrix=== | ===How to parse domains from the correlation matrix=== | ||
<pre style col="blue"> | <pre style col="blue"> | ||
- | |||
chi = NULL | chi = NULL | ||
for (i in 10:(ncol(count.que)-10)){ | for (i in 10:(ncol(count.que)-10)){ | ||
Line 106: | Line 105: | ||
# chi[i-9,1:2] = c(A,x) | # chi[i-9,1:2] = c(A,x) | ||
} | } | ||
- | |||
- | |||
- | |||
</pre> | </pre> |
Revision as of 07:47, 14 April 2011
|
2011
- Reference: PDF download
Error fetching PMID 12706730:
- Error fetching PMID 12706730:
Schedule
ADDA algorithm
How to get residue correlation matrix
- sample data: sample.tab
# Build a residue co-occurrence ("correlation") matrix from a tabular BLAST
# result.
#
# Input : "sample.tab", a tab-separated BLAST hit table, read from the
#         working directory set below.
# Output: tmp       - the hit table, with columns 1 and 2 reduced to the second
#                     "|"-separated field of the original identifier
#         count.que - hits x residues matrix; 1 where the hit's V7..V8 range
#                     covers that query position, 0 elsewhere
#         corr      - residues x residues matrix; entry [i, j] (i <= j) is the
#                     number of hits covering both position i and position j,
#                     entries below the diagonal are 0

# set working directory
# NOTE(review): machine-specific absolute path - adjust to wherever
# sample.tab was downloaded.
setwd("/Users/igchoi/Downloads/")

# read blast table
tmp = read.table("sample.tab", sep = "\t")

# check dimension of table
dim(tmp)

# change simpler GI number (skip this if you don't understand)
# Each identifier is split on "|" and only the second field is kept.
# as.character() copes with both factor and character columns; vapply()
# guarantees a character vector even for an empty table.
second_field = function(ids) {
  fields = strsplit(as.character(ids), "|", fixed = TRUE)
  vapply(fields, function(f) f[2], character(1))
}
tmp[, 1] = second_field(tmp[, 1])
tmp[, 2] = second_field(tmp[, 2])

# check the result
head(tmp, 5)

# aligned region: V7 V8, V9 V10
# query length = 590 residues (kept in one place so both matrices agree)
query_len = 590

# build empty 'aligned' matrix, then flag the V7..V8 range of every hit
count.que = matrix(0, nrow(tmp), query_len)
for (i in seq_len(nrow(tmp))) {
  res = unlist(tmp[i, 7:8])
  print(res)
  count.que[i, res[1]:res[2]] = 1
}

# check aligned region (blue colored regions are aligned with other proteins
# - central region)
image(t(count.que), col = c("white", "blue"))

# count number of neighbors occurring both position i and j (using 'aligned'
# matrix). count.que only holds 0/1, so t(count.que) %*% count.que gives, for
# every pair of positions, the number of hits that cover both of them.
corr = crossprod(count.que)
# keep only i <= j so that every residue pair is counted exactly once
corr[lower.tri(corr)] = 0

# range of number of neighbors (among 52 hits)
range(corr)

# check highly correlated region in the correlation matrix
image(corr, col = topo.colors(10))
How to parse domains from the correlation matrix
# Scan candidate cut positions along the query and pick the cut with the
# smallest split score.
#
# Reads : corr      - residues x residues co-occurrence matrix
#         count.que - hits x residues 0/1 matrix (only its column count is used)
# Writes: chi       - score for every cut 10 .. (n - 10)
#         p1        - cut position with the smallest score in chi
#         chi1, p2  - scores / best cut restricted to cuts 10 .. p1
#         chi2, p3  - scores / best cut restricted to cuts p1 .. (n - 10)

n_res = ncol(count.que)
# cuts run from 10 to n - 10, so at least 20 positions are required
stopifnot(n_res >= 20)

# Score of cutting between residue i and residue i + 1:
#   c1 = sum of corr inside the left part  (1 .. i)
#   c2 = sum of corr inside the right part (i + 1 .. n)
#   a  = sum of corr between the two parts
# The result is NaN when c1 + a or c2 + a is 0 (0 / 0).
cut_score = function(corr, i, n) {
  left = seq_len(i)
  right = (i + 1):n
  c1 = sum(corr[left, left])
  a = sum(corr[left, right])
  c2 = sum(corr[right, right])
  ((c1 * c2 - a * a)^2) / (((c1 + a)^2) * ((c2 + a)^2))
}

# Score every candidate cut once; a margin of 10 residues is left at each end.
cuts = 10:(n_res - 10)
chi = vapply(cuts, function(i) cut_score(corr, i, n_res), numeric(1))

# which.min() skips NA/NaN scores and returns a single index even on ties,
# so p1 is always one usable position (or empty if no score is defined).
p1 = cuts[which.min(chi)]
if (length(p1) != 1) {
  stop("no finite split score found; cannot locate a cut position",
       call. = FALSE)
}

# Cuts 10 .. p1. The score of a cut does not depend on the scan range, so the
# values are taken from chi instead of being recomputed.
left_cuts = 10:p1
chi1 = chi[cuts <= p1]
p2 = left_cuts[which.min(chi1)]

# Cuts p1 .. (n - 10). Index 1 of chi2 corresponds to cut p1, hence the
# position is taken from right_cuts (offset p1 - 1, not p1).
right_cuts = p1:(n_res - 10)
chi2 = chi[cuts >= p1]
p3 = right_cuts[which.min(chi2)]

# NOTE(review): both sub-range scans score cuts against the full matrix and
# both ranges contain p1 itself, so p2 and p3 can only differ from p1 on
# ties. Presumably the scan was meant to be repeated on the sub-matrices
# corr[1:p1, 1:p1] and corr[(p1 + 1):n, (p1 + 1):n] - confirm with the author.