Progress batman
From CSBLwiki
(Difference between revisions)
(→Schedule) |
(→How to parse domains from the correlation matrix) |
||
(19 intermediate revisions not shown) | |||
Line 1: | Line 1: | ||
- | [[배형섭]] | + | {| align="right" cellpadding=15 |
+ | | __TOC__ | ||
+ | |} | ||
+ | <!-- :[[배형섭|go to 배형섭 페이지]] --> | ||
==2011== | ==2011== | ||
*Reference: [[media:JMB_adda.pdf|PDF download]] | *Reference: [[media:JMB_adda.pdf|PDF download]] | ||
Line 7: | Line 10: | ||
==Schedule== | ==Schedule== | ||
+ | <!-- #[[media:2월첫째주.xls|2월첫째주 ]] --> | ||
- | + | ||
+ | ==ADDA algorithm== | ||
+ | ===How to get residue correlation matrix=== | ||
+ | *sample data: [[media:sample.tab.txt|sample.tab]] | ||
+ | <pre style col="blue"> | ||
+ | # set working directory | ||
+ | setwd("/Users/igchoi/Downloads/") | ||
+ | |||
+ | # read blast table | ||
+ | tmp = read.table("sample.tab",sep="\t") | ||
+ | |||
+ | # check dimension of table | ||
+ | dim(tmp) | ||
+ | |||
+ | # change simpler GI number (skip this if you don't understand) | ||
+ | tmp[,1] = sapply(as.vector(tmp[,1]), function(x) { | ||
+ | y = strsplit(x,"\\|"); return(unlist(y)[2]) }) | ||
+ | tmp[,2] = sapply(as.vector(tmp[,2]), function(x) { | ||
+ | y = strsplit(x,"\\|"); return(unlist(y)[2]) }) | ||
+ | |||
+ | # check the result | ||
+ | tmp[1:5,] | ||
+ | |||
+ | # aligned region: V7 V8, V9 V10 | ||
+ | # query length = 590 residues | ||
+ | count.que = matrix(0,nrow(tmp),590) # <- build empty 'aligned' matrix | ||
+ | for(i in 1:nrow(tmp)) { | ||
+ | res = unlist(tmp[i,7:8]) | ||
+ | print(res) | ||
+ | count.que[i,c(res[1]:res[2])] = 1 | ||
+ | } | ||
+ | |||
+ | # check aligned region (blue colored regions are aligned with other proteins - central region) | ||
+ | image(t(count.que),col=c("white","blue")) | ||
+ | |||
+ | # count number of neighbors occurring both position i and j (using 'aligned' matrix) | ||
+ | corr = matrix(0,590,590) # <- set empty correlation matrix (590 x 590) | ||
+ | |||
+ | for (i in 1:ncol(count.que)) { | ||
+ | for (j in i:ncol(count.que)) { | ||
+ | print(paste(i,j)) | ||
+ | chk = count.que[,i]+count.que[,j] | ||
+ | cnt = length(which(chk==2)) | ||
+ | corr[i,j] = cnt | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # range of number of neighbors (among 52 hits) | ||
+ | range(corr) | ||
+ | |||
+ | # check highly correlated region in the correlation matrix | ||
+ | image(corr,col=topo.colors(10)) | ||
+ | |||
+ | </pre> | ||
+ | |||
+ | ===How to parse domains from the correlation matrix=== | ||
+ | :Function for Splitting Domains | ||
+ | <pre> | ||
+ | # | ||
+ | splitPos <- function(x,m=start,n=end) { # x = corr.matrix | ||
+ | chi = NULL; corr = x | ||
+ | for (i in m:(n-1)) { | ||
+ | c1 = sum(corr[m:i,m:i]) | ||
+ | a = sum(corr[m:i,(i+1):n]) | ||
+ | c2 = sum(corr[(i+1):n,(i+1):n]) | ||
+ | chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2) | ||
+ | # print(paste(i,c1,a,c2,chi2)) | ||
+ | chi = c(chi,chi2) | ||
+ | } | ||
+ | pos = which(chi==max(chi)) | ||
+ | return(c(m,pos,pos+1,n)) | ||
+ | } | ||
+ | |||
+ | tmp = splitPos(corr,1,590) # split function returns a vector (start,splitPos,splitPos+1,end) | ||
+ | |||
+ | tmp1 = splitPos(corr,tmp[1],tmp[2]) | ||
+ | tmp2 = splitPos(corr,tmp[3],tmp[4]) | ||
+ | </pre> | ||
+ | |||
+ | :Old | ||
+ | <pre style col="blue"> | ||
+ | A<- 1 | ||
+ | B<-590 | ||
+ | chi = NULL | ||
+ | point =NULL | ||
+ | for (i in A:B-3){ | ||
+ | c1 = sum(corr[(A+1):(i+2),(A+1):(i+2)]) | ||
+ | a = sum(corr[(A+1):(i+2), (i+2):(B-1)]) | ||
+ | c2 = sum(corr[(i+2):(B-1),(i+2):(B-1)]) | ||
+ | x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2)) | ||
+ | # print(paste(A,c1,a,c2 ,x)) | ||
+ | chi = c(chi,x) | ||
+ | p1 = which(chi==max(chi)) | ||
+ | B<- p1 | ||
+ | point = c(point,p1) | ||
+ | |||
+ | # chi[i-9,1:2] = c(A,x) | ||
+ | } | ||
+ | |||
+ | </pre> |
Latest revision as of 08:05, 22 April 2011
|
2011
- Reference: PDF download
Error fetching PMID 12706730:
- Error fetching PMID 12706730:
Schedule
ADDA algorithm
How to get residue correlation matrix
- sample data: sample.tab
# set working directory setwd("/Users/igchoi/Downloads/") # read blast table tmp = read.table("sample.tab",sep="\t") # check dimension of table dim(tmp) # change simpler GI number (skip this if you don't understand) tmp[,1] = sapply(as.vector(tmp[,1]), function(x) { y = strsplit(x,"\\|"); return(unlist(y)[2]) }) tmp[,2] = sapply(as.vector(tmp[,2]), function(x) { y = strsplit(x,"\\|"); return(unlist(y)[2]) }) # check the result tmp[1:5,] # aligned region: V7 V8, V9 V10 # query length = 590 residues count.que = matrix(0,nrow(tmp),590) # <- build empty 'aligned' matrix for(i in 1:nrow(tmp)) { res = unlist(tmp[i,7:8]) print(res) count.que[i,c(res[1]:res[2])] = 1 } # check aligned region (blue colored regions are aligned with other proteins - central region) image(t(count.que),col=c("white","blue")) # count number of neighbors occurring both position i and j (using 'aligned' matrix) corr = matrix(0,590,590) # <- set empty correlation matrix (590 x 590) for (i in 1:ncol(count.que)) { for (j in i:ncol(count.que)) { print(paste(i,j)) chk = count.que[,i]+count.que[,j] cnt = length(which(chk==2)) corr[i,j] = cnt } } # range of number of neighbors (among 52 hits) range(corr) # check highly correlated region in the correlation matrix image(corr,col=topo.colors(10))
How to parse domains from the correlation matrix
- Function for Splitting Domains
# splitPos <- function(x,m=start,n=end) { # x = corr.matrix chi = NULL; corr = x for (i in m:(n-1)) { c1 = sum(corr[m:i,m:i]) a = sum(corr[m:i,(i+1):n]) c2 = sum(corr[(i+1):n,(i+1):n]) chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2) # print(paste(i,c1,a,c2,chi2)) chi = c(chi,chi2) } pos = which(chi==max(chi)) return(c(m,pos,pos+1,n)) } tmp = splitPos(corr,1,590) # split function returns a vector (start,splitPos,splitPos+1,end) tmp1 = splitPos(corr,tmp[1],tmp[2]) tmp2 = splitPos(corr,tmp[3],tmp[4])
- Old
A<- 1 B<-590 chi = NULL point =NULL for (i in A:B-3){ c1 = sum(corr[(A+1):(i+2),(A+1):(i+2)]) a = sum(corr[(A+1):(i+2), (i+2):(B-1)]) c2 = sum(corr[(i+2):(B-1),(i+2):(B-1)]) x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2)) # print(paste(A,c1,a,c2 ,x)) chi = c(chi,x) p1 = which(chi==max(chi)) B<- p1 point = c(point,p1) # chi[i-9,1:2] = c(A,x) }