Progress batman

From CSBLwiki

(Difference between revisions)
Jump to: navigation, search
(2011)
(How to parse domains from the correlation matrix)
 
(26 intermediate revisions not shown)
Line 1: Line 1:
-
[[배형섭]]
+
{| align="right" cellpadding=15
 +
| __TOC__
 +
|}
 +
<!-- :[[배형섭|go to 배형섭 페이지]] -->
==2011==
==2011==
*Reference: [[media:JMB_adda.pdf|PDF download]]
*Reference: [[media:JMB_adda.pdf|PDF download]]
Line 5: Line 8:
ADDA pmid=12706730
ADDA pmid=12706730
</biblio>
</biblio>
 +
 +
==Schedule==
 +
<!-- #[[media:2월첫째주.xls|2월첫째주 ]] -->
 +
 +
 +
==ADDA algorithm==
 +
===How to get residue correlation matrix===
 +
*sample data: [[media:sample.tab.txt|sample.tab]]
 +
<pre style col="blue">
 +
# set working directory
 +
setwd("/Users/igchoi/Downloads/")
 +
 +
# read blast table
 +
tmp = read.table("sample.tab",sep="\t")
 +
 +
# check dimension of table
 +
dim(tmp)
 +
 +
# change simpler GI number (skip this if you don't understand)
 +
tmp[,1] = sapply(as.vector(tmp[,1]), function(x) {
 +
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })
 +
tmp[,2] = sapply(as.vector(tmp[,2]), function(x) {
 +
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })
 +
 +
# check the result
 +
tmp[1:5,]
 +
 +
# aligned region: V7 V8, V9 V10
 +
# query length = 590 residues
 +
count.que = matrix(0,nrow(tmp),590)  # <- build empty 'aligned' matrix
 +
for(i in 1:nrow(tmp)) {
 +
  res = unlist(tmp[i,7:8])
 +
  print(res)
 +
  count.que[i,c(res[1]:res[2])] = 1
 +
}
 +
 +
# check aligned region (blue colored regions are aligned with other proteins - central region)
 +
image(t(count.que),col=c("white","blue"))
 +
 +
# count number of neighbors occurring both position i and j (using 'aligned' matrix)
 +
corr = matrix(0,590,590)  # <- set empty correlation matrix (590 x 590)
 +
 +
for (i in 1:ncol(count.que)) {
 +
  for (j in i:ncol(count.que)) {
 +
    print(paste(i,j))
 +
    chk = count.que[,i]+count.que[,j]
 +
    cnt = length(which(chk==2))
 +
    corr[i,j] = cnt
 +
  }
 +
}
 +
 +
# range of number of neighbors (among 52 hits)
 +
range(corr)
 +
 +
# check highly correlated region in the correlation matrix
 +
image(corr,col=topo.colors(10))
 +
 +
</pre>
 +
 +
===How to parse domains from the correlation matrix===
 +
:Function for Splitting Domains
 +
<pre>
 +
#
 +
splitPos <- function(x,m=start,n=end) { # x = corr.matrix
 +
  chi = NULL; corr = x
 +
  for (i in m:(n-1)) {
 +
    c1 = sum(corr[m:i,m:i])
 +
    a = sum(corr[m:i,(i+1):n])
 +
    c2 = sum(corr[(i+1):n,(i+1):n])
 +
    chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2)
 +
#    print(paste(i,c1,a,c2,chi2))
 +
    chi = c(chi,chi2)
 +
  }
 +
  pos = which(chi==max(chi))
 +
  return(c(m,pos,pos+1,n))
 +
}
 +
 +
tmp = splitPos(corr,1,590)  # split function returns a vector (start,splitPos,splitPos+1,end)
 +
 +
tmp1 = splitPos(corr,tmp[1],tmp[2])
 +
tmp2 = splitPos(corr,tmp[3],tmp[4])
 +
</pre>
 +
 +
:Old
 +
<pre style col="blue">
 +
A<- 1
 +
B<-590
 +
chi = NULL
 +
point =NULL
 +
for (i in A:B-3){
 +
      c1 = sum(corr[(A+1):(i+2),(A+1):(i+2)])
 +
        a = sum(corr[(A+1):(i+2), (i+2):(B-1)])
 +
      c2 = sum(corr[(i+2):(B-1),(i+2):(B-1)])
 +
        x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
 +
      # print(paste(A,c1,a,c2 ,x))
 +
      chi = c(chi,x)
 +
      p1 = which(chi==max(chi))
 +
      B<- p1
 +
      point = c(point,p1)
 +
       
 +
      # chi[i-9,1:2] = c(A,x)
 +
}
 +
 +
</pre>

Latest revision as of 08:05, 22 April 2011

Contents

2011

Error fetching PMID 12706730:
  1. Error fetching PMID 12706730: [ADDA]

Schedule

ADDA algorithm

How to get residue correlation matrix

# set working directory
setwd("/Users/igchoi/Downloads/")

# read blast table
tmp = read.table("sample.tab",sep="\t")

# check dimension of table
dim(tmp)

# change simpler GI number (skip this if you don't understand)
tmp[,1] = sapply(as.vector(tmp[,1]), function(x) {
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })
tmp[,2] = sapply(as.vector(tmp[,2]), function(x) {
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })

# check the result
tmp[1:5,]

# aligned region: V7 V8, V9 V10
# query length = 590 residues
count.que = matrix(0,nrow(tmp),590)  # <- build empty 'aligned' matrix
for(i in 1:nrow(tmp)) {
  res = unlist(tmp[i,7:8])
  print(res)
  count.que[i,c(res[1]:res[2])] = 1
}

# check aligned region (blue colored regions are aligned with other proteins - central region)
image(t(count.que),col=c("white","blue"))

# count number of neighbors occurring both position i and j (using 'aligned' matrix)
corr = matrix(0,590,590)  # <- set empty correlation matrix (590 x 590)

for (i in 1:ncol(count.que)) {
  for (j in i:ncol(count.que)) {
    print(paste(i,j))
    chk = count.que[,i]+count.que[,j]
    cnt = length(which(chk==2))
    corr[i,j] = cnt
  }
}

# range of number of neighbors (among 52 hits)
range(corr)

# check highly correlated region in the correlation matrix
image(corr,col=topo.colors(10))

How to parse domains from the correlation matrix

Function for Splitting Domains
#
splitPos <- function(x,m=start,n=end) { # x = corr.matrix
  chi = NULL; corr = x
  for (i in m:(n-1)) {
    c1 = sum(corr[m:i,m:i])
    a = sum(corr[m:i,(i+1):n])
    c2 = sum(corr[(i+1):n,(i+1):n])
    chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2)
#    print(paste(i,c1,a,c2,chi2))
    chi = c(chi,chi2)
  }
  pos = which(chi==max(chi))
  return(c(m,pos,pos+1,n))
}

tmp = splitPos(corr,1,590)  # split function returns a vector (start,splitPos,splitPos+1,end)

tmp1 = splitPos(corr,tmp[1],tmp[2])
tmp2 = splitPos(corr,tmp[3],tmp[4])
Old
A<- 1
B<-590 
chi = NULL
point =NULL
for (i in A:B-3){
       c1 = sum(corr[(A+1):(i+2),(A+1):(i+2)])
        a = sum(corr[(A+1):(i+2), (i+2):(B-1)])
       c2 = sum(corr[(i+2):(B-1),(i+2):(B-1)])
        x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
      # print(paste(A,c1,a,c2 ,x))
       chi = c(chi,x)
       p1 = which(chi==max(chi))
       B<- p1
       point = c(point,p1)
        
      # chi[i-9,1:2] = c(A,x)
}

Personal tools
Namespaces
Variants
Actions
Site
Choi lab
Resources
Toolbox