Progress batman

From CSBLwiki

(Difference between revisions)
Jump to: navigation, search
(How to parse domains from the correlation matrix)
(ADDA algorithm)
Line 68: Line 68:
===How to parse domains from the correlation matrix===
===How to parse domains from the correlation matrix===
 +
:Function for Splitting Domains
 +
<pre>
 +
#
 +
splitPos <- function(x,m=start,n=end) { # x = corr.matrix
 +
  chi = NULL; corr = x
 +
  for (i in m:(n-1)) {
 +
    c1 = sum(corr[m:i,m:i])
 +
    a = sum(corr[m:i,(i+1):n])
 +
    c2 = sum(corr[(i+1):n,(i+1):n])
 +
    chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2)
 +
#    print(paste(i,c1,a,c2,chi2))
 +
    chi = c(chi,chi2)
 +
  }
 +
  pos = which(chi==max(chi))
 +
  return(c(m,pos,pos+1,n))
 +
}
 +
 +
tmp = splitPos(corr,1,590)  # split function returns a vector (start,splitPos,splitPos+1,end)
 +
 +
tmp1 = splitPos(corr,tmp[1],tmp[2])
 +
tmp2 = splitPos(corr,tmp[3],tmp[4])
 +
</pre>
 +
 +
:old
<pre style col="blue">
<pre style col="blue">
chi = NULL
chi = NULL
Line 79: Line 103:
       chi = c(chi,x)
       chi = c(chi,x)
       p1 = which(chi==min(chi))+9
       p1 = which(chi==min(chi))+9
-
      # chi[i-9,1:2] = c(A,x)
 
-
}
 
-
chi1= NULL
 
-
for (i in 10:p1){
 
-
        A = i
 
-
      c1 = sum(corr[1:i,1:i])
 
-
        a = sum(corr[1:i ,(i+1):(ncol(count.que))])
 
-
      c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
 
-
      x =  (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
 
-
      # print(paste(A,c1,a,c2 ,x))
 
-
      chi1 = c(chi1,x)
 
-
      p2 = which(chi1==min(chi1))+9
 
-
      # chi[i-9,1:2] = c(A,x)
 
-
}
 
-
chi2=NULL
 
-
for (i in p1:(ncol(count.que)-10)){
 
-
        A = i
 
-
      c1 = sum(corr[1:i,1:i])
 
-
        a = sum(corr[1:i ,(i+1):(ncol(count.que))])
 
-
      c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
 
-
      x =  (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
 
-
      # print(paste(A,c1,a,c2 ,x))
 
-
      chi2 = c(chi2,x)
 
-
      p3 = which(chi2==min(chi2))+p1
 
       # chi[i-9,1:2] = c(A,x)
       # chi[i-9,1:2] = c(A,x)
}
}
</pre>
</pre>

Revision as of 15:38, 14 April 2011

Contents

2011

Error fetching PMID 12706730:
  1. Error fetching PMID 12706730: [ADDA]

Schedule

ADDA algorithm

How to get residue correlation matrix

# set working directory
setwd("/Users/igchoi/Downloads/")

# read blast table
tmp = read.table("sample.tab",sep="\t")

# check dimension of table
dim(tmp)

# change simpler GI number (skip this if you don't understand)
tmp[,1] = sapply(as.vector(tmp[,1]), function(x) {
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })
tmp[,2] = sapply(as.vector(tmp[,2]), function(x) {
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })

# check the result
tmp[1:5,]

# aligned region: V7 V8, V9 V10
# query length = 590 residues
count.que = matrix(0,nrow(tmp),590)  # <- build empty 'aligned' matrix
for(i in 1:nrow(tmp)) {
  res = unlist(tmp[i,7:8])
  print(res)
  count.que[i,c(res[1]:res[2])] = 1
}

# check aligned region (blue colored regions are aligned with other proteins - central region)
image(t(count.que),col=c("white","blue"))

# count number of neighbors occurring both position i and j (using 'aligned' matrix)
corr = matrix(0,590,590)  # <- set empty correlation matrix (590 x 590)

for (i in 1:ncol(count.que)) {
  for (j in i:ncol(count.que)) {
    print(paste(i,j))
    chk = count.que[,i]+count.que[,j]
    cnt = length(which(chk==2))
    corr[i,j] = cnt
  }
}

# range of number of neighbors (among 52 hits)
range(corr)

# check highly correlated region in the correlation matrix
image(corr,col=topo.colors(10))

How to parse domains from the correlation matrix

Function for Splitting Domains
#
splitPos <- function(x,m=start,n=end) { # x = corr.matrix
  chi = NULL; corr = x
  for (i in m:(n-1)) {
    c1 = sum(corr[m:i,m:i])
    a = sum(corr[m:i,(i+1):n])
    c2 = sum(corr[(i+1):n,(i+1):n])
    chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2)
#    print(paste(i,c1,a,c2,chi2))
    chi = c(chi,chi2)
  }
  pos = which(chi==max(chi))
  return(c(m,pos,pos+1,n))
}

tmp = splitPos(corr,1,590)  # split function returns a vector (start,splitPos,splitPos+1,end)

tmp1 = splitPos(corr,tmp[1],tmp[2])
tmp2 = splitPos(corr,tmp[3],tmp[4])
old
chi = NULL
for (i in 10:(ncol(count.que)-10)){
        A = i
       c1 = sum(corr[1:i,1:i])
        a = sum(corr[1:i ,(i+1):(ncol(count.que))])
       c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
       x =  (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
      # print(paste(A,c1,a,c2 ,x))
       chi = c(chi,x)
       p1 = which(chi==min(chi))+9
      # chi[i-9,1:2] = c(A,x)
}
Personal tools
Namespaces
Variants
Actions
Site
Choi lab
Resources
Toolbox