Progress batman

From CSBLwiki

(Difference between revisions)
Jump to: navigation, search
(How to parse domains from the correlation matrix)
(How to parse domains from the correlation matrix)
 
(2 intermediate revisions not shown)
Line 68: Line 68:
===How to parse domains from the correlation matrix===
===How to parse domains from the correlation matrix===
 +
:Function for Splitting Domains
 +
<pre>
 +
#
 +
splitPos <- function(x,m=start,n=end) { # x = corr.matrix
 +
  chi = NULL; corr = x
 +
  for (i in m:(n-1)) {
 +
    c1 = sum(corr[m:i,m:i])
 +
    a = sum(corr[m:i,(i+1):n])
 +
    c2 = sum(corr[(i+1):n,(i+1):n])
 +
    chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2)
 +
#    print(paste(i,c1,a,c2,chi2))
 +
    chi = c(chi,chi2)
 +
  }
 +
  pos = which(chi==max(chi))
 +
  return(c(m,pos,pos+1,n))
 +
}
 +
 +
tmp = splitPos(corr,1,590)  # split function returns a vector (start,splitPos,splitPos+1,end)
 +
 +
tmp1 = splitPos(corr,tmp[1],tmp[2])
 +
tmp2 = splitPos(corr,tmp[3],tmp[4])
 +
</pre>
 +
 +
:Old
<pre style col="blue">
<pre style col="blue">
 +
A<- 1
 +
B<-590
chi = NULL
chi = NULL
-
for (i in 10:(ncol(count.que)-10)){
+
point =NULL
-
        A = i
+
for (i in A:B-3){
-
       c1 = sum(corr[1:i,1:i])
+
       c1 = sum(corr[(A+1):(i+2),(A+1):(i+2)])
-
         a = sum(corr[1:i ,(i+1):(ncol(count.que))])
+
         a = sum(corr[(A+1):(i+2), (i+2):(B-1)])
-
       c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
+
       c2 = sum(corr[(i+2):(B-1),(i+2):(B-1)])
-
      x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
+
        x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
       # print(paste(A,c1,a,c2 ,x))
       # print(paste(A,c1,a,c2 ,x))
       chi = c(chi,x)
       chi = c(chi,x)
-
       p1 = which(chi==min(chi))+9
+
       p1 = which(chi==max(chi))
-
      # chi[i-9,1:2] = c(A,x)
+
      B<- p1
-
}
+
       point = c(point,p1)
-
chi1= NULL
+
          
-
for (i in 10:p1){
+
-
        A = i
+
-
       c1 = sum(corr[1:i,1:i])
+
-
        a = sum(corr[1:i ,(i+1):(ncol(count.que))])
+
-
      c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
+
-
      x =  (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
+
-
      # print(paste(A,c1,a,c2 ,x))
+
-
      chi1 = c(chi1,x)
+
-
      p2 = which(chi1==min(chi1))+9
+
-
      # chi[i-9,1:2] = c(A,x)
+
-
}
+
-
chi2=NULL
+
-
for (i in p1:(ncol(count.que)-10)){
+
-
         A = i
+
-
      c1 = sum(corr[1:i,1:i])
+
-
        a = sum(corr[1:i ,(i+1):(ncol(count.que))])
+
-
      c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
+
-
      x =  (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
+
-
      # print(paste(A,c1,a,c2 ,x))
+
-
      chi2 = c(chi2,x)
+
-
      p3 = which(chi2==min(chi2))+p1
+
       # chi[i-9,1:2] = c(A,x)
       # chi[i-9,1:2] = c(A,x)
}
}
 +
</pre>
</pre>

Latest revision as of 08:05, 22 April 2011

Contents

2011

Reference
  1. [ADDA] Heger A, Holm L (2003). Exhaustive enumeration of protein domain families. J Mol Biol 328(3):749-767. PMID 12706730.

Schedule

ADDA algorithm

How to get residue correlation matrix

# directory holding the BLAST tabular output (edit for your machine);
# the path is passed to read.table() directly instead of calling setwd(),
# so the session's working directory is left untouched
data_dir <- "/Users/igchoi/Downloads/"

# read blast table (tab-separated, no header: columns become V1, V2, ...)
hits <- read.table(file.path(data_dir, "sample.tab"), sep = "\t")

# check dimension of table
dim(hits)

# simplify the identifiers in columns 1 and 2 to the GI number, i.e. the
# second "|"-separated field (optional step; skip it if your identifiers
# are not "|"-delimited)
second_field <- function(ids) {
  vapply(
    strsplit(as.character(ids), "|", fixed = TRUE),
    function(parts) parts[2],
    character(1)
  )
}
hits[, 1] <- second_field(hits[, 1])
hits[, 2] <- second_field(hits[, 2])

# check the result
head(hits, 5)

# aligned region: V7 V8, V9 V10
# query length = 590 residues (defined once and reused below)
query_len <- 590

# build the 'aligned' matrix: one row per hit, one column per query residue,
# 1 where the hit's aligned region (V7..V8) covers that residue
count.que <- matrix(0, nrow(hits), query_len)
for (i in seq_len(nrow(hits))) {
  count.que[i, hits[i, 7]:hits[i, 8]] <- 1
}

# check aligned region (blue regions are aligned with other proteins)
image(t(count.que), col = c("white", "blue"))

# count the hits that cover both position i and position j.
# For a 0/1 matrix, crossprod() gives exactly these co-occurrence counts in
# one step; this replaces a 590 x 590 double loop that also printed every
# (i, j) pair.
corr <- crossprod(count.que)

# the original loop only filled entries with j >= i, so keep the
# upper triangle (including the diagonal) and zero the rest; later block
# sums over corr therefore see the same values as before
corr[lower.tri(corr)] <- 0

# range of number of neighbors (among 52 hits)
range(corr)

# check highly correlated region in the correlation matrix
image(corr, col = topo.colors(10))

How to parse domains from the correlation matrix

Function for Splitting Domains
#
#' Find the best single split of a residue range into two domains
#'
#' Every boundary `i` in `m:(n - 1)` is scored by partitioning the range into
#' a left part `m:i` and a right part `(i + 1):n`:
#'   c1 = sum of x over left x left,
#'   a  = sum of x over left x right,
#'   c2 = sum of x over right x right,
#'   score = ((c1 * c2 - a^2)^2) / ((c1 + a)^2 * (c2 + a)^2).
#' The boundary with the largest score wins; the first one is taken on ties.
#'
#' @param x Correlation (co-occurrence) matrix, indexable as `x[rows, cols]`.
#' @param m First residue index of the range to split. Defaults to 1.
#' @param n Last residue index of the range to split. Defaults to `ncol(x)`.
#' @return Numeric vector `c(m, pos, pos + 1, n)` where `pos` is the ABSOLUTE
#'   index (in the coordinates of `x`) of the last residue of the left part.
#'   If no boundary has a finite score (e.g. all block sums are zero),
#'   `pos` and `pos + 1` are `NA`.
splitPos <- function(x, m = 1, n = ncol(x)) {
  stopifnot(
    is.numeric(m), length(m) == 1,
    is.numeric(n), length(n) == 1,
    m >= 1, n <= min(dim(x))
  )
  if (n <= m) {
    # m:(n - 1) would silently count backwards and score nonsense ranges
    stop("splitPos needs at least two positions to split (m < n)",
         call. = FALSE)
  }

  candidates <- m:(n - 1)
  chi <- vapply(candidates, function(i) {
    left <- m:i
    right <- (i + 1):n
    c1 <- sum(x[left, left])
    a <- sum(x[left, right])
    c2 <- sum(x[right, right])
    ((c1 * c2 - a * a)^2) / ((c1 + a)^2 * (c2 + a)^2)
  }, numeric(1))

  # which.max() skips NA/NaN scores (0/0 when a block pair sums to zero) and
  # returns a single index even when several boundaries share the maximum
  best <- which.max(chi)
  if (length(best) == 0) {
    return(c(m, NA, NA, n))
  }

  # translate the index within `chi` back to a position in `x`; returning the
  # raw index is only correct when m == 1
  pos <- candidates[best]
  c(m, pos, pos + 1, n)
}

# first-level split over the whole matrix; the end of the range is taken from
# corr itself rather than repeating the query length.
# splitPos returns a vector (start, splitPos, splitPos+1, end)
tmp <- splitPos(corr, 1, ncol(corr))

# second-level splits: re-split the left part (start..splitPos) and the
# right part (splitPos+1..end) found above
tmp1 <- splitPos(corr, tmp[1], tmp[2])
tmp2 <- splitPos(corr, tmp[3], tmp[4])
Old
# Earlier script-style search for a split point in `corr` (listed on the page
# under "Old"). Each loop step appends one score to `chi` and appends the
# position(s) of the running maximum of `chi` to `point`.
# A / B: lower / upper bound used to build the index ranges below.
A<- 1
B<-590 
chi = NULL
point =NULL
# NOTE(review): `:` binds tighter than binary `-`, so `A:B-3` is `(A:B) - 3`;
# with A = 1, B = 590 the loop variable runs over -2..587, not 1..587.
# Confirm which range was intended.
for (i in A:B-3){
       # c1: sum of corr over rows/cols (A+1)..(i+2)
       c1 = sum(corr[(A+1):(i+2),(A+1):(i+2)])
        # a: sum of corr over rows (A+1)..(i+2) and cols (i+2)..(B-1)
        a = sum(corr[(A+1):(i+2), (i+2):(B-1)])
       # c2: sum of corr over rows/cols (i+2)..(B-1)
       c2 = sum(corr[(i+2):(B-1),(i+2):(B-1)])
        # score for this step: (c1*c2 - a^2)^2 / ((c1+a)^2 * (c2+a)^2)
        x = (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
      # print(paste(A,c1,a,c2 ,x))
       chi = c(chi,x)
       # index (or indices, on ties) within chi of the largest score so far
       p1 = which(chi==max(chi))
       # NOTE(review): B is overwritten with that index here, so every range
       # ending at (B-1) above changes from the next iteration on; the loop's
       # own sequence was already evaluated and is not affected. Confirm that
       # shrinking B inside the loop is intended.
       B<- p1
       point = c(point,p1)
        
      # chi[i-9,1:2] = c(A,x)
}

Personal tools
Namespaces
Variants
Actions
Site
Choi lab
Resources
Toolbox