Progress batman

From CSBLwiki

(Difference between revisions)

Jump to: navigation, search

Revision as of 15:38, 14 April 2011

2011

Reference: PDF download

Error fetching PMID 12706730:

Error fetching PMID 12706730: [ADDA]

Schedule

ADDA algorithm

How to get residue correlation matrix

sample data: sample.tab

# set working directory
setwd("/Users/igchoi/Downloads/")

# read blast table
tmp = read.table("sample.tab",sep="\t")

# check dimension of table
dim(tmp)

# change simpler GI number (skip this if you don't understand)
tmp[,1] = sapply(as.vector(tmp[,1]), function(x) {
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })
tmp[,2] = sapply(as.vector(tmp[,2]), function(x) {
  y = strsplit(x,"\\|"); return(unlist(y)[2]) })

# check the result
tmp[1:5,]

# aligned region: V7 V8, V9 V10
# query length = 590 residues
count.que = matrix(0,nrow(tmp),590)  # <- build empty 'aligned' matrix
for(i in 1:nrow(tmp)) {
  res = unlist(tmp[i,7:8])
  print(res)
  count.que[i,c(res[1]:res[2])] = 1
}

# check aligned region (blue colored regions are aligned with other proteins - central region)
image(t(count.que),col=c("white","blue"))

# count number of neighbors occurring both position i and j (using 'aligned' matrix)
corr = matrix(0,590,590)  # <- set empty correlation matrix (590 x 590)

for (i in 1:ncol(count.que)) {
  for (j in i:ncol(count.que)) {
    print(paste(i,j))
    chk = count.que[,i]+count.que[,j]
    cnt = length(which(chk==2))
    corr[i,j] = cnt
  }
}

# range of number of neighbors (among 52 hits)
range(corr)

# check highly correlated region in the correlation matrix
image(corr,col=topo.colors(10))

How to parse domains from the correlation matrix

Function for Splitting Domains

#
splitPos <- function(x,m=start,n=end) { # x = corr.matrix
  chi = NULL; corr = x
  for (i in m:(n-1)) {
    c1 = sum(corr[m:i,m:i])
    a = sum(corr[m:i,(i+1):n])
    c2 = sum(corr[(i+1):n,(i+1):n])
    chi2 = (((c1*c2)-(a*a))^2)/((c1+a)^2*(c2+a)^2)
#    print(paste(i,c1,a,c2,chi2))
    chi = c(chi,chi2)
  }
  pos = which(chi==max(chi))
  return(c(m,pos,pos+1,n))
}

tmp = splitPos(corr,1,590)  # split function returns a vector (start,splitPos,splitPos+1,end)

tmp1 = splitPos(corr,tmp[1],tmp[2])
tmp2 = splitPos(corr,tmp[3],tmp[4])

old

chi = NULL
for (i in 10:(ncol(count.que)-10)){
        A = i
       c1 = sum(corr[1:i,1:i])
        a = sum(corr[1:i ,(i+1):(ncol(count.que))])
       c2 = sum(corr[(i+1):(ncol(count.que)),(i+1):(ncol(count.que))])
       x =  (((c1*c2)-(a*a))^2)/(((c1+a)^2)*((c2+a)^2))
      # print(paste(A,c1,a,c2 ,x))
       chi = c(chi,x)
       p1 = which(chi==min(chi))+9
      # chi[i-9,1:2] = c(A,x)
}

Progress batman

From CSBLwiki

Revision as of 15:38, 14 April 2011

Contents

2011

Schedule

ADDA algorithm

How to get residue correlation matrix

How to parse domains from the correlation matrix

Personal tools

Namespaces

Variants

Views

Actions

Search

Site

Choi lab

Resources

Toolbox