From CSBLwiki
2011
Reference [ADDA]: PMID 12706730 (the citation record could not be fetched automatically)
Schedule
ADDA algorithm
How to get the residue correlation matrix
# move into the folder that holds the BLAST output
setwd("/Users/igchoi/Downloads/")
# load the tab-separated BLAST hit table (columns are auto-named V1, V2, ...)
tmp <- read.table("sample.tab", sep = "\t")
# show the table size (rows, columns)
dim(tmp)
# shorten the identifiers in columns 1 and 2: split each one on "|" and keep
# only the second field (skip this if you don't understand)
for (id.col in 1:2) {
  fields <- strsplit(as.vector(tmp[, id.col]), "\\|")
  tmp[, id.col] <- vapply(fields, function(parts) parts[2], character(1))
}
# look at the first five rows of the cleaned table
tmp[1:5, ]
# aligned region: V7 V8, V9 V10
# (V7 and V8 are used below as the first and last aligned query position)
# query length = 590 residues
que.len <- 590
# fail early with a clear message if a hit's coordinates fall outside the query
stopifnot(all(tmp[, 7:8] >= 1), all(tmp[, 7:8] <= que.len))
# 'aligned' matrix: one row per hit, one column per query residue;
# a cell is 1 when that hit's alignment covers that residue, 0 otherwise
count.que <- matrix(0, nrow(tmp), que.len)
# seq_len() keeps the loop from running at all when the table is empty
# (1:nrow(tmp) would iterate over 1 and 0 in that case)
for (i in seq_len(nrow(tmp))) {
  res <- unlist(tmp[i, 7:8])
  print(res)
  count.que[i, res[1]:res[2]] <- 1
}
# check aligned region (blue colored regions are aligned with other proteins - central region)
image(t(count.que), col = c("white", "blue"))
# count number of neighbors occurring at both position i and j (using 'aligned' matrix)
# count.que only holds 0/1, so the cross-product t(count.que) %*% count.que
# yields, for every pair (i, j), the number of hits covering both residues.
# This replaces the explicit double loop (and its per-pair progress print),
# and the matrix size now follows count.que instead of a hard-coded 590.
corr <- crossprod(count.que)
# Only the upper triangle (j >= i, diagonal included) is kept; the lower
# triangle stays 0 so that block sums over corr count each pair once.
corr[lower.tri(corr)] <- 0
# range of number of neighbors (among 52 hits)
range(corr)
# check highly correlated region in the correlation matrix
image(corr, col = topo.colors(10))
How to parse domains from the correlation matrix
# Score candidate split positions of the correlation matrix.
#
# For a split after residue i the matrix is cut into three blocks:
#   c1 = sum of corr within residues 1..i
#   c2 = sum of corr within residues (i+1)..n
#   a  = sum of corr between the two segments
# and the score is ((c1*c2 - a^2)^2) / ((c1+a)^2 * (c2+a)^2).
#
# corr      : residue correlation matrix
# n         : number of residues (columns of the 'aligned' matrix)
# positions : integer vector of split positions to evaluate
# returns   : numeric vector of scores, one per entry of `positions`
#             (NaN where a denominator is 0, e.g. an uncovered segment)
split_score <- function(corr, n, positions) {
  vapply(positions, function(i) {
    left <- 1:i
    right <- (i + 1):n
    c1 <- sum(corr[left, left])
    a <- sum(corr[left, right])
    c2 <- sum(corr[right, right])
    ((c1 * c2 - a * a)^2) / ((c1 + a)^2 * (c2 + a)^2)
  }, numeric(1))
}

n.res <- ncol(count.que)

# NOTE(review): the score is 1 when a == 0 (no hits spanning the split) and
# drops towards 0 as cross-segment counts grow, so taking the minimum selects
# the split with the most cross-talk -- confirm that minimising is intended.

# First split: scan every position, leaving a 10-residue margin at both ends.
chi <- split_score(corr, n.res, 10:(n.res - 10))
# which.min() returns the first minimum and skips NaN scores, so p1 is always
# a single position; chi[1] belongs to position 10, hence the +9 offset.
p1 <- which.min(chi) + 9

# Second split, left of p1.
# NOTE(review): the score is still computed on the whole matrix, so chi1 is
# just the leading part of chi (and chi2 below its trailing part); the minima
# therefore coincide with p1. Restricting corr to the sub-segment is probably
# what was intended -- confirm.
chi1 <- split_score(corr, n.res, 10:p1)
p2 <- which.min(chi1) + 9

# Second split, right of p1.
chi2 <- split_score(corr, n.res, p1:(n.res - 10))
# chi2[1] belongs to position p1, so the offset is p1 - 1 (the previous
# "+ p1" reported a position one residue too far to the right).
p3 <- which.min(chi2) + p1 - 1