## from naha

PRE=/home/lakshman/mnt/chipseq_huttner/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/*.remdup.bam.sorted.bam
name_suf=.remdup.bam
all_bams=$( ls $PRE)
all_bams_lables=$( ls $PRE | sed 's#.*/##' | sed "s/$name_suf$//")

cd /home/lakshman/mnt/chipseq_huttner/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/
screen -R bamcorrelate

bamCorrelate bins --bamfiles $all_bams --labels $all_bams_lables --binSize 1000 --corMethod spearman -f 200 --numberOfProcessors=8 --colorMap Reds --zMin 0.5 --zMax 1 --plotFile="MOUSE_combined_bams_correlation_remdup.pdf" --outFileCorMatrix="MOUSE_combined_bams_correlation_Matrix.txt"

setwd("/Volumes/albert_chipseq/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/")
library(reshape)
library(stringr)
library(lattice)
library(ggplot2)
data=read.delim("MOUSE_combined_bams_correlation_Matrix.txt", sep="\t", h=T)
data_1=data
rownames(data_1)=data_1$X
data_1$X=NULL
levelplot(as.matrix(data_1))
colorFun <- colorRampPalette(c("yellow","red"))
pdf("Spearman_correlation_matrix_plot.pdf")
levelplot(as.matrix(data_1),scales=list(x=list(rot=90, cex=.7), y=list(cex=.7)), col.regions = colorFun(50),cexRow=0.2, cexCol=0.2)
dev.off()

distance=dist(as.matrix(data_1))
tree=hclust(distance)
plot(tree)
pdf("Hierarchical_clustering_plot.pdf")
plot(tree, hang=-1)
dev.off()

data_trans=data
rownames(data_trans)=data_trans$X
data_trans$X=NULL
pca_chip <- prcomp(data_trans, center=TRUE, scale=T, retx=TRUE)
pc123 <- data.frame(PC1=pca_chip$x[,1], PC2=pca_chip$x[,2],  PC3=pca_chip$x[,3])

makePcaPlot <- function(pca, item_names=rownames(pca$x),conditions=str_split_fixed(item_names, "_", 2)[,2]) {
#    browser()
  percent <- round((((pca$sdev)^2 / sum(pca$sdev^2))*100))


  pc12 <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], sample=conditions, items=item_names)

  ggplot(pc12, aes(PC1, PC2, color=sample, label=items)) + geom_point()+xlab(paste("PC1 (", percent[1], "%)", sep = "")) + ylab(paste("PC2 (", percent[2], "%)", sep = ""))

}
makePcaPlot(pca_chip) + theme_bw() + ggtitle("Cell type clustering PCA")+geom_vline(xintercept=0, linetype="dashed")+geom_hline(xintercept=0, linetype="dashed")

makePcaPlot_text <- function(pca, item_names=rownames(pca$x),conditions=str_split_fixed(item_names, "_", 2)[,2]) {
#    browser()
  percent <- round((((pca$sdev)^2 / sum(pca$sdev^2))*100))


  pc12 <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], sample=conditions, items=item_names)

  ggplot(pc12, aes(PC1, PC2, color=sample, label=items)) + geom_text()+xlab(paste("PC1 (", percent[1], "%)", sep = "")) + ylab(paste("PC2 (", percent[2], "%)", sep = ""))

}
save.image("cell_clustering.RData")
pdf("PCA_plot.pdf")
makePcaPlot_text(pca_chip) + theme_bw() + ggtitle("Cell type clustering PCA")+geom_vline(xintercept=0, linetype="dashed")+geom_hline(xintercept=0, linetype="dashed")
dev.off()