## from naha PRE=/home/lakshman/mnt/chipseq_huttner/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/*.remdup.bam.sorted.bam name_suf=.remdup.bam all_bams=$( ls $PRE) all_bams_lables=$( ls $PRE | sed 's#.*/##' | sed "s/$name_suf$//") cd /home/lakshman/mnt/chipseq_huttner/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/ screen -R bamcorrelate bamCorrelate bins --bamfiles $all_bams --labels $all_bams_lables --binSize 1000 --corMethod spearman -f 200 --numberOfProcessors=8 --colorMap Reds --zMin 0.5 --zMax 1 --plotFile="MOUSE_combined_bams_correlation_remdup.pdf" --outFileCorMatrix="MOUSE_combined_bams_correlation_Matrix.txt" setwd("/Volumes/albert_chipseq/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/") library(reshape) library(stringr) library(lattice) library(ggplot2) data=read.delim("MOUSE_combined_bams_correlation_Matrix.txt", sep="\t", h=T) data_1=data rownames(data_1)=data_1$X data_1$X=NULL levelplot(as.matrix(data_1)) colorFun <- colorRampPalette(c("yellow","red")) pdf("Spearman_correlation_matrix_plot.pdf") levelplot(as.matrix(data_1),scales=list(x=list(rot=90, cex=.7), y=list(cex=.7)), col.regions = colorFun(50),cexRow=0.2, cexCol=0.2) dev.off() distance=dist(as.matrix(data_1)) tree=hclust(distance) plot(tree) pdf("Hierarchical_clustering_plot.pdf") plot(tree, hang=-1) dev.off() data_trans=data rownames(data_trans)=data_trans$X data_trans$X=NULL pca_chip <- prcomp(data_trans, center=TRUE, scale=T, retx=TRUE) pc123 <- data.frame(PC1=pca_chip$x[,1], PC2=pca_chip$x[,2], PC3=pca_chip$x[,3]) makePcaPlot <- function(pca, item_names=rownames(pca$x),conditions=str_split_fixed(item_names, "_", 2)[,2]) { # browser() percent <- round((((pca$sdev)^2 / sum(pca$sdev^2))*100)) pc12 <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], sample=conditions, items=item_names) ggplot(pc12, aes(PC1, PC2, color=sample, label=items)) + geom_point()+xlab(paste("PC1 (", percent[1], "%)", sep = "")) + ylab(paste("PC2 (", percent[2], "%)", sep = "")) } makePcaPlot(pca_chip) + theme_bw() + ggtitle("Cell type clustering PCA")+geom_vline(xintercept=0, linetype="dashed")+geom_hline(xintercept=0, linetype="dashed") makePcaPlot_text <- function(pca, item_names=rownames(pca$x),conditions=str_split_fixed(item_names, "_", 2)[,2]) { # browser() percent <- round((((pca$sdev)^2 / sum(pca$sdev^2))*100)) pc12 <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], sample=conditions, items=item_names) ggplot(pc12, aes(PC1, PC2, color=sample, label=items)) + geom_text()+xlab(paste("PC1 (", percent[1], "%)", sep = "")) + ylab(paste("PC2 (", percent[2], "%)", sep = "")) } save.image("cell_clustering.RData") pdf("PCA_plot.pdf") makePcaPlot_text(pca_chip) + theme_bw() + ggtitle("Cell type clustering PCA")+geom_vline(xintercept=0, linetype="dashed")+geom_hline(xintercept=0, linetype="dashed") dev.off()