Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
## from naha
PRE=/home/lakshman/mnt/chipseq_huttner/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/*.remdup.bam.sorted.bam
name_suf=.remdup.bam
all_bams=$( ls $PRE)
all_bams_lables=$( ls $PRE | sed 's#.*/##' | sed "s/$name_suf$//")
cd /home/lakshman/mnt/chipseq_huttner/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/
screen -R bamcorrelate
bamCorrelate bins --bamfiles $all_bams --labels $all_bams_lables --binSize 1000 --corMethod spearman -f 200 --numberOfProcessors=8 --colorMap Reds --zMin 0.5 --zMax 1 --plotFile="MOUSE_combined_bams_correlation_remdup.pdf" --outFileCorMatrix="MOUSE_combined_bams_correlation_Matrix.txt"
setwd("/Volumes/albert_chipseq/Naha_analysis/BigWig_Files/Combined_BAMS_20141024/")
library(reshape)
library(stringr)
library(lattice)
library(ggplot2)
data=read.delim("MOUSE_combined_bams_correlation_Matrix.txt", sep="\t", h=T)
data_1=data
rownames(data_1)=data_1$X
data_1$X=NULL
levelplot(as.matrix(data_1))
colorFun <- colorRampPalette(c("yellow","red"))
pdf("Spearman_correlation_matrix_plot.pdf")
levelplot(as.matrix(data_1),scales=list(x=list(rot=90, cex=.7), y=list(cex=.7)), col.regions = colorFun(50),cexRow=0.2, cexCol=0.2)
dev.off()
distance=dist(as.matrix(data_1))
tree=hclust(distance)
plot(tree)
pdf("Hierarchical_clustering_plot.pdf")
plot(tree, hang=-1)
dev.off()
data_trans=data
rownames(data_trans)=data_trans$X
data_trans$X=NULL
pca_chip <- prcomp(data_trans, center=TRUE, scale=T, retx=TRUE)
pc123 <- data.frame(PC1=pca_chip$x[,1], PC2=pca_chip$x[,2], PC3=pca_chip$x[,3])
makePcaPlot <- function(pca, item_names=rownames(pca$x),conditions=str_split_fixed(item_names, "_", 2)[,2]) {
# browser()
percent <- round((((pca$sdev)^2 / sum(pca$sdev^2))*100))
pc12 <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], sample=conditions, items=item_names)
ggplot(pc12, aes(PC1, PC2, color=sample, label=items)) + geom_point()+xlab(paste("PC1 (", percent[1], "%)", sep = "")) + ylab(paste("PC2 (", percent[2], "%)", sep = ""))
}
makePcaPlot(pca_chip) + theme_bw() + ggtitle("Cell type clustering PCA")+geom_vline(xintercept=0, linetype="dashed")+geom_hline(xintercept=0, linetype="dashed")
makePcaPlot_text <- function(pca, item_names=rownames(pca$x),conditions=str_split_fixed(item_names, "_", 2)[,2]) {
# browser()
percent <- round((((pca$sdev)^2 / sum(pca$sdev^2))*100))
pc12 <- data.frame(PC1=pca$x[,1], PC2=pca$x[,2], sample=conditions, items=item_names)
ggplot(pc12, aes(PC1, PC2, color=sample, label=items)) + geom_text()+xlab(paste("PC1 (", percent[1], "%)", sep = "")) + ylab(paste("PC2 (", percent[2], "%)", sep = ""))
}
save.image("cell_clustering.RData")
pdf("PCA_plot.pdf")
makePcaPlot_text(pca_chip) + theme_bw() + ggtitle("Cell type clustering PCA")+geom_vline(xintercept=0, linetype="dashed")+geom_hline(xintercept=0, linetype="dashed")
dev.off()