Skip to content
Snippets Groups Projects
Commit 4d6dab8f authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. chipseq analysis

parent 102187c6
No related branches found
No related tags found
Loading
......@@ -35,26 +35,38 @@ countData <- list.files(".", "region_counts.txt", f=T) %>% ldply(function(countF
separate(sample, c("protein", "timepoint"), remove=F)
#' count data structure
countData %>% sample_n(10) %>% kable()
write.delim(countData, file="countData.txt")
# countData <- read.delim("countData.txt")
#' [countData](countData.txt)
countData %>% head() %>% kable()
#corMat <- countData %>% select(ensembl_gene_id, feature_type, tag_count, sample) %>% dcast(ensembl_gene_id + feature_type ~ sample, value.var="tag_count")
#require(GGally)
#corMat %>% filter(feature_type=="tss_1kb") %>% select(-feature_type) %>% ggpairs()
## does not work because of logarithmic scale
## also load library sizes for normalization
libSizes <- read.delim("algn_counts.txt", header=F) %>% set_names(c("library_size", "sample"))
## calculate fpkm normalized counts
countDataNorm <- countData %>%
merge(libSizes, all.x=T) %>%
group_by(feature_type, protein, timepoint) %>%
mutate(tag_fpkm=(1E9*tag_count)/(feature_length*sum(tag_count))) %>%
mutate(tag_fpkm100=100*tag_fpkm)
mutate(tag_fpkm=(1E9*tag_count)/(feature_length*library_size))
# mutate(tag_fpkm=(1E9*tag_count)/(feature_length*sum(tag_count))) %>%
# mutate(tag_fpkm100=100*tag_fpkm)
countDataNorm %>%
sample_n(10000) %>%
ggplot(aes(tag_fpkm)) +
geom_histogram() +
scale_x_log10(labels=comma) +
ggtitle("fpkm dist overview")
#+ fig.width=20, fig.height=18
countDataNorm %>% ggplot(aes(tag_fpkm)) +
......@@ -66,3 +78,46 @@ write.delim(countDataNorm, file="countDataNorm.txt")
# countDataNorm <- read.delim("countDataNorm.txt")
#' [countDataNorm](countDataNorm.txt)
## simple correlation
timeCor <- countDataNorm %>% ungroup() %>%
filter(feature_type=="tss_2kb") %>%
select(ensembl_gene_id, tag_fpkm, protein, timepoint) %>%
merge(., ., by=c("ensembl_gene_id", "protein")) %>%
filter(ac(timepoint.x)>ac(timepoint.y)) %>%
rename(sample_1=timepoint.x, sample_2=timepoint.y)
timeCorPlot <- timeCor %>% ggplot(aes(tag_fpkm.x, tag_fpkm.y)) +
geom_point(alpha=0.05) +
ggtitle("Timepoint Correlation") +
facet_grid(sample_1~protein+sample_2)+
# facet_wrap(timepoint+sample_1~sample_2)+
# scale_x_log10() + scale_y_log10()
scale_x_log10(labels=comma, limits=c(0.01, 10000)) + scale_y_log10(labels=comma, limits=c(0.01, 10000)) +
geom_abline(slope = 1, alpha=0.3, size=2, color="red") +
xlab("fpkm") + ylab("ylab")
ggsave2(timeCorPlot, outputFormat="pdf", width=12, height=10)
#
#corrPlot <- function(fpkmData){
## fpkmData <- mmusDiff
# hitCounts <- subset(fpkmData, isHit) %>%
# with(as.data.frame(table(sample_1, sample_2, sample_1_overex=log2_fold_change<0))) %>%
# subset(Freq>0) %>% mutate(sample_1_overex=as.logical(sample_1_overex))
#
# ggplot(fpkmData, aes(value_1, value_2, color=isHit)) +
# scale_color_manual(values=c("darkgrey", "red"))+
## scale_size_manual(values=c(0.4, 4)) +
# geom_point(alpha=0.6, size=2) +
## scale_x_log10(labels=comma) + scale_y_log10(labels=comma) +
# scale_x_log10(labels=comma, limits=c(0.01, 10000)) + scale_y_log10(labels=comma, limits=c(0.01, 10000)) +
# facet_grid(sample_1 ~ sample_2) +
# geom_text(aes(label=Freq), color="black", data=hitCounts %>% mutate(value_1=500., value_2=0.1), subset=.(sample_1_overex)) +
# geom_text(aes(label=Freq),color="black", data=hitCounts %>% mutate(value_1=0.1, value_2=500.), subset=.(!sample_1_overex)) +
# theme_bw() + xlab("") + ylab("") + guides(colour = guide_legend(override.aes = list(size=4)))
#}
#
#mmusFpkmCorr <- allDiff %>% arrange(isHit) %>% corrPlot() + ggtitle("Mouse FPKM Correlation") + facet_grid(; mmusFpkmCorr
#
#ggsave2(mmusFpkmCorr, outputFormat="png", width=9, height=7)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment