Skip to content
Snippets Groups Projects
Commit 392ebd3d authored by Holger Brandl's avatar Holger Brandl
Browse files

added actin binding analysis

parent 80e7bca7
No related branches found
No related tags found
No related merge requests found
...@@ -18,7 +18,7 @@ Options: ...@@ -18,7 +18,7 @@ Options:
# opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), "..")) # opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), ".."))
opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" "))) opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" ")))
#opts # opts
skipQC <<- opts$S skipQC <<- opts$S
split_hit_list <- !opts$undirected split_hit_list <- !opts$undirected
...@@ -93,9 +93,9 @@ fpkmMatrix(genes(cuff)) %>% ...@@ -93,9 +93,9 @@ fpkmMatrix(genes(cuff)) %>%
rownames2column("ensembl_gene_id") %>% rownames2column("ensembl_gene_id") %>%
merge(geneInfo, by="ensembl_gene_id", all.x=T) %>% merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
print_head() %>% print_head() %>%
write.delim(file="gene_fpkm.txt") write.delim(file="gene_fpkms.txt")
# gene_fpkm <- read.delim("gene_fpkm.txt") # gene_fpkms <- read.delim("gene_fpkms.txt")
#' [Annotated Expresssion Table](gene_fpkm.txt) #' [Annotated Expresssion Table](gene_fpkms.txt)
## export the same but now including replicate information ## export the same but now including replicate information
...@@ -103,9 +103,9 @@ repFpkmMatrix(genes(cuff)) %>% ...@@ -103,9 +103,9 @@ repFpkmMatrix(genes(cuff)) %>%
rownames2column("ensembl_gene_id") %>% rownames2column("ensembl_gene_id") %>%
merge(geneInfo, by="ensembl_gene_id", all.x=T) %>% merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
print_head() %>% print_head() %>%
write.delim(file="gene_fpkm_by_replicate.txt") write.delim(file="gene_fpkms_by_replicate.txt")
# gene_fpkm <- read.delim("gene_fpkm_by_replicate.txt") # gene_fpkms <- read.delim("gene_fpkms_by_replicate.txt")
#' [Annotated Expresssion Table](gene_fpkm_by_replicate.txt) #' [Annotated Expresssion Table](gene_fpkms_by_replicate.txt)
fpkm(genes(cuff)) %>% fpkm(genes(cuff)) %>%
dplyr::rename(ensembl_gene_id=gene_id) %>% dplyr::rename(ensembl_gene_id=gene_id) %>%
...@@ -113,16 +113,18 @@ fpkm(genes(cuff)) %>% ...@@ -113,16 +113,18 @@ fpkm(genes(cuff)) %>%
write.delim(file="geneFpkm.txt") write.delim(file="geneFpkm.txt")
## also export count tables
count(genes(cuff)) %>% count(genes(cuff)) %>%
dplyr::rename(ensembl_gene_id=gene_id) %>% dplyr::rename(ensembl_gene_id=gene_id) %>%
merge(geneInfo, by="ensembl_gene_id", all.x=T) merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
write.delim(file="geneCounts.txt") write.delim(file="gene_counts.txt")
# geneCounts <- read.delim("geneCounts.txt") # gene_counts <- read.delim("gene_counts.txt")
#' [Gene Fragment Counts](geneCounts.txt) #' [Gene Fragment Counts](gene_counts.txt)
countMatrix(genes(cuff)) %>% countMatrix(genes(cuff)) %>%
dplyr::rename(ensembl_gene_id=gene_id) %>% rownames2column("ensembl_gene_id") %>%
merge(geneInfo, by="ensembl_gene_id", all.x=T) merge(geneInfo, by="ensembl_gene_id", all.x=T) %>%
write.delim(file="replicateCounts.txt") write.delim(file="replicateCounts.txt")
# repCounts <- read.delim("replicateCounts.txt") # repCounts <- read.delim("replicateCounts.txt")
#' [Raw Counts by Replicate](replicateCounts.txt) #' [Raw Counts by Replicate](replicateCounts.txt)
...@@ -158,7 +160,7 @@ fpkmSCVPlot(genes(cuff)) ...@@ -158,7 +160,7 @@ fpkmSCVPlot(genes(cuff))
#csVolcano(genes(cuff),"a", "b") #csVolcano(genes(cuff),"a", "b")
## todo replace or complement with marta plot
csVolcanoMatrix(genes(cuff)) csVolcanoMatrix(genes(cuff))
#csVolcano(genes(cuff),"a", "b") #csVolcano(genes(cuff),"a", "b")
...@@ -175,10 +177,13 @@ if(unlen(replicates(cuff)$sample)>2){ ## >2 only, because MDS will fail with ju ...@@ -175,10 +177,13 @@ if(unlen(replicates(cuff)$sample)>2){ ## >2 only, because MDS will fail with ju
MDSplot(genes(cuff), replicates=T) + ggtitle("mds plot") MDSplot(genes(cuff), replicates=T) + ggtitle("mds plot")
PCAplot(genes(cuff),"PC1","PC2",replicates=T) ## todo replace with marta plot
#PCAplot(genes(cuff),"PC1","PC2",replicates=T)
noTextLayer <- function(gg){ gg$layers[[2]] <- NULL;; gg} noTextLayer <- function(gg){ gg$layers[[2]] <- NULL;; gg}
## todo use fix scaled here
noTextLayer(csDistHeat(genes(cuff)) + ggtitle("mouse zone correlation")) noTextLayer(csDistHeat(genes(cuff)) + ggtitle("mouse zone correlation"))
noTextLayer(csDistHeat(genes(cuff),replicates=T) + ggtitle("mouse replicate correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15)) noTextLayer(csDistHeat(genes(cuff),replicates=T) + ggtitle("mouse replicate correlation")) #+ scale_fill_gradientn(colours=c(low='lightyellow',mid='orange',high='darkred'), limits=c(0,0.15))
...@@ -201,7 +206,7 @@ if(!is.null(constrasts_file)){ ...@@ -201,7 +206,7 @@ if(!is.null(constrasts_file)){
# if(file.exists(constrasts_file)) # if(file.exists(constrasts_file))
echo("using contrasts matrix from: ", basename(constrasts_file)) echo("using contrasts matrix from: ", basename(constrasts_file))
compPairsUniDir <- read.delim(opts$sample_pairs) %>% set_names("sample_1", "sample_2") compPairsUniDir <- read.csv(constrasts_file, header=F) %>% set_names(c("sample_1", "sample_2"))
## add other direction for completeness ## add other direction for completeness
compPairs <- rbind_list(compPairsUniDir, compPairsUniDir %>% select(sample_1=sample_2, sample_2=sample_1)) compPairs <- rbind_list(compPairsUniDir, compPairsUniDir %>% select(sample_1=sample_2, sample_2=sample_1))
...@@ -216,24 +221,28 @@ allDiff <- diffData(genes(cuff)) %>% ...@@ -216,24 +221,28 @@ allDiff <- diffData(genes(cuff)) %>%
merge(compPairs) %>% merge(compPairs) %>%
## discard all genes that are not expressed in any of the cell types (>1) ## discard all genes that are not expressed in any of the cell types (>1)
filter(gene_id %in% getExpressedGenes(cuff)) filter(gene_id %in% getExpressedGenes(cuff)) %>%
dplyr::rename(ensembl_gene_id=gene_id) %>%
mutate(sample_1_overex=log2_fold_change<0)
##' Used cutoff criterion was: p_value<0.01
#allDiff <- transform(allDiff, isHit=p_value<=0.01)
#' Used cutoff criterion was: p_value<0.01 #' Used cutoff criterion was: q_value<0.01
allDiff <- transform(allDiff, isHit=p_value<=0.01) allDiff <- transform(allDiff, isHit=q_value<0.01)
##' Used cutoff criterion was: q_value<0.01
#allDiff <- transform(allDiff, isHit=q_value<0.01)
degs <- subset(allDiff, isHit) degs <- subset(allDiff, isHit)
#degs <- subset(allDiff, significant=="yes") #degs <- subset(allDiff, significant=="yes")
degs <- transform(degs, sample_1_overex=log2_fold_change<0)
with(degs, as.data.frame(table(sample_1, sample_2, sample_1_overex))) %>% filter(Freq >0) with(degs, as.data.frame(table(sample_1, sample_2, sample_1_overex))) %>% filter(Freq >0)
## add gene info
degs <- merge(degs, geneInfo, by="ensembl_gene_id", all.x=T)
degs <- merge(degs, geneInfo, by.x="gene_id", by.="ensembl_gene_id", all.x=T)
write.delim(degs, file="degs.txt") write.delim(degs, file="degs.txt")
# degs <- read.delim("degs.txt") # degs <- read.delim("degs.txt")
...@@ -274,7 +283,8 @@ ontologies <- c( ...@@ -274,7 +283,8 @@ ontologies <- c(
require(RDAVIDWebService) ## just works if installed on non-network-drive (e.g. /tmp/) require(RDAVIDWebService) ## just works if installed on non-network-drive (e.g. /tmp/)
geneLists <- degs %>% geneLists <- degs %>%
transmute(gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_")) # transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2, "ovex", sample_1_overex, sep="_"))
transmute(ensembl_gene_id, list_id=paste(sample_1, "vs", sample_2))
## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf ## http://www.bioconductor.org/packages/release/bioc/vignettes/RDAVIDWebService/inst/doc/RDavidWS-vignette.pdf
...@@ -283,7 +293,7 @@ geneLists <- degs %>% ...@@ -283,7 +293,7 @@ geneLists <- degs %>%
davidAnnotationChart <- function(someGenes){ ## expexted to have a column with gene_id davidAnnotationChart <- function(someGenes){ ## expexted to have a column with gene_id
# echo("processing list with", length(someGenes), "genes") # echo("processing list with", length(someGenes), "genes")
# someGenes <- degs$gene_id # someGenes <- degs$ensembl_gene_id
if(length(someGenes)>1500){ if(length(someGenes)>1500){
...@@ -307,7 +317,7 @@ davidAnnotationChart <- function(someGenes){ ## expexted to have a column with g ...@@ -307,7 +317,7 @@ davidAnnotationChart <- function(someGenes){ ## expexted to have a column with g
} }
enrResults <- degs %>% group_by(sample_1, sample_2, sample_1_overex) %>% do(davidAnnotationChart(.$gene_id)) enrResults <- degs %>% group_by(sample_1, sample_2, sample_1_overex) %>% do(davidAnnotationChart(.$ensembl_gene_id))
write.delim(enrResults, file="enrResults.txt") write.delim(enrResults, file="enrResults.txt")
# enrResults <- read.delim("enrResults.txt") # enrResults <- read.delim("enrResults.txt")
...@@ -382,7 +392,7 @@ sigEnrResults %>% do({ ...@@ -382,7 +392,7 @@ sigEnrResults %>% do({
#keggPathways %>% rowwise() %>% do({ #keggPathways %>% rowwise() %>% do({
# curKP <- . # curKP <- .
# curKP <- keggPathways[1,] # curKP <- keggPathways[1,]
# geneData <- merge(curKP, degs) %>% transmute(gene_id, log2_fold_change) %>% column2rownames("gene_id") # geneData <- merge(curKP, degs) %>% transmute(ensembl_gene_id, log2_fold_change) %>% column2rownames("ensembl_gene_id")
# ## prepare gene data # ## prepare gene data
# #
# pv.out <- pathview(gene.data = geneData, pathway.id = curKP$kegg.pathway_id, species = "mmu", out.suffix = curKP$kegg.description, node.sum = "mean", limit = list(gene = 3, cpd = 3), gene.idtype="ensembl") # pv.out <- pathview(gene.data = geneData, pathway.id = curKP$kegg.pathway_id, species = "mmu", out.suffix = curKP$kegg.description, node.sum = "mean", limit = list(gene = 3, cpd = 3), gene.idtype="ensembl")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment