diff --git a/dge_workflow/dge_analysis.R b/dge_workflow/dge_analysis.R index 5fc17e4f62903b326399ef92ff46ac9a723a638f..d4b1e53b187c0d0c9ffab3910ec001ad62dba18b 100755 --- a/dge_workflow/dge_analysis.R +++ b/dge_workflow/dge_analysis.R @@ -16,7 +16,7 @@ Options: #opts <- docopt(doc, commandArgs(TRUE)) ## does not work when spining -#opts <- docopt(doc,"cuffdir") +#opts <- docopt(doc, "--undirected ..") opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" "))) #opts @@ -72,7 +72,7 @@ replicates(cuff) %>% mutate(file=basename(file)) %>% select(-c(total_mass, norm_ ## todo merge in basic gene info into all tables ## add gene description -martName <- guess_mart(degs$gene_id) +martName <- guess_mart(fpkm(genes(cuff))$gene_id) cacheFile <- paste0("geneInfo.",martName, ".RData") if(!file.exists(cacheFile)){ require(biomaRt) @@ -85,23 +85,44 @@ if(!file.exists(cacheFile)){ } -gFpkmMatrix <- rownames2column(fpkmMatrix(genes(cuff)), "ensembl_gene_id") + #colnames(gFpkmMatrix) <- ifelse(is.na(sampleDic[colnames(gFpkmMatrix)]), colnames(gFpkmMatrix), sampleDic[colnames(gFpkmMatrix)]) -geneFpkmWithInfo <- merge(geneInfo, gFpkmMatrix, by.x="ensembl_gene_id", all.y=T) #%>% print_head() -write.delim(geneFpkmWithInfo, file="geneFpkmWithInfo.txt") +fpkmMatrix(genes(cuff)) %>% + rownames2column("ensembl_gene_id") %>% + merge(geneInfo, by.x="ensembl_gene_id", all.x=T) %>% + write.delim(file="geneFpkmWithInfo.txt") # geneFpkmWithInfo <- read.delim("geneFpkmWithInfo.txt") #' [Annotated Expresssion Table](geneFpkmWithInfo.txt) +## export the same but now including replicate information + + + +repFpkmMatrix(genes(cuff)) %>% + rownames2column("ensembl_gene_id") %>% + merge(geneInfo, by.x="ensembl_gene_id", all.x=T) %>% + write.delim(file="geneReplicateFpkmWithInfo.txt") +# geneFpkmWithInfo <- read.delim("geneReplicateFpkmWithInfo.txt") +#' [Annotated Expresssion Table](geneReplicateFpkmWithInfo.txt) + +#+ eval=FALSE, echo=FALSE +## DEBUG start +if(F){ +read.delim("geneReplicateFpkmWithInfo.txt") %>% +filter(external_gene_name=="Insm1") %>% +melt() %>% +ggplot(aes(variable, value)) + geom_bar(stat="identity") +} +## DEBUG end + + geneFpkm<-fpkm(genes(cuff)) #ggplot(geneFpkm, aes(fpkm)) + geom_histogram() + scale_x_log10() write.delim(geneFpkm, file="geneFpkm.txt") # gene.fpkm <- read.delim("gene.fpkm.txt") -#' [FPKMs](geneFpkm.txt) - -repGeneFPKMs <- repFpkmMatrix(genes(cuff)) write.delim(repGeneFPKMs, file="repGeneFPKMs.txt") # repGeneFPKMs <- read.delim("repGeneFPKMs.txt") # repGeneFPKMs <- local(get(load("repGeneFPKMs.RData"))) @@ -209,10 +230,11 @@ allDiff <- diffData(genes(cuff)) %>% -#allDiff <- transform(allDiff, isHit=p_value<=0.01) +#' Used cutoff criterion was: p_value<0.01 +allDiff <- transform(allDiff, isHit=p_value<=0.01) -#' Used cutoff criterion was: q_value<0.01 -allDiff <- transform(allDiff, isHit=q_value<0.01) +##' Used cutoff criterion was: q_value<0.01 +#allDiff <- transform(allDiff, isHit=q_value<0.01) degs <- subset(allDiff, isHit) #degs <- subset(allDiff, significant=="yes") diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh index 64272ee1e7662583956d9bf086a5be6ec9ead4d0..f62511ed2a8db36363216a7f53251f57c2c5ed83 100755 --- a/dge_workflow/dge_utils.sh +++ b/dge_workflow/dge_utils.sh @@ -10,6 +10,7 @@ export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH export PATH=/sw/apps/python/current/bin:$PATH export PATH=/home/brandl/bin/deepTools/bin:$PATH +# which tophat; which bowtie2; which cuffdiff export R_LIBS=/tmp/r_index ## export to make sure that packages are load from local repository, otherwise sqlite won't work