From d59b1a3fcc02dd9f21dae684881a4c386d70bb97 Mon Sep 17 00:00:00 2001 From: Holger Brandl <brandl@mpi-cbg.de> Date: Tue, 12 Aug 2014 15:24:56 +0200 Subject: [PATCH] cont. leipzig data processing --- R/ggplot_utils.R | 3 ++- bash/bioinfo_utils.sh | 49 +++++++++++++++++++++++++++++++++++++++ bash/lsf_bioinfo_utils.sh | 34 +++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 bash/bioinfo_utils.sh diff --git a/R/ggplot_utils.R b/R/ggplot_utils.R index 1b12269..72fd7c1 100644 --- a/R/ggplot_utils.R +++ b/R/ggplot_utils.R @@ -37,4 +37,5 @@ makePcaPlot <- function(x = getData(), group = NA, items=rownames(x), title = "" } -makePcaPlot(getData(30,4,2,distort = 0.7), getGroup(30,2)) +## example +# makePcaPlot(getData(30,4,2,distort = 0.7)) diff --git a/bash/bioinfo_utils.sh b/bash/bioinfo_utils.sh new file mode 100644 index 0000000..bcf6307 --- /dev/null +++ b/bash/bioinfo_utils.sh @@ -0,0 +1,49 @@ + +#### Tophat Mapping Report from the logs +TophatMappingReport(){ +## todo replace source with simple R script as for bowtie-report +echo ' +devtools::source_url("http://dl.dropbox.com/u/113630701/rlibs/deepseq/ngs_tools.R") +createMappingReport() +' | R -q --vanilla +} + +#### Bowtie Mapping Report from the logs +Bowtie2MappingReport(){ + +echo ' + +devtools::source_url("http://dl.dropbox.com/u/113630701/rlibs/base-commons.R") + +logSuffix=".logs" +parseAlgnSummary <- function(alignSummary){ + #alignSummary="./H2Az_Rep1_Lane1_Lib4454.bowtie.log" + algnData <- readLines(alignSummary) + + data.frame( + condition=trimEnd(basename(alignSummary), logSuffix), + num_reads=as.numeric(str_split_fixed(algnData[3], " ", 2)[1]), + unique_mappers=as.numeric(str_split_fixed(str_trim(algnData[6]), " ", 2)[1]), + mapping_efficiency=as.numeric(str_replace(str_split_fixed(algnData[8], " ", 2)[1], "%", "")), + multi_mappers=as.numeric(str_split_fixed(str_trim(algnData[7]), " ", 2)[1]) + ) +} + +mapStats <- ldply(list.files(".", logSuffix, full.names=TRUE, recursive=T), parseAlgnSummary, .progress="text") +write.delim(mapStats, file="mapStats.txt") + +ggplot(melt(mapStats), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0)) +ggsave2(w=10, h=10, p="mapstats") + +ggplot(mapStats, aes(condition, mapping_efficiency)) + geom_bar(stat="identity") +coord_flip() + ylim(0,100) + ggtitle("mapping efficiency") +ggsave2(p="mapstats") +ggplot(mapStats, aes(condition, num_reads)) + geom_bar(stat="identity") + coord_flip() + ggtitle("read counts") +ggsave2(p="mapstats") + +ggplot(mapStats, aes(condition, unique_mappers)) + geom_bar(stat="identity") + coord_flip() + ggtitle("unique alignment") + scale_fill_discrete() +ggsave2(p="mapstats") +' | R --vanilla +} +export -f Bowtie2MappingReport + + diff --git a/bash/lsf_bioinfo_utils.sh b/bash/lsf_bioinfo_utils.sh index 31e518a..df3d2c7 100644 --- a/bash/lsf_bioinfo_utils.sh +++ b/bash/lsf_bioinfo_utils.sh @@ -1,4 +1,5 @@ +## create fastq report for all fastq and fastq.gz files in the current directory mmFastqc(){ outputDir="fastqc_reports" # filePattern="fastq.gz" @@ -17,3 +18,36 @@ mmFastqc(){ } export -f mmFastqc + +### Create a cuffdb on a network of lustre file-systen +MakeCuffdb() { + if [ $# -ne 2 ]; then echo "Usage: MakeCuffdb <gtffile> <genomebuild>"; return; fi + +echo ' +devtools::source_url("http://dl.dropbox.com/u/113630701/rlibs/base-commons.R") +options(width=150) + +require.auto(cummeRbund) + +createCuffDbTrickyDisk <- function(dbDir, gtfFile, genome, ...){ + tmpdir <- tempfile() + system(paste("cp -r", dbDir, tmpdir)) + oldWD <- getwd() + setwd(tmpdir) + cuff <- readCufflinks(rebuild=T, gtf=gtfFile, genome="mm10", ...) +# cuff <- readCufflinks(gtf=gtfFile, genome="mm10", rebuild=T) + + system(paste("cp cuffData.db", dbDir)) + system(paste("rm -r", tmpdir)) + + setwd(oldWD) + return(cuff) +} + +gtfFile=commandArgs(TRUE)[1] +genomeBuild=commandArgs(TRUE)[2] + +createCuffDbTrickyDisk(getwd(), gtfFile, genomeBuild) +' | R -q --no-save --no-restore --args $1 $2 +} +export -f MakeCuffdb -- GitLab