Skip to content
Snippets Groups Projects
Commit 8a595336 authored by Holger Brandl's avatar Holger Brandl
Browse files

initial commit of dge workflow

parent eb740601
No related branches found
No related tags found
No related merge requests found
......@@ -190,6 +190,7 @@ rmerge <- function(LDF, by, ...){
trim_ext <- function(fileNames, ...) trimEnd(fileNames, ...)
trimEnd <-function(fileNames, exts=c()){
for(fileExt in exts){
fileNames <- str_replace(fileNames, paste(fileExt, "$",sep=""), "")
......
## a thin wrapper around spin to make it more useful with more custom output
require(knitr)
require(stringr)
......@@ -7,14 +8,6 @@ options(width=150)
#https://groups.google.com/forum/#!topic/knitr/ojcnq5Nm298
## better table css: http://www.stat.ubc.ca/~jenny/STAT545A/topic10_tablesCSS.html
#setwd("/local/home/brandl/mnt/mack/project-raphael/reports/spin_report")
#setwd("/home/brandl/mnt/mack/project-raphael/reports/spin_report")
#rScript='/home/brandl/mnt/mack/project-raphael/Rcode/misc/DivisionPerpendicularity.R'
#rScript='/home/brandl/mnt/mack/project-raphael/Rcode/misc/Test.R'
#spinr <- function(rScript){
spin(rScript, knit=F)
mdScript <- str_replace(rScript, "[.]R$", ".Rmd")
......@@ -36,7 +29,3 @@ opts_chunk$set(cache = TRUE, fig.width=10, width=120)
knit2html(basename(mdScript), header=cssHeader)
file.remove(basename(mdScript))
#}
# spinr("/home/brandl/mnt/mack/project-raphael/Rcode/misc/DivisionPerpendicularity.R")
# spinr("/home/brandl/mnt/mack/project-raphael/Rcode/misc/Test.R")
## docs
## http://blog.joncairns.com/2013/08/what-you-need-to-know-about-bash-functions/
## create fastq report for all fastq and fastq.gz files in the current directory
dge_fastqc(){
while getopts "o:" curopt; do
case $curopt in
o) outputDir=$OPTARG;
esac
done
shift $(($OPTIND - 1))
local fastqFiles=$*
#if [ -z "$fastqFiles" ]; then
if [ $# -lt 1 ]; then
echo "Usage: dge_fastqc [-o <output_directory>] [<fastq.gz file>]+" >&2 ; return;
fi
## use default directory if not specified
if [ -z "$outputDir" ]; then
outputDir="fastqc_reports"
fi
if [ ! -d "$outputDir" ]; then
echo "creating output directory '$outputDir'"
mkdir $outputDir
fi
for fastqFile in $fastqFiles ; do
echo "fastqcing $fastqFile"
if [ ! -f $fastqFile ]; then
continue;
fi
mysub "fastqc__$(basename $fastqFile)" "fastqc -j /sw/bin/java -o $outputDir -f fastq $fastqFile" -q medium | joblist .fastqc_jobs
done
wait4jobs .fastqc_jobs
mailme "fastqc done for $outputDir"
# todo create some summary report here
}
export -f dge_fastqc
#http://wiki.bash-hackers.org/howto/getopts_tutorial
dge_tophat_se(){
# http://stackoverflow.com/questions/18414054/bash-getopts-reading-optarg-for-optional-flags
echo $*
while getopts "i:" curopt; do
case $curopt in
i) IGENOME="$OPTARG";
esac
done
shift $(($OPTIND - 1))
local fastqFiles=$*
echo fastq $fastqFiles
echo igenomes "$IGENOME"
if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ];
then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return;
fi
export bowtie_gindex="$IGENOME/Sequence/Bowtie2Index/genome"
export gtfFile="$IGENOME/Annotation/Genes/genes.gtf"
#head $gtfFile
if [ ! -f $gtfFile ]; then
>&2 echo "gtf '$gtfFile' does not exis"; return;
fi
if [ -z "$(which tophat)" ]; then
>&2 echo "no tomcat binary in PATH"; return;
fi
echo "running tophat using igenome '$IGENOME' for the following files"
ll $fastqFiles
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
for fastqFile in $fastqFiles ; do
echo "submitting tophat job for $fastqFile"
# DEBUG fastqFile=/projects/bioinfo/holger/projects/eric/trimmed/a1_ca.fastq.gz
fastqBaseName=$(basename ${fastqFile%%.fastq.gz})
outputdir=$fastqBaseName
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
mysub "${project}__tophat__${fastqBaseName}" "
tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
samtools index $outputdir/$(basename $outputdir).bam
" -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
done
wait4jobs .tophatjobs
## create tophat mapping report
source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/bioinfo_utils.sh 2>&1 2>/dev/null)
TophatMappingReport
}
export -f dge_tophat_se
# dge_tophat_se
# dge_tophat_se -i
dge_cuffdiff(){
local gtfFile=$1
local bamDir=$2
local labels=$3
if [ $# -ne 3 ]; then
echo "Usage: dge_fastqc <gtf_file> <bam directory> <labels>" >&2 ; return;
fi
if [ -z "$(which cuffdiff)" ]; then
>&2 echo "no cuffdiff binary in PATH"; return;
fi
if [ ! -f $gtfFile ]; then
>&2 echo "gtf '$gtfFile' does not exis"; return;
fi
## collect all bam-files and group them
allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | sort)
bamsSplit=""
for label in $(echo $labels | tr ", " " "); do
echo $label
labelBams=$(echo "$allBams" | grep $label | xargs echo -n | tr ' ' ',')
bamsSplit+=$labelBams" "
done
#echo $bamsSplit
## make sure (again that all files are there
echo $gtfFile $bamsSplit | tr "," " " | xargs ls -la
#mysub ${project}_cuffdiff "cuffdiff -L aRGm,aRGp,aRG,bRG,IPC,N -o . -p 8 mm10_ensGene_ccds.gtf $amBams $apBams $aBams $bBams $cBams $dBams 2>&1 | tee cuffdiff.log" -q long -n 8 -R span[hosts=1] | blockScript
cdCmd="cuffdiff -L $labels -o . -p 10 $gtfFile $bamsSplit"
#echo "cmd is: $cdCmd"
mysub ${project}__cuffdiff "$cdCmd" -q long -n 8 -R span[hosts=1] | blockScript
MakeCuffDB $gtfFile "NAN"
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment