Skip to content
Snippets Groups Projects
Commit b2966a0b authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. rna-seq workflow

parent 625bd201
No related branches found
No related tags found
No related merge requests found
...@@ -73,6 +73,7 @@ ggplot(algnSummary, aes(condition, mapped_reads)) + ...@@ -73,6 +73,7 @@ ggplot(algnSummary, aes(condition, mapped_reads)) +
#ggplot(melt(algnSummary), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0)) #ggplot(melt(algnSummary), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0))
#ggsave2(w=10, h=10, p="mapstats") #ggsave2(w=10, h=10, p="mapstats")
##todo multimapper counting
######################################################################################################################## ########################################################################################################################
#' ## Alignment Correlation #' ## Alignment Correlation
......
...@@ -19,9 +19,9 @@ if [ $# -lt 1 ]; then ...@@ -19,9 +19,9 @@ if [ $# -lt 1 ]; then
echo "Usage: dge_fastqc [-o <output_directory>] [<fastq.gz file>]+" >&2 ; return; echo "Usage: dge_fastqc [-o <output_directory>] [<fastq.gz file>]+" >&2 ; return;
fi fi
## use default directory if not specified ## use current directory if not specified
if [ -z "$outputDir" ]; then if [ -z "$outputDir" ]; then
outputDir="fastqc_reports" outputDir="fastqc"
fi fi
if [ ! -d "$outputDir" ]; then if [ ! -d "$outputDir" ]; then
...@@ -44,24 +44,41 @@ done ...@@ -44,24 +44,41 @@ done
wait4jobs .fastqc_jobs wait4jobs .fastqc_jobs
mailme "fastqc done for $outputDir" mailme "fastqc done for $outputDir"dge_tophat_se
ziprm fastqc_logs fastqc__* ziprm fastqc_logs fastqc__*
## create a small report
source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/R/utils/spinr.sh 2>&1 2>/dev/null)
spinr $(which fastqc_summary.R) $outputDir
# todo create some summary report here # todo create some summary report here
spin.R $DGE_HOME/fastqc_summary.R $outputDir
} }
export -f dge_fastqc export -f dge_fastqc
dge_cutadapt(){
export Ill_ADAPTERS=/projects/bioinfo/common/illumina_universal_index.fasta
for fastqFile in $* ; do
# DEBUG fastqFile=/projects/bioinfo/holger/projects/helin/mouse/treps_pooled/mouse_liver_polar_stage1_rep4.fastq.gz
caFastq=$(basename $fastqFile .fastq.gz)_ca.fastq.gz
echo "cutadapting $caFastq into $caFastq"
#todo use a more specific trimming model (trim just correct part on each side without using reverse complements
mysub "$project__ca__$caFastq" "cutadapt -b file:$Ill_ADAPTERS -m 20 -q 25 -o $caFastq $fastqFile > $caFastq.ca.log" -q long | joblist .cajobs
done
wait4jobs .cajobs
}
export -f dge_cutadapt
#http://wiki.bash-hackers.org/howto/getopts_tutorial #http://wiki.bash-hackers.org/howto/getopts_tutorial
dge_tophat_se(){ dge_tophat_se(){
# http://stackoverflow.com/questions/18414054/bash-getopts-reading-optarg-for-optional-flags # http://stackoverflow.com/questions/18414054/bash-getopts-reading-optarg-for-optional-flags
echo $* #echo $*
while getopts "i:" curopt; do while getopts "i:" curopt; do
case $curopt in case $curopt in
...@@ -73,6 +90,7 @@ shift $(($OPTIND - 1)) ...@@ -73,6 +90,7 @@ shift $(($OPTIND - 1))
local fastqFiles=$* local fastqFiles=$*
# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38
echo fastq $fastqFiles echo fastq $fastqFiles
echo igenomes "$IGENOME" echo igenomes "$IGENOME"
...@@ -103,11 +121,12 @@ ll $fastqFiles ...@@ -103,11 +121,12 @@ ll $fastqFiles
for fastqFile in $fastqFiles ; do for fastqFile in $fastqFiles ; do
echo "submitting tophat job for $fastqFile" echo "submitting tophat job for $fastqFile"
# DEBUG fastqFile=/projects/bioinfo/holger/projects/eric/trimmed/a1_ca.fastq.gz # DEBUG fastqFile=/projects/bioinfo/holger/projects/helin/mouse/trimmed/mouse_big_cyst_rep4_ca.fastq.gz
fastqBaseName=$(basename ${fastqFile%%.fastq.gz}) fastqBaseName=$(basename ${fastqFile%%.fastq.gz})
outputdir=$fastqBaseName outputdir=$fastqBaseName
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464 ## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
mysub "${project}__tophat__${fastqBaseName}" " mysub "${project}__tophat__${fastqBaseName}" "
tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
...@@ -119,8 +138,8 @@ done ...@@ -119,8 +138,8 @@ done
wait4jobs .tophatjobs wait4jobs .tophatjobs
## create tophat mapping report ## create tophat mapping report
source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/bioinfo_utils.sh 2>&1 2>/dev/null) spin.R $DGE_HOME/bam_qc.R .
TophatMappingReport
} }
export -f dge_tophat_se export -f dge_tophat_se
...@@ -165,9 +184,32 @@ echo $gtfFile $bamsSplit | tr "," " " | xargs ls -la ...@@ -165,9 +184,32 @@ echo $gtfFile $bamsSplit | tr "," " " | xargs ls -la
cdCmd="cuffdiff -L $labels -o . -p 10 $gtfFile $bamsSplit" cdCmd="cuffdiff -L $labels -o . -p 10 $gtfFile $bamsSplit"
#echo "cmd is: $cdCmd" #echo "cmd is: $cdCmd"
mysub ${project}__cuffdiff "$cdCmd" -q long -n 8 -R span[hosts=1] | blockScript # eval $cdCmd
mysub "${project}__cuffdiff" "$cdCmd" -q long -n 4 -R span[hosts=1] | blockScript
## create a cuffdiff db using cummerbund in a temporary directory to avoid locking problems
tmpDbDir=$(mktemp -d)
cp -r . $tmpDbDir
## todo remove this hack
genome=$(echo $gtfFile | cut -f7 -d'/'); echo "genome is $genome"
R_LIBS=/tmp/r_index
echo '
require(cummeRbund)
dbDir=commandArgs(T)[1]
gtfFile=commandArgs(T)[2]
genome=commandArgs(T)[3]
## note without providing the gtf the db is much smaller
readCufflinks(dir=dbDir, rebuild=T, gtf=gtfFile, genome=genome)
' | R -q --no-save --no-restore --args $tmpDbDir $gtfFile $genome
if [ ! -f $tmpDbDir/cuffData.db ]; then
>&2 echo "cummerbund failed to create cuffidff sqlite db"; return;
fi
MakeCuffDB $gtfFile "NAN" cp $tmpDbDir/cuffData.db .
rm -rf $tmpDbDir ## because it's no longer needed
} }
\ No newline at end of file export -f dge_cuffdiff
- cuffdbs change dramatically in size if gtf is provided when building them, but what impact does it have on the results
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment