cont helin project

eb740601 · Holger Brandl · 6a530e75 · eb740601
Commit eb740601 authored 10 years ago by Holger Brandl
--- a/bash/lsf_rna_seq.sh
+++ b/bash/lsf_rna_seq.sh
+## docs
+## http://blog.joncairns.com/2013/08/what-you-need-to-know-about-bash-functions/
+

 ## create fastq report for all fastq and fastq.gz files in the current directory
 dge_fastqc(){
@@ -9,7 +12,7 @@ while getopts "o:" curopt; do
 done
 shift $(($OPTIND - 1))

-fastqFiles=$*
+local fastqFiles=$*

 #if [ -z "$fastqFiles" ]; then
 if [ $# -lt 1 ]; then
@@ -48,21 +51,24 @@ mailme "fastqc done for $outputDir"
 export -f dge_fastqc


-
 #http://wiki.bash-hackers.org/howto/getopts_tutorial
 dge_tophat_se(){

 # http://stackoverflow.com/questions/18414054/bash-getopts-reading-optarg-for-optional-flags
+echo $*

 while getopts "i:" curopt; do
    case $curopt in
-    i) IGENOME=$OPTARG;
+    i) IGENOME="$OPTARG";
    esac
 done
 shift $(($OPTIND - 1))


-fastqFiles=$*
+local fastqFiles=$*
+
+echo fastq $fastqFiles
+echo igenomes "$IGENOME"

 if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ];
     then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return;
@@ -84,6 +90,7 @@ fi


 echo "running tophat using igenome '$IGENOME' for the following files"
+ll $fastqFiles

 #fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)

@@ -95,12 +102,12 @@ for fastqFile in $fastqFiles ; do
    outputdir=$fastqBaseName

    ## uniquely mapping reads only:   http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
-#    mysub "${project}__tophat__${fastqBaseName}" "
-#    tophat -p6  -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
-#
-#    mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
-#    samtools index $outputdir/$(basename $outputdir).bam
-#    " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
+    mysub "${project}__tophat__${fastqBaseName}" "
+    tophat -p6  -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
+
+    mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
+    samtools index $outputdir/$(basename $outputdir).bam
+    " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
 done

 wait4jobs .tophatjobs
@@ -108,9 +115,53 @@ wait4jobs .tophatjobs
 ## create tophat mapping report
 source <(curl https://dl.dropboxusercontent.com/u/113630701/datautils/bash/bioinfo_utils.sh 2>&1 2>/dev/null)
 TophatMappingReport
-
-
 }
+export -f dge_tophat_se

 # dge_tophat_se
-# dge_tophat_se -i
\ No newline at end of file
+# dge_tophat_se -i
+
+
+dge_cuffdiff(){
+
+local gtfFile=$1
+local bamDir=$2
+local labels=$3
+
+if [ $# -ne 3 ]; then
+ echo "Usage: dge_fastqc <gtf_file>  <bam directory> <labels>" >&2 ; return;
+fi
+
+if [ -z "$(which cuffdiff)" ]; then
+    >&2 echo "no cuffdiff binary in PATH"; return;
+fi
+
+if [ ! -f $gtfFile ]; then
+    >&2 echo "gtf '$gtfFile' does not exis"; return;
+fi
+
+## collect all bam-files and group them
+allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | sort)
+
+bamsSplit=""
+for label in $(echo $labels | tr ", " " "); do
+    echo $label
+    labelBams=$(echo  "$allBams" | grep $label | xargs echo -n | tr ' ' ',')
+    bamsSplit+=$labelBams" "
+done
+#echo $bamsSplit
+
+## make sure (again that all files are there
+echo $gtfFile $bamsSplit | tr "," " "  | xargs ls -la
+
+
+#mysub ${project}_cuffdiff "cuffdiff -L aRGm,aRGp,aRG,bRG,IPC,N -o . -p 8 mm10_ensGene_ccds.gtf $amBams $apBams $aBams $bBams $cBams $dBams  2>&1 | tee cuffdiff.log" -q long -n 8 -R span[hosts=1]   | blockScript
+
+cdCmd="cuffdiff -L $labels -o . -p 10 $gtfFile $bamsSplit"
+#echo "cmd is: $cdCmd"
+mysub ${project}__cuffdiff "$cdCmd"  -q long -n 8 -R span[hosts=1] | blockScript
+
+
+MakeCuffDB $gtfFile "NAN"
+
+}
\ No newline at end of file