From e880df2ffb586aafa9e7b1525f57c89cb09772b1 Mon Sep 17 00:00:00 2001
From: Holger Brandl <brandl@mpi-cbg.de>
Date: Wed, 26 Nov 2014 10:44:38 +0100
Subject: [PATCH] added new tech-rep-renaming

---
 dge_workflow/lsf_rna_seq.sh |  3 +++
 dge_workflow/todo.txt       |  4 ++++
 misc/snippets.sh            | 30 ++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100755 misc/snippets.sh

diff --git a/dge_workflow/lsf_rna_seq.sh b/dge_workflow/lsf_rna_seq.sh
index 6e1ff57..afccde0 100755
--- a/dge_workflow/lsf_rna_seq.sh
+++ b/dge_workflow/lsf_rna_seq.sh
@@ -191,7 +191,10 @@ cp -r . $tmpDbDir
 
 ## todo remove this hack
 genome=$(echo $gtfFile | cut -f7 -d'/'); echo "genome is $genome"
+
+## make sure to use temp-r to avoid file locking problems
 R_LIBS=/tmp/r_index
+
 echo '
 require(cummeRbund)
 dbDir=commandArgs(T)[1]
diff --git a/dge_workflow/todo.txt b/dge_workflow/todo.txt
index 78c2d4e..ddf7e72 100755
--- a/dge_workflow/todo.txt
+++ b/dge_workflow/todo.txt
@@ -1,3 +1,7 @@
 - cuffdbs change dramatically in size if gtf is provided when building them, but what impact does it have on the results
 
 - Also try to remove RNA PCR Primer enrichment. Currently we just remove index and universal adapter
+
+
+1) evaluate whether trimmomatic is the better trimmer (compared to cutadapt)
+- also consider using the contamination list from fastqc for trimming (see https://www.biostars.org/p/15753/)
\ No newline at end of file
diff --git a/misc/snippets.sh b/misc/snippets.sh
new file mode 100755
index 0000000..bcd0752
--- /dev/null
+++ b/misc/snippets.sh
@@ -0,0 +1,30 @@
+#########################################################################################################################
+#### Trimmomatic
+## build a small gzipped test input from the first 400000 lines of $fastqFile
+zcat "$fastqFile" | head -n 400000 > test.fastq
+gzip test.fastq
+
+export TRIMMOMATIC_HOME=/projects/bioinfo/holger/bin/Trimmomatic-0.32
+
+# do the filtering (output names below are relative; presumably mcdir creates and enters the directory -- confirm)
+mcdir "$baseDir/trimmed"
+
+for fastqFile in "$baseDir"/treps_pooled/*fastq.gz ; do
+    # DEBUG fastqFile=/projects/bioinfo/holger/projects/helin/dog/treps_pooled/dog_big_cyst_rep1.fastq.gz
+    # DEBUG fastqFile=test.fastq.gz
+
+    caFastq=$(basename "$fastqFile" .fastq.gz)_tm.fastq.gz
+    echo "trimming $fastqFile into $caFastq"
+    # NOTE(review): trimmomatic runs inline here and the cutadapt job below writes the same $caFastq -- confirm which is intended
+    cmd="java -Xmx2g -jar $TRIMMOMATIC_HOME/trimmomatic-0.32.jar SE -threads 1 -phred33 $fastqFile $caFastq ILLUMINACLIP:$TRIMMOMATIC_HOME/adapters/TruSeq3-SE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
+    eval "$cmd"
+    mysub "${project}__ca__${caFastq}" "cutadapt -m 20 -q 25 -o $caFastq $fastqFile > $caFastq.ca.log"  -q long  | joblist .tmjobs
+done
+wait4jobs .tmjobs
+
+
+dge_fastqc -o "$baseDir/fastqc_tm" "$baseDir"/trimmed/*fastq.gz
+
+mailme "trimmomatic done"
+
+
-- 
GitLab