Commit 22279176 authored by Holger Brandl's avatar Holger Brandl

cont furiosa porting of rna-seq workflow

parent 10fdfc65
......@@ -136,15 +136,19 @@ echo "Used bam labels are: $bamLabels"
# http://deeptools.readthedocs.org/en/latest/content/tools/multiBamSummary.html
# http://deeptools.readthedocs.org/en/latest/content/tools/plotCorrelation.html?highlight=plotfile
bcCmd="
## todo add python2 ~/.local/bin/ or fix setup
multiBamSummary bins --bamfiles $(echo $bamFiles | xargs echo) --labels $bamLabels -out bam_cor_matrix.txt --numberOfProcessors 4
plotCorrelation --corData bam_cor_matrix.txt --plotFile bc.pdf --corMethod spearman --zMin 0.5 --zMax 1 -p heatmap
"
#echo "cmd is $bcCmd"
#mysub "${project}__bamcorrelate" "$bcCmd" -q long -n 4 -R span[hosts=1] | blockScript .bamcorrelate
jl submit --wait --jl .bamcorrelate -q long -t 4 -n "${project}__bamcorrelate" "$bcCmd"
## old lsf version
#jl submit --wait --jl .bamcorrelate -q long -t 4 -n "${project}__bamcorrelate" "$bcCmd"
jl reset .bamcorrelate
jl submit --jl .bamcorrelate -w 10:00 -t 4 -n "${project}__bamcorrelate" "$bcCmd"
}
export -f dge_bam_correlate
......
......@@ -90,14 +90,22 @@ for (fastqFile in fastqFiles) {
// --quantMode GeneCounts see https://groups.google.com/forum/#!searchin/rna-star/GeneCounts/rna-star/gZRJx3ElRNo/p5FjBYKuY00J
// --outSJfilterCountUniqueMin see https://groups.google.com/forum/#!topic/rna-star/_1BeAlGUmpA
val fastqBaseName = fastqFile.name.removeSuffix(".gz").removeSuffix(".fastq")
val isPE = with(fastqFile.name) { endsWith("_1.fastq.gz") || endsWith("_1.fastq") };
var fastqBaseName = fastqFile.name.removeSuffix(".gz").removeSuffix(".fastq")
if(isPE) fastqBaseName = fastqBaseName.removeSuffix("_1")
val optionalZcat = if (fastqFile.name.endsWith("gz")) "--readFilesCommand zcat" else ""
// todo consider using --outTmpDir, which defaults to <outFileNamePrefix>_STARtmp and is deleted automatically,
// or use process substitution in case of zipped reads (see https://github.com/alexdobin/STAR/issues/143#issuecomment-216597465)
// detect if paired end reads are supplied
val revReads = if (isPE) fastqFile.parentFile.resolve(fastqFile.name.replace("_1.fastq", "_2.fastq")).path else ""
val cmd = """
STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 ${optionalZcat} --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts --outFilterMultimapNmax 1 --outSJfilterCountUniqueMin 8 3 3 3
STAR --genomeDir ${star_index} --readFilesIn ${fastqFile} ${revReads} --runThreadN 6 ${optionalZcat} --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile ${gtfFile} --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts --outFilterMultimapNmax 1 --outSJfilterCountUniqueMin 8 3 3 3
mv ${fastqBaseName}.Aligned.sortedByCoord.out.bam ${fastqBaseName}.bam
samtools index ${fastqBaseName}.bam
""".trimIndent()
......@@ -105,7 +113,7 @@ for (fastqFile in fastqFiles) {
// todo provide proper walltime here
// slurm memory limit https://rc.fas.harvard.edu/resources/documentation/slurm-memory/
// sacct -o MaxRSS -j JOBID
jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "10:00", "", 5, 40000, "", better.files.File(File(".").toPath())))
jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "10:00", "", 5, 40000, "", better.files.File(File(".").toPath())))
}
......@@ -143,5 +151,10 @@ dge_star_counts2matrix
# http://superuser.com/questions/178587/how-do-i-detach-a-process-from-terminal-entirely
# disabled since it does not work for functions (see http://stackoverflow.com/questions/16435629/linux-shell-script-call-a-function-by-nohup
#nohup dge_bam_correlate . &
## use jl instead to run via scheduler
## disabled because deeptools is not yet installed
#dge_bam_correlate `ls *.bam`
#dge_bam_correlate . &
""")
......@@ -3,22 +3,31 @@
cd ~/bin
git clone https://github.com/dpryan79/pyBigWig.git
cd pyBigWig
python setup.py install --user
python2 setup.py install --user
#easy_install-2.7 --user cython
wget http://cython.org/release/Cython-0.24.tar.gz
tar xvf Cython-0.24.tar.gz
cd Cython-0.24
python2 setup.py install --user
easy_install-2.7 --user cython
cd ~/bin
git clone https://github.com/pysam-developers/pysam
cd pysam
python setup.py install --user
python2 setup.py clean
python2 setup.py install --user
cd ~/bin
wget -O deepTools-2.2.2.tar.gz https://github.com/fidelram/deepTools/archive/2.2.2.tar.gz
tar xvf deepTools-2.2.2.tar.gz
cd deepTools-2.2.2
python setup.py install --user
wget -O deepTools-2.2.4.tar.gz https://github.com/fidelram/deepTools/archive/2.2.4.tar.gz
tar xvf deepTools-2.2.4.tar.gz
cd deepTools-2.2.4
#http://effbot.org/pyfaq/when-importing-module-x-why-do-i-get-undefined-symbol-pyunicodeucs2.htm
# https://github.com/galaxyproject/tools-iuc/issues/256
python2 setup.py install --user
/home/brandl/.local/bin/correctGCBias
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment