Skip to content
Snippets Groups Projects
Commit 80340742 authored by Holger Brandl's avatar Holger Brandl
Browse files

continued star wrapper

parent 3c89c2c6
No related branches found
No related tags found
No related merge requests found
...@@ -124,7 +124,7 @@ mailme "$project: mapping done" ...@@ -124,7 +124,7 @@ mailme "$project: mapping done"
######################################################################################################################### #########################################################################################################################
#### Basic Alginment QC and technical replicate grouping #### Technical replicate grouping
# #
#mcdir $baseDir/trep_pooled #mcdir $baseDir/trep_pooled
# #
......
...@@ -12,6 +12,7 @@ source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.10/R/s ...@@ -12,6 +12,7 @@ source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.10/R/s
## todo tweak this to work on bioinfo as well ## todo tweak this to work on bioinfo as well
export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH
export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH
export PATH=/home/brandl/bin/STAR/STAR-STAR_2.4.1d/source:$PATH
export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH
export PATH=/sw/apps/python/current/bin:$PATH export PATH=/sw/apps/python/current/bin:$PATH
export PATH=/home/brandl/bin/deepTools/bin:$PATH export PATH=/home/brandl/bin/deepTools/bin:$PATH
...@@ -157,7 +158,7 @@ for fastqFile in $fastqFiles ; do ...@@ -157,7 +158,7 @@ for fastqFile in $fastqFiles ; do
mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
samtools index $outputdir/$(basename $outputdir).bam samtools index $outputdir/$(basename $outputdir).bam
" -n 5 -R span[hosts=1] -q long | joblist .tophatjobs " -n 5 -R span[hosts=1] -q medium | joblist .tophatjobs
done done
wait4jobs .tophatjobs wait4jobs .tophatjobs
......
...@@ -5,60 +5,64 @@ ...@@ -5,60 +5,64 @@
usage=' usage='
Use star to align fastq files against a genome Use star to align fastq files against a genome
Usage: spin.R <igenome> <fastq_files>... Usage: star_align.sh <igenome> <fastq_files>...
Options: Options:
' '
-c Cache results #-c Cache results
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : asdf asdf.fastq) eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@")
eval $(echo "$usage" | ~/bin/docopts/docopts -h - : asdf asdf.fastq another.fastq) #eval $(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz)
for fastqFile in ; do
echo processing $fastqFile
done
echo $igenome #for fastqFile in ${fastq_files[@]} ; do
# echo processing $fastqFile
#done
#
#echo $igenome
gtfFile=$1 ## build index if not present
bamDir=$2
labels=$3
echo "${!dopts[@]}"
echo "${dopts[-c]}"
echo "${dopts[igenome]}"
#http://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/ export star_index="$igenome/Sequence/StarIndex/genome"
#args=$(echo "$usage" | ~/bin/docopts/docopts -h - : "test.R") export gtfFile="$igenome/Annotation/Genes/genes.gtf"
head $gtfFile
## this will define one environment variable per parameter (c,e,m,v) if [ ! -f $gtfFile ]; then
eval $(echo "$usage" | ~/bin/docopts/docopts -h - : -e test.R) >&2 echo "gtf '$gtfFile' does not exis"; exit 1;
fi
fastqFiles=${fastq_files[@]} ## basic
#http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/
# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 if ! -d "${igenome}/Sequence/StarIndex" ]; then
if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ]; mailme "${project}: creating STAR index for $igenome"
then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return; mkdir ${igenome}/Sequence/StarIndex
fi
cmd="STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium | blockScript
## prevent modification
chmod -R -w ${igenome}/Sequence/StarIndex
fi
export bowtie_gindex="$IGENOME/Sequence/Bowtie2Index/genome"
export gtfFile="$IGENOME/Annotation/Genes/genes.gtf"
#head $gtfFile
if [ ! -f $gtfFile ]; then if [ ! -f $gtfFile ]; then
>&2 echo "gtf '$gtfFile' does not exis"; return; >&2 echo "gtf '$gtfFile' does not exis"; exit 1;
fi fi
if [ -z "$(which tophat)" ]; then if [ -z "$(which STAR)" ]; then
>&2 echo "no tomcat binary in PATH"; return; >&2 echo "no tomcat binary in PATH"; exit 1;
fi fi
echo "running tophat using igenome '$IGENOME' for the following files" echo "running STAR using igenome '${igenome}' for the following files"
ll $fastqFiles ll $fastqFiles
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz) #fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
...@@ -72,11 +76,11 @@ for fastqFile in $fastqFiles ; do ...@@ -72,11 +76,11 @@ for fastqFile in $fastqFiles ; do
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464 ## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile ### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
mysub "${project}__tophat__${fastqBaseName}" "
tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam ## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
samtools index $outputdir/$(basename $outputdir).bam mysub "${project}__star__${fastqBaseName}" "
# tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
STAR --genomeDir $star_index --readFilesIn /path/to/read1 $fastqFile --runThreadN 6 --outFileNamePrefix $fastqBaseName --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile
" -n 5 -R span[hosts=1] -q long | joblist .tophatjobs " -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
done done
...@@ -87,6 +91,7 @@ wait4jobs .tophatjobs ...@@ -87,6 +91,7 @@ wait4jobs .tophatjobs
dge_bam_correlate . dge_bam_correlate .
## create tophat mapping report ## create tophat mapping report
spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R . ## todo adjust report to STAR
#spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .
mailme "$project: tophat done in $(pwd)" mailme "$project: STAR done in $(pwd)"
\ No newline at end of file \ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment