Skip to content
Snippets Groups Projects
Commit 80340742 authored by Holger Brandl's avatar Holger Brandl
Browse files

continued star wrapper

parent 3c89c2c6
No related branches found
No related tags found
No related merge requests found
......@@ -124,7 +124,7 @@ mailme "$project: mapping done"
#########################################################################################################################
#### Basic Alginment QC and technical replicate grouping
#### Technical replicate grouping
#
#mcdir $baseDir/trep_pooled
#
......
......@@ -12,6 +12,7 @@ source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.10/R/s
## todo tweak this to work on bioinfo as well
export PATH=/projects/bioinfo/holger/bin/bowtie2-2.2.2:$PATH
export PATH=/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:$PATH
export PATH=/home/brandl/bin/STAR/STAR-STAR_2.4.1d/source:$PATH
export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH
export PATH=/sw/apps/python/current/bin:$PATH
export PATH=/home/brandl/bin/deepTools/bin:$PATH
......@@ -157,7 +158,7 @@ for fastqFile in $fastqFiles ; do
mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
samtools index $outputdir/$(basename $outputdir).bam
" -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
" -n 5 -R span[hosts=1] -q medium | joblist .tophatjobs
done
wait4jobs .tophatjobs
......
......@@ -5,60 +5,64 @@
usage='
Use star to align fastq files against a genome
Usage: spin.R <igenome> <fastq_files>...
Usage: star_align.sh <igenome> <fastq_files>...
Options:
'
-c Cache results
#-c Cache results
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : asdf asdf.fastq)
eval $(echo "$usage" | ~/bin/docopts/docopts -h - : asdf asdf.fastq another.fastq)
for fastqFile in ; do
echo processing $fastqFile
done
eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : "$@")
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz)
echo $igenome
#for fastqFile in ${fastq_files[@]} ; do
# echo processing $fastqFile
#done
#
#echo $igenome
gtfFile=$1
bamDir=$2
labels=$3
## build index if not present
echo "${!dopts[@]}"
echo "${dopts[-c]}"
echo "${dopts[igenome]}"
#http://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/
#args=$(echo "$usage" | ~/bin/docopts/docopts -h - : "test.R")
export star_index="$igenome/Sequence/StarIndex/genome"
export gtfFile="$igenome/Annotation/Genes/genes.gtf"
head $gtfFile
## this will define one environment variable per parameter (c,e,m,v)
eval $(echo "$usage" | ~/bin/docopts/docopts -h - : -e test.R)
if [ ! -f $gtfFile ]; then
>&2 echo "gtf '$gtfFile' does not exis"; exit 1;
fi
fastqFiles=${fastq_files[@]}
## basic
#http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/
# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38
if ! -d "${igenome}/Sequence/StarIndex" ]; then
if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ];
then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return;
fi
mailme "${project}: creating STAR index for $igenome"
mkdir ${igenome}/Sequence/StarIndex
cmd="STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium | blockScript
## prevent modification
chmod -R -w ${igenome}/Sequence/StarIndex
fi
export bowtie_gindex="$IGENOME/Sequence/Bowtie2Index/genome"
export gtfFile="$IGENOME/Annotation/Genes/genes.gtf"
#head $gtfFile
if [ ! -f $gtfFile ]; then
>&2 echo "gtf '$gtfFile' does not exis"; return;
>&2 echo "gtf '$gtfFile' does not exis"; exit 1;
fi
if [ -z "$(which tophat)" ]; then
>&2 echo "no tomcat binary in PATH"; return;
if [ -z "$(which STAR)" ]; then
>&2 echo "no tomcat binary in PATH"; exit 1;
fi
echo "running tophat using igenome '$IGENOME' for the following files"
echo "running STAR using igenome '${igenome}' for the following files"
ll $fastqFiles
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
......@@ -72,11 +76,11 @@ for fastqFile in $fastqFiles ; do
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
mysub "${project}__tophat__${fastqBaseName}" "
tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
samtools index $outputdir/$(basename $outputdir).bam
## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
mysub "${project}__star__${fastqBaseName}" "
# tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
STAR --genomeDir $star_index --readFilesIn /path/to/read1 $fastqFile --runThreadN 6 --outFileNamePrefix $fastqBaseName --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile
" -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
done
......@@ -87,6 +91,7 @@ wait4jobs .tophatjobs
dge_bam_correlate .
## create tophat mapping report
spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .
## todo adjust report to STAR
#spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .
mailme "$project: tophat done in $(pwd)"
\ No newline at end of file
mailme "$project: STAR done in $(pwd)"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment