Skip to content
Snippets Groups Projects
Commit cebd3cb7 authored by Holger Brandl's avatar Holger Brandl
Browse files

improved star integration

parent 49599782
No related branches found
No related tags found
No related merge requests found
......@@ -120,6 +120,9 @@ fastqFiles=$(ls $baseDir/trimmed/*.fastq.gz)
dge_tophat_se -i $igenome $fastqFiles 2>&1 | tee mapped.log
## also create bigwig files
# dge_bigwig ${igenome}/Sequence/Bowtie2Index/genome.fa.fai $(find . -name "*.bam" | grep -v unmapped | xargs echo) &
mailme "$project: mapping done"
......
......@@ -398,3 +398,35 @@ mailme "$project: cuffdiff done in $(pwd)"
}
export -f dge_cuffdiff
# Create a star index for a given igenome
dge_create_star_index(){
if [ $# -ne 1 ]; then
echo "Usage: dge_create_star_index <igenome>" >&2 ; return;
fi
igenome=$1
if [ ! -d "$igenome" ] | [ ! -d "${igenome}/Sequence" ]; then
echo "igenome directory '$igenome' does not exist" >&2 ; return;
fi
export star_index="${igenome}/Sequence/StarIndex"
chmod +w $(dirname ${star_index})
mailme "${project}: creating STAR index in ${star_index}"
mkdir ${star_index}
cmd="STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium | blockScript
## prevent further modification
chmod -w $(dirname ${star_index})
mailme "created star index for $igenome"
}
export -f dge_create_star_index
......@@ -23,12 +23,6 @@ eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : "$@")"
#eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz fastq2)"
for fastqFile in ${fastq_files[@]} ; do
echo processing $fastqFile
done
#
#echo $igenome
## extract all configuration parameters
fastqFiles=${fastq_files[@]}
......@@ -52,18 +46,8 @@ fi
## build index if not present
if [ ! -f "${star_index}/SA" ]; then
chmod +w $(dirname ${star_index})
mailme "${project}: creating STAR index for $igenome"
mkdir ${star_index}
cmd="STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub "${project}_star_index" "$cmd" -n 5 -R span[hosts=1] -q medium | blockScript
## prevent modification
chmod -w $(dirname ${star_index})
echo "Missing STAR for ${star_index}/SA; use 'dge_create_star_index ${igenome}' to create it"; exit 1;
# dge_create_star_index ${igenome}'
fi
......@@ -73,7 +57,7 @@ echo "running STAR using igenome '${igenome}' for the following files"
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
for fastqFile in $fastqFiles ; do
echo "submitting tophat job for $fastqFile"
echo "submitting STAR job for $fastqFile"
# DEBUG fastqFile=/projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz
fastqBaseName=$(basename ${fastqFile%%.fastq.gz})
......@@ -82,15 +66,19 @@ for fastqFile in $fastqFiles ; do
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
## params:
# --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
# --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout to get rid of artifical junctions
# --quantMode GeneCounts see https://groups.google.com/forum/#!searchin/rna-star/GeneCounts/rna-star/gZRJx3ElRNo/p5FjBYKuY00J
mysub "${project}__star__${fastqBaseName}" "
STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 --readFilesCommand zcat --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile
STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 --readFilesCommand zcat --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts
mv ${fastqBaseName}.Aligned.sortedByCoord.out.bam ${fastqBaseName}.bam
samtools index ${fastqBaseName}.bam
" -n 5 -R span[hosts=1] -q short | joblist .tophatjobs
" -n 5 -R span[hosts=1] -q medium | joblist .starjobs
done
wait4jobs .tophatjobs
wait4jobs .starjobs
rm -rf *STARgenome *.Log.progress.out _STARtmp *.SJ.out.tab *Log.out
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment