Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env bash
#https://www.biostars.org/p/91020/
usage='
Use star to align fastq files against a genome
Usage: spin.R <igenome> <fastq_files>...
Options:
'
-c Cache results
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : asdf asdf.fastq)
eval $(echo "$usage" | ~/bin/docopts/docopts -h - : asdf asdf.fastq another.fastq)
for fastqFile in ; do
echo processing $fastqFile
done
echo $igenome
gtfFile=$1
bamDir=$2
labels=$3
echo "${!dopts[@]}"
echo "${dopts[-c]}"
echo "${dopts[igenome]}"
#http://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/
#args=$(echo "$usage" | ~/bin/docopts/docopts -h - : "test.R")
## this will define one environment variable per parameter (c,e,m,v)
eval $(echo "$usage" | ~/bin/docopts/docopts -h - : -e test.R)
fastqFiles=${fastq_files[@]}
# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38
if [ -z "$IGENOME" ] || [ -z "$fastqFiles" ];
then echo "Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+" >&2 ; return;
fi
export bowtie_gindex="$IGENOME/Sequence/Bowtie2Index/genome"
export gtfFile="$IGENOME/Annotation/Genes/genes.gtf"
#head $gtfFile
if [ ! -f $gtfFile ]; then
>&2 echo "gtf '$gtfFile' does not exis"; return;
fi
if [ -z "$(which tophat)" ]; then
>&2 echo "no tomcat binary in PATH"; return;
fi
echo "running tophat using igenome '$IGENOME' for the following files"
ll $fastqFiles
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
for fastqFile in $fastqFiles ; do
echo "submitting tophat job for $fastqFile"
# DEBUG fastqFile=/projects/bioinfo/holger/projects/helin/mouse/trimmed/mouse_big_cyst_rep4_ca.fastq.gz
fastqBaseName=$(basename ${fastqFile%%.fastq.gz})
outputdir=$fastqBaseName
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
mysub "${project}__tophat__${fastqBaseName}" "
tophat -p6 -G $gtfFile -g1 -o $outputdir $bowtie_gindex $fastqFile
mv $outputdir/accepted_hits.bam $outputdir/$(basename $outputdir).bam
samtools index $outputdir/$(basename $outputdir).bam
" -n 5 -R span[hosts=1] -q long | joblist .tophatjobs
done
wait4jobs .tophatjobs
dge_bam_correlate .
## create tophat mapping report
spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .
mailme "$project: tophat done in $(pwd)"