Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
ngs_tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bioinfo
ngs_tools
Commits
80340742
Commit
80340742
authored
9 years ago
by
Holger Brandl
Browse files
Options
Downloads
Patches
Plain Diff
continued star wrapper
parent
3c89c2c6
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
dge_workflow/dge_master_template.sh
+1
-1
1 addition, 1 deletion
dge_workflow/dge_master_template.sh
dge_workflow/dge_utils.sh
+2
-1
2 additions, 1 deletion
dge_workflow/dge_utils.sh
dge_workflow/star_align.sh
+41
-36
41 additions, 36 deletions
dge_workflow/star_align.sh
with
44 additions
and
38 deletions
dge_workflow/dge_master_template.sh
+
1
−
1
View file @
80340742
...
...
@@ -124,7 +124,7 @@ mailme "$project: mapping done"
#########################################################################################################################
####
Basic Alginment QC and t
echnical replicate grouping
####
T
echnical replicate grouping
#
#mcdir
$baseDir
/trep_pooled
#
...
...
This diff is collapsed.
Click to expand it.
dge_workflow/dge_utils.sh
+
2
−
1
View file @
80340742
...
...
@@ -12,6 +12,7 @@ source <(curl https://raw.githubusercontent.com/holgerbrandl/datautils/v1.10/R/s
## todo tweak this to work on bioinfo as well
export
PATH
=
/projects/bioinfo/holger/bin/bowtie2-2.2.2:
$PATH
export
PATH
=
/projects/bioinfo/holger/bin/tophat-2.0.13.Linux_x86_64:
$PATH
export
PATH
=
/home/brandl/bin/STAR/STAR-STAR_2.4.1d/source:
$PATH
export
PATH
=
/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:
$PATH
export
PATH
=
/sw/apps/python/current/bin:
$PATH
export
PATH
=
/home/brandl/bin/deepTools/bin:
$PATH
...
...
@@ -157,7 +158,7 @@ for fastqFile in $fastqFiles ; do
mv
$outputdir
/accepted_hits.bam
$outputdir
/
$(
basename
$outputdir
)
.bam
samtools index
$outputdir
/
$(
basename
$outputdir
)
.bam
"
-n
5
-R
span[hosts
=
1]
-q
long
| joblist .tophatjobs
"
-n
5
-R
span[hosts
=
1]
-q
medium
| joblist .tophatjobs
done
wait4jobs .tophatjobs
...
...
This diff is collapsed.
Click to expand it.
dge_workflow/star_align.sh
+
41
−
36
View file @
80340742
...
...
@@ -5,60 +5,64 @@
usage
=
'
Use star to align fastq files against a genome
Usage: s
pin.R
<igenome> <fastq_files>...
Usage: s
tar_align.sh
<igenome> <fastq_files>...
Options:
'
-c
Cache results
#
-c Cache results
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - -A dopts : asdf asdf.fastq)
eval
$(
echo
"
$usage
"
| ~/bin/docopts/docopts
-h
- : asdf asdf.fastq another.fastq
)
for
fastqFile
in
;
do
echo
processing
$fastqFile
done
eval
$(
echo
"
$usage
"
| ~/bin/docopts/docopts
-h
-
-A
dopts :
"
$@
"
)
#eval $(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz)
echo
$igenome
#for fastqFile in ${fastq_files[@]} ; do
# echo processing $fastqFile
#done
#
#echo $igenome
gtfFile
=
$1
bamDir
=
$2
labels
=
$3
## build index if not present
echo
"
${
!dopts[@]
}
"
echo
"
${
dopts
[-c]
}
"
echo
"
${
dopts
[igenome]
}
"
#http://www.artificialworlds.net/blog/2012/10/17/bash-associative-array-examples/
#args=$(echo "$usage" | ~/bin/docopts/docopts -h - : "test.R")
export
star_index
=
"
$igenome
/Sequence/StarIndex/genome"
export
gtfFile
=
"
$igenome
/Annotation/Genes/genes.gtf"
head
$gtfFile
## this will define one environment variable per parameter (c,e,m,v)
eval
$(
echo
"
$usage
"
| ~/bin/docopts/docopts
-h
- :
-e
test.R
)
if
[
!
-f
$gtfFile
]
;
then
>
&2
echo
"gtf '
$gtfFile
' does not exis"
;
exit
1
;
fi
fastqFiles
=
${
fastq_files
[@]
}
## basic
#http://www.homolog.us/blogs/blog/2012/11/02/star-really-kick-ass-rna-seq-aligner/
# IGENOME=/projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38
if
!
-d
"
${
igenome
}
/Sequence/StarIndex"
]
;
then
if
[
-z
"
$IGENOME
"
]
||
[
-z
"
$fastqFiles
"
]
;
then
echo
"Usage: dge_tophat_se -i <path to igenome> [<fastq.gz file>]+"
>
&2
;
return
;
fi
mailme
"
${
project
}
: creating STAR index for
$igenome
"
mkdir
${
igenome
}
/Sequence/StarIndex
cmd
=
"STAR --runMode genomeGenerate --genomeDir
${
igenome
}
/Sequence/StarIndex --genomeFastaFiles
${
igenome
}
/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${igenome}/Sequence/StarIndex --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub
"
${
project
}
_star_index"
"
$cmd
"
-n
5
-R
span[hosts
=
1]
-q
medium | blockScript
## prevent modification
chmod
-R
-w
${
igenome
}
/Sequence/StarIndex
fi
export
bowtie_gindex
=
"
$IGENOME
/Sequence/Bowtie2Index/genome"
export
gtfFile
=
"
$IGENOME
/Annotation/Genes/genes.gtf"
#head $gtfFile
if
[
!
-f
$gtfFile
]
;
then
>
&2
echo
"gtf '
$gtfFile
' does not exis"
;
return
;
>
&2
echo
"gtf '
$gtfFile
' does not exis"
;
exit
1
;
fi
if
[
-z
"
$(
which
tophat
)
"
]
;
then
>
&2
echo
"no tomcat binary in PATH"
;
return
;
if
[
-z
"
$(
which
STAR
)
"
]
;
then
>
&2
echo
"no tomcat binary in PATH"
;
exit
1
;
fi
echo
"running
tophat
using igenome '
$
IGENOME
' for the following files"
echo
"running
STAR
using igenome '
$
{
igenome
}
' for the following files"
ll
$fastqFiles
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
...
...
@@ -72,11 +76,11 @@ for fastqFile in $fastqFiles ; do
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
mysub
"
${
project
}
__tophat__
${
fastqBaseName
}
"
"
tophat -p6 -G
$gtfFile
-g1 -o
$outputdir
$bowtie_gindex
$fastqFile
mv
$outputdir
/accepted_hits.bam
$outputdir
/
$(
basename
$outputdir
)
.bam
samtools index
$outputdir
/
$(
basename
$outputdir
)
.bam
## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
mysub
"
${
project
}
__star__
${
fastqBaseName
}
"
"
# tophat -p6 -G
$gtfFile
-g1 -o
$outputdir
$bowtie_gindex
$fastqFile
STAR --genomeDir
$star_index
--readFilesIn /path/to/read1
$fastqFile
--runThreadN 6 --outFileNamePrefix
$fastqBaseName
--outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile
"
-n
5
-R
span[hosts
=
1]
-q
long | joblist .tophatjobs
done
...
...
@@ -87,6 +91,7 @@ wait4jobs .tophatjobs
dge_bam_correlate
.
## create tophat mapping report
spin.R
${
NGS_TOOLS
}
/dge_workflow/tophat_qc.R
.
## todo adjust report to STAR
#spin.R ${NGS_TOOLS}/dge_workflow/tophat_qc.R .
mailme
"
$project
: tophat done in
$(
pwd
)
"
\ No newline at end of file
mailme
"
$project
: STAR done in
$(
pwd
)
"
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment