Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
ngs_tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bioinfo
ngs_tools
Commits
cebd3cb7
Commit
cebd3cb7
authored
9 years ago
by
Holger Brandl
Browse files
Options
Downloads
Patches
Plain Diff
improved star integration
parent
49599782
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
dge_workflow/dge_master_template.sh
+3
-0
3 additions, 0 deletions
dge_workflow/dge_master_template.sh
dge_workflow/dge_utils.sh
+32
-0
32 additions, 0 deletions
dge_workflow/dge_utils.sh
dge_workflow/star_align.sh
+11
-23
11 additions, 23 deletions
dge_workflow/star_align.sh
with
46 additions
and
23 deletions
dge_workflow/dge_master_template.sh
+
3
−
0
View file @
cebd3cb7
...
...
@@ -120,6 +120,9 @@ fastqFiles=$(ls $baseDir/trimmed/*.fastq.gz)
dge_tophat_se -i
$igenome
$fastqFiles
2>&1 | tee mapped.log
## also create bigwig files
# dge_bigwig
${
igenome
}
/Sequence/Bowtie2Index/genome.fa.fai
$(
find
.
-name
"*.bam"
|
grep
-v
unmapped | xargs
echo
)
&
mailme "
$project
: mapping done"
...
...
This diff is collapsed.
Click to expand it.
dge_workflow/dge_utils.sh
+
32
−
0
View file @
cebd3cb7
...
...
@@ -398,3 +398,35 @@ mailme "$project: cuffdiff done in $(pwd)"
}
export
-f
dge_cuffdiff
# Create a star index for a given igenome
dge_create_star_index
(){
if
[
$#
-ne
1
]
;
then
echo
"Usage: dge_create_star_index <igenome>"
>
&2
;
return
;
fi
igenome
=
$1
if
[
!
-d
"
$igenome
"
]
|
[
!
-d
"
${
igenome
}
/Sequence"
]
;
then
echo
"igenome directory '
$igenome
' does not exist"
>
&2
;
return
;
fi
export
star_index
=
"
${
igenome
}
/Sequence/StarIndex"
chmod
+w
$(
dirname
${
star_index
}
)
mailme
"
${
project
}
: creating STAR index in
${
star_index
}
"
mkdir
${
star_index
}
cmd
=
"STAR --runMode genomeGenerate --genomeDir
${
star_index
}
--genomeFastaFiles
${
igenome
}
/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub
"
${
project
}
_star_index"
"
$cmd
"
-n
5
-R
span[hosts
=
1]
-q
medium | blockScript
## prevent further modification
chmod
-w
$(
dirname
${
star_index
}
)
mailme
"created star index for
$igenome
"
}
export
-f
dge_create_star_index
This diff is collapsed.
Click to expand it.
dge_workflow/star_align.sh
+
11
−
23
View file @
cebd3cb7
...
...
@@ -23,12 +23,6 @@ eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : "$@")"
#eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : /projects/bioinfo/igenomes/Mus_musculus/Ensembl/GRCm38 /projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz fastq2)"
for
fastqFile
in
${
fastq_files
[@]
}
;
do
echo
processing
$fastqFile
done
#
#echo $igenome
## extract all configuration parameters
fastqFiles
=
${
fastq_files
[@]
}
...
...
@@ -52,18 +46,8 @@ fi
## build index if not present
if
[
!
-f
"
${
star_index
}
/SA"
]
;
then
chmod
+w
$(
dirname
${
star_index
}
)
mailme
"
${
project
}
: creating STAR index for
$igenome
"
mkdir
${
star_index
}
cmd
=
"STAR --runMode genomeGenerate --genomeDir
${
star_index
}
--genomeFastaFiles
${
igenome
}
/Sequence/WholeGenomeFasta/genome.fa --runThreadN 10"
#eval $cmd
#STAR --runMode genomeGenerate --genomeDir ${star_index} --genomeFastaFiles ${igenome}/Sequence/Chromosomes/*.fa --runThreadN 10
mysub
"
${
project
}
_star_index"
"
$cmd
"
-n
5
-R
span[hosts
=
1]
-q
medium | blockScript
## prevent modification
chmod
-w
$(
dirname
${
star_index
}
)
echo
"Missing STAR for
${
star_index
}
/SA; use 'dge_create_star_index
${
igenome
}
' to create it"
;
exit
1
;
# dge_create_star_index ${igenome}'
fi
...
...
@@ -73,7 +57,7 @@ echo "running STAR using igenome '${igenome}' for the following files"
#fastqFiles=$(ls $baseDir/treps_pooled/*fastq.gz)
for
fastqFile
in
$fastqFiles
;
do
echo
"submitting
tophat
job for
$fastqFile
"
echo
"submitting
STAR
job for
$fastqFile
"
# DEBUG fastqFile=/projects/bioinfo/holger/projects/florio_11b_2nd_batch/lanereps_pooled/arhgap11b_1.fastq.gz
fastqBaseName
=
$(
basename
${
fastqFile
%%.fastq.gz
}
)
...
...
@@ -82,15 +66,19 @@ for fastqFile in $fastqFiles ; do
## uniquely mapping reads only: http:/seqanswers.com/forums/showthread.php?s=93da11109ae12a773a128a637d69defe&t=3464
### tophat -p6 -G $gtfFile -g1 -o test $bowtie_gindex $fastqFile
## note --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
## params:
# --outSAMstrandField intronMotif is required for cuffdiff compatiblity (xs flag)
# --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout to get rid of artifical junctions
# --quantMode GeneCounts see https://groups.google.com/forum/#!searchin/rna-star/GeneCounts/rna-star/gZRJx3ElRNo/p5FjBYKuY00J
mysub
"
${
project
}
__star__
${
fastqBaseName
}
"
"
STAR --genomeDir
$star_index
--readFilesIn
$fastqFile
--runThreadN 6 --readFilesCommand zcat --outFileNamePrefix
${
fastqBaseName
}
. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile
$gtfFile
STAR --genomeDir
$star_index
--readFilesIn
$fastqFile
--runThreadN 6 --readFilesCommand zcat --outFileNamePrefix
${
fastqBaseName
}
. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile
$gtfFile
--outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts
mv
${
fastqBaseName
}
.Aligned.sortedByCoord.out.bam
${
fastqBaseName
}
.bam
samtools index
${
fastqBaseName
}
.bam
"
-n
5
-R
span[hosts
=
1]
-q
short
| joblist .
tophat
jobs
"
-n
5
-R
span[hosts
=
1]
-q
medium
| joblist .
star
jobs
done
wait4jobs .
tophat
jobs
wait4jobs .
star
jobs
rm
-rf
*
STARgenome
*
.Log.progress.out _STARtmp
*
.SJ.out.tab
*
Log.out
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment