dge_master_template.sh 5.68 KB
Newer Older
Holger Brandl's avatar
Holger Brandl committed
1
# todo define project name
Holger Brandl's avatar
Holger Brandl committed
2
export project=<<PROJECTNAME>>
Holger Brandl's avatar
Holger Brandl committed
3
# screen -R $project
Holger Brandl's avatar
Holger Brandl committed
4 5


Holger Brandl's avatar
Holger Brandl committed
6 7 8
## madmax
if [ -n "$LSF_SERVERDIR" ]; then
    export baseDir="/projects/bioinfo/holger/projects/${project}"
9
    export PROJECT_SCRIPTS="/projects/bioinfo/brandl/scripts/${project}"
10
    export NGS_TOOLS="/projects/bioinfo/scripts/ngs_tools/v1.1"
Holger Brandl's avatar
Holger Brandl committed
11
fi
Holger Brandl's avatar
Holger Brandl committed
12

Holger Brandl's avatar
Holger Brandl committed
13 14 15 16 17
## bioinfo
if [ $(hostname) == "bioinformatics-srv1" ]; then
    #<<<todo define paths on bioinfo>>>
    #export baseDir=/home/brandl/mnt/chip-seq_study/ChIPSeq_March_2015/data
    #export PROJECT_SCRIPTS=/home/brandl/mnt/chip-seq_study/ChIPSeq_March_2015/scripts
18
    export NGS_TOOLS="/home/brandl/mnt/mack/bioinfo/scripts/ngs_tools/v1.1"
Holger Brandl's avatar
Holger Brandl committed
19
fi
Holger Brandl's avatar
Holger Brandl committed
20 21


22 23
source ${NGS_TOOLS}/dge_workflow/dge_utils.sh
export PATH=${NGS_TOOLS}/dge_workflow:$PATH
Holger Brandl's avatar
Holger Brandl committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41


## todo define igenome to be used
## igenome=/projects/bioinfo/igenomes/Canis_familiaris/Ensembl/CanFam3.1
igenome=<<<<TBD>>>>

########################################################################################################################
### Fetch the data

mcdir $baseDir/originals

wget -nc --user="USER" --password="PW"  -r --no-directories  --no-check-certificate -A "*fastq.gz" https:/projects.biotec.tu-dresden.de/ngs-filesharing/martaf/

mailme "$project: fastq download done"


## todo make sure to also copy the sample sheet in here

Holger Brandl's avatar
Holger Brandl committed
42
########################################################################################################################
43
### Basic QC
Holger Brandl's avatar
Holger Brandl committed
44

45
dge_fastqc $(ls *fastq.gz) &
Holger Brandl's avatar
Holger Brandl committed
46

Holger Brandl's avatar
Holger Brandl committed
47
########################################################################################################################
48
### Apply renaming and merge lane replicates (but keep technical ones)
Holger Brandl's avatar
Holger Brandl committed
49 50 51 52 53 54

## todo adjust renaming scheme to project specifics

mcdir $baseDir/lanereps_pooled

echo '
55
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.10/R/core_commons.R")
56

Holger Brandl's avatar
Holger Brandl committed
57 58
options(java.parameters = "-Xmx4g" ); library(xlsx)

Holger Brandl's avatar
Holger Brandl committed
59
sheetFile <- "../originals/natalied-FC_SN678_338-2015-5-12.xls"
Holger Brandl's avatar
Holger Brandl committed
60

Holger Brandl's avatar
Holger Brandl committed
61 62 63 64 65 66 67 68
# nc: no culture
# na: no hormone
# ECD: ecdysone
# INS: insulin
## first number : biological repliciate
## last number: time

renaming_scheme=c("NC" = "no_culture", "NA" = "no_hormone", "ECD"="ecdysone_", "INS"="insulin_", "9"="9h", "4"="4h")
Holger Brandl's avatar
Holger Brandl committed
69

Holger Brandl's avatar
Holger Brandl committed
70
sampleSheet <- read.xlsx2(sheetFile, "Fastqfiles") %>%
Holger Brandl's avatar
Holger Brandl committed
71
    select(File, SampleName) %>%
Holger Brandl's avatar
Holger Brandl committed
72
    mutate(
Holger Brandl's avatar
Holger Brandl committed
73 74 75
        bio_replicate=str_match(SampleName, "(.).*")[,2],
        sample = str_replace(SampleName, "[0-9]*", "") %>% str_replace_all(renaming_scheme),
        bio_sample=paste(sample, bio_replicate, sep="_")
Holger Brandl's avatar
Holger Brandl committed
76 77 78 79
    )

write.delim(sampleSheet, file="renaming_scheme.txt")

Holger Brandl's avatar
Holger Brandl committed
80
#sampleSheet %>% count(bio_sample)
Holger Brandl's avatar
Holger Brandl committed
81

Holger Brandl's avatar
Holger Brandl committed
82 83 84
#require(ggplot2)
#ggplot(sampleSheet, aes(bio_sample)) + geom_bar() + coord_flip()
## merge lane replication
Holger Brandl's avatar
Holger Brandl committed
85

86 87

## rather write file
Holger Brandl's avatar
Holger Brandl committed
88 89
sampleSheet %>% group_by(bio_sample) %>% summarise(
    zcat=paste("zcat", paste(paste0("../originals/", File), collapse=" "), "| gzip -c >", paste0(bio_sample[1], ".fastq.gz"))
90
) %>% with(zcat) %>% write.delim(header=F, file="lane_merge.cmd", quote=F)
Holger Brandl's avatar
Holger Brandl committed
91
' | R --vanilla -q
Holger Brandl's avatar
Holger Brandl committed
92

93 94 95
cat lane_merge.cmd | while read line; do
    mysub "${project}__repmerge" "$line" | joblist .repmerge
done
Holger Brandl's avatar
Holger Brandl committed
96

Holger Brandl's avatar
Holger Brandl committed
97

98 99 100 101 102 103

wait4jobs .repmerge
mailme "$project: replicate merging done"

dge_fastqc $(ls *fastq.gz) &

104 105 106 107
########################################################################################################################
### Trim low-quality bases and remove left-over adapters

mcdir $baseDir/trimmed
Holger Brandl's avatar
Holger Brandl committed
108

Holger Brandl's avatar
Holger Brandl committed
109
dge_cutadapt $(ls $baseDir/lanereps_pooled/*fastq.gz) 2>&1 | tee cutadapt.log
Holger Brandl's avatar
Holger Brandl committed
110

111
dge_fastqc $(ls *fastq.gz) &
Holger Brandl's avatar
Holger Brandl committed
112 113 114


########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
115
### Align the reads
Holger Brandl's avatar
Holger Brandl committed
116 117 118

mcdir $baseDir/mapped

119
fastqFiles=$(ls $baseDir/trimmed/*.fastq.gz)
Holger Brandl's avatar
Holger Brandl committed
120 121 122

dge_tophat_se -i $igenome $fastqFiles 2>&1 | tee mapped.log

123 124 125
## also create bigwig files
# dge_bigwig ${igenome}/Sequence/Bowtie2Index/genome.fa.fai $(find . -name "*.bam" | grep -v unmapped |  xargs echo) &

Holger Brandl's avatar
Holger Brandl committed
126 127
mailme "$project: mapping done"

128

Holger Brandl's avatar
Holger Brandl committed
129
#########################################################################################################################
Holger Brandl's avatar
Holger Brandl committed
130
#### Technical replicate grouping
Holger Brandl's avatar
Holger Brandl committed
131 132 133 134 135 136 137 138 139 140
#
#mcdir $baseDir/trep_pooled
#
#
#bio_reps=<<<biological replicates labels>>>
### Examples
## bio_reps=$(csvcut  -tc bio_sample  ../lanereps_pooled/renaming_scheme.txt |  tail -n+2 | sort -u | xargs echo | tr " " ",")
### bio_reps="ctrl,isnm1"
#
#dge_merge_treps $baseDir/mapped/ $bio_reps
141

Holger Brandl's avatar
Holger Brandl committed
142 143 144 145 146 147 148

########################################################################################################################
### Do the differential expression analysis

mcdir $baseDir/cuffdiff

gtfFile=$igenome/Annotation/Genes/genes.gtf
Holger Brandl's avatar
Holger Brandl committed
149
head $gtfFile
Holger Brandl's avatar
Holger Brandl committed
150 151

## define labels to split bam files into replicate groups
Holger Brandl's avatar
Holger Brandl committed
152
labels=$(csvcut -t -c4 $baseDir/lanereps_pooled/renaming_scheme.txt | tail -n+2 |sort | uniq | xargs echo | sed 's/ /,/g')
Holger Brandl's avatar
Holger Brandl committed
153 154
## todo better externalize them
#labels=<<<<TBD>>>>
Holger Brandl's avatar
Holger Brandl committed
155 156 157

dge_cuffdiff $gtfFile $baseDir/mapped $labels

158 159 160
mailme "$project: cuffdiff done"

mcdir $baseDir/cuffdiff/dge_report
Holger Brandl's avatar
Holger Brandl committed
161

162
spin.R ${NGS_TOOLS}/dge_workflow/cuffdiff_report.R ..
Holger Brandl's avatar
Holger Brandl committed
163 164 165 166

## or when using specfic contrasts
#echo "Ctrl_12h,X11B_12h
#Ctrl_36h,X11B_36h" > contrasts.txt
167
#spin.R $NGS_TOOLS/dge_workflow/cuffdiff_report.R  \""--constrasts contrasts.txt --pcutoff 0.01 $baseDir/cuffdiff"\"
Holger Brandl's avatar
Holger Brandl committed
168

169 170 171 172 173


########################################################################################################################
### Sync back to project space

Holger Brandl's avatar
Holger Brandl committed
174
## bidirectional sync with project space
Holger Brandl's avatar
Holger Brandl committed
175
## todo define mount path on bioinfo for bidirectional synching
Holger Brandl's avatar
Holger Brandl committed
176 177
~/bin/unison $baseDir ssh://bioinfo///home/brandl/mnt/<<MOUNT_PATH>> -fastcheck true -times -perms 0

Holger Brandl's avatar
Holger Brandl committed
178 179 180
# uni-directional sync
#rsync -avsn --delete  $baseDir brandl@fileserver:/projects//file/server/path
mailme "$project: sync done"