Commit 72a97eec authored by Holger Brandl's avatar Holger Brandl

improved master template

parent c074b546
......@@ -25,6 +25,7 @@ To use the structure from above when working on bioinformatics-srv1 just use
/home/brandl/mnt/mack/bioinfo/scripts/ngs_tools/v1.0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ToDo
====
......
......@@ -3,9 +3,11 @@
#' # Create an IGV Session File
#' ## Create an IGV Session File
#' hallo
#http://www.slideshare.net/EdwindeJonge1/docopt-user2014
suppressMessages(library(docopt))
#doc <- 'Usage: igvtrack.R --genome [options] <genome> <bam_files>...
doc <- 'Usage: igvtrack.R --genome [options] <genome> <bam_files>...
Options:
......
export baseDir=<<PATH_TO_BASEDIR>>
# todo define project name
export project=<<PROJECTNAME>>
# screen -R $project
## initially
DGE_HOME=/projects/bioinfo/holger/bioinfo_templates/dge_workflow
## when customization is needed
#export DGE_HOME=/sw/users/brandl/projects/<<PROJECTNAME>>
## madmax
# When LSF_SERVERDIR is non-empty (presumably only set on the LSF cluster -- confirm),
# override baseDir, PROJECT_SCRIPTS and DGE_HOME with the shared /projects/bioinfo paths.
# baseDir and PROJECT_SCRIPTS are derived from $project, so it must be exported before this point.
if [ -n "$LSF_SERVERDIR" ]; then
export baseDir="/projects/bioinfo/holger/projects/${project}"
export PROJECT_SCRIPTS="/projects/bioinfo/holger/scripts/${project}"
export DGE_HOME="/projects/bioinfo/scripts/ngs_tools/v1.0/"
fi
source $DGE_HOME/dge_utils.sh
export PATH=$DGE_HOME:$DGE_HOME/../misc/:$PATH
## bioinfo
# When running on host bioinformatics-srv1, point DGE_HOME at the ngs_tools copy below the mounted home directory.
# baseDir and PROJECT_SCRIPTS are only given as commented-out examples here and still have to be defined per project.
if [ $(hostname) == "bioinformatics-srv1" ]; then
#<<<todo define paths on bioinfo>>>
#export baseDir=/home/brandl/mnt/chip-seq_study/ChIPSeq_March_2015/data
#export PROJECT_SCRIPTS=/home/brandl/mnt/chip-seq_study/ChIPSeq_March_2015/scripts
export DGE_HOME="/home/brandl/mnt/mack/bioinfo/scripts/ngs_tools/v1.0"
fi
source $DGE_HOME/dge_workflow/dge_utils.sh
export PATH=$DGE_HOME/dge_workflow:$PATH
## todo define igenome to be used
## igenome=/projects/bioinfo/igenomes/Canis_familiaris/Ensembl/CanFam3.1
igenome=<<<<TBD>>>>
########################################################################################################################
### Fetch the data
# Download all *fastq.gz files from the NGS file-sharing server into $baseDir/originals.
# mcdir and mailme are helpers defined outside this template (presumably in dge_utils.sh -- confirm).
mcdir $baseDir/originals
# USER/PW are placeholders and must be replaced with the real credentials before running.
# -nc skips files that already exist locally, so the download can be re-run safely.
wget -nc --user="USER" --password="PW" -r --no-directories --no-check-certificate -A "*fastq.gz" https://projects.biotec.tu-dresden.de/ngs-filesharing/martaf/
mailme "$project: fastq download done"
## todo make sure to also copy the sample sheet in here
########################################################################################################################
### QC
dge_fastqc $(ls *fastq.gz) &
########################################################################################################################
### Apply renaming and merge lane replicates (but keep technical ones)
## todo adjust renaming scheme to project specifics
mcdir $baseDir/lanereps_pooled
echo '
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
options(java.parameters = "-Xmx4g" ); library(xlsx)
#sheetFile <- "/projects/bioinfo/holger/projects/stefania_isnm1/originals/stefaniat-FC_SN678_293-2014-11-26.xls"
sheetFile <- commandArgs(T)[1]
sampleSheet <- read.xlsx2(sheetFile, "Fastqfiles") %>%
select(File, SampleID, SampleName) %>%
## clean up the formatting a bit
mutate(
SampleName=str_replace(SampleName, "Ctrl14", "Ctrl_14"),
SampleName=ifelse(str_detect(SampleName, "_R"), SampleName, paste0(SampleName, "_R1")),
SampleName=str_replace(SampleName, "_14", "_"),
SampleName=str_replace(SampleName, "_R", "_TR")
) %>%
## also add a column containing just biological replicate info (for bam merging later)
mutate(
bio_sample=str_replace(SampleName, "_TR(1|2)", "")
)
write.delim(sampleSheet, file="renaming_scheme.txt")
require(ggplot2)
#ggplot(sampleSheet, aes(SampleName)) + geom_bar() + coord_flip()
## merge lane replication
library(foreach)
library(doMC)
registerDoMC(cores=4)
#sampleSheet %>% group_by(SampleName) %>% do(with(., print(paste("zcat", paste(File, collapse=" "), "| gzip -c >", paste0(SampleName, ".fastq.gz")))))
## with parallelization
sampleSheet %>% d_ply(.(SampleName), with,
system(paste("zcat", paste(paste0("../originals/", File), collapse=" "), "| gzip -c >", paste0(SampleName, ".fastq.gz")))
, .parallel=T)
' | R --vanilla -q --args $baseDir/originals/stefaniat-FC_SN678_293-2014-11-26.xls
########################################################################################################################
### Trim low-quality bases and remove left-over adapters
mcdir $baseDir/trimmed
dge_cutadapt $(ls $baseDir/treps_pooled/*fastq.gz) 2>&1 | tee cutadapt.log
dge_cutadapt $(ls $baseDir/lanereps_pooled/*fastq.gz) 2>&1 | tee cutadapt.log
dge_fastqc $(ls *fastq.gz) &
......@@ -34,10 +107,6 @@ dge_fastqc $(ls *fastq.gz) &
mcdir $baseDir/mapped
fastqFiles=$(ls $baseDir/trimmed/*.fastq.gz)
igenome=<<<<TBD>>>>
## Example:
## igenome=/projects/bioinfo/igenomes/Canis_familiaris/Ensembl/CanFam3.1
dge_tophat_se -i $igenome $fastqFiles 2>&1 | tee mapped.log
......@@ -67,25 +136,32 @@ gtfFile=$igenome/Annotation/Genes/genes.gtf
## define labels to split bam files into replicate groups
#labels=$(csvcut -t -c1 $baseDir/lanereps_pooled/sample2replicates.txt | tail -n+2 | uniq | xargs echo | sed 's/ /,/g')
## better externalize them
labels=<<<<TBD>>>>
dge_cuffdiff $gtfFile $baseDir/mapped $labels
mailme "$project: cuffdiff done"
mcdir $baseDir/cuffdiff/dge_report
#export DGE_PARAMS="-S"
spin.R $DGE_HOME/dge_analysis.R $baseDir/cuffdiff
spin.R $DGE_HOME/cuffdiff_report.R ..
## or when using specific contrasts
#echo "Ctrl_12h,X11B_12h
#Ctrl_36h,X11B_36h" > contrasts.txt
#spin.R $DGE_HOME/cuffdiff_report.R \""--constrasts contrasts.txt --pcutoff 0.01 $baseDir/cuffdiff"\"
########################################################################################################################
### Sync back to project space
# ... project specific stuff
# uni
screen -R rsync_$project
## main sync
## todo define mount path on bioinfo for bidirectional synching
~/bin/unison $baseDir ssh://bioinfo///home/brandl/mnt/<<MOUNT_PATH>> -fastcheck true -times -perms 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment