# Template for a differential gene expression (DGE) workflow: configure the
# project environment, download the fastq files, pool lane replicates, align
# the reads, run the DGE analysis and record the ngs_tools version used.
#
# mcdir, mailme, dge_fastqc, jl, star_align.kts and rend.R are not defined in
# this file; they are presumably provided by dge_utils.sh / ${NGS_TOOLS}
# (sourced and put on PATH below) -- confirm before running.

# TODO define project name
export project="TODO define project name"

# screen -R ${project}

## madmax
# The spaces around `==` are required: without them the test consists of one
# non-empty word and is always true.
if [[ "${HOSTNAME}" == "falcon1" ]]; then
  export baseDir="/projects/bioinfo/holger/projects/${project}"
  export PROJECT_SCRIPTS="/projects/bioinfo/holger/scripts/${project}"
  export NGS_TOOLS="/projects/bioinfo/scripts/ngs_tools/dev"
fi

## bioinfo
if [[ "$(hostname)" == "bioinformatics-srv1" ]]; then
  export baseDir="/net/mack/lustre/projects/bioinfo/holger/projects/${project}"
  export PROJECT_SCRIPTS="/net/mack/lustre/projects/bioinfo/holger/scripts/${project}"
  export NGS_TOOLS="/net/mack/lustre/projects/bioinfo/scripts/ngs_tools/dev"
fi

# Neither host matched and nothing was exported beforehand: say so instead of
# failing later with an unrelated "file not found".
if [[ -z "${NGS_TOOLS:-}" ]]; then
  echo "NGS_TOOLS is not configured for host '${HOSTNAME}'" >&2
fi

source "${NGS_TOOLS}/dge_workflow/dge_utils.sh"
export PATH="${NGS_TOOLS}/dge_workflow:${PATH}"

## TODO define igenome to be used
## igenome=/projects/bioinfo/igenomes/Canis_familiaris/Ensembl/CanFam3.1
#igenome=<<<>>>


########################################################################################################################
### Fetch the data

mcdir "${baseDir}/originals"

# USER and PW are placeholders; fill them in locally and do not commit them.
wget -nc --user="USER" --password="PW" -r --no-directories --no-check-certificate \
  -A "*fastq.gz" https://projects.biotec.tu-dresden.de/ngs-filesharing/martaf/

mailme "${project}: fastq download done"

### Basic QC
# Runs in the background; the glob replaces parsing the output of ls.
dge_fastqc *fastq.gz &

## todo make sure to also copy the sample sheet in here


########################################################################################################################
### Apply renaming and merge lane replicates (but keep technical ones)

## todo adjust renaming scheme to project specifics

mcdir "${baseDir}/lanereps_pooled"

# The R script writes renaming_scheme.txt, lane_merge.cmd (one zcat|gzip
# command per bio_sample) and basic_design.txt into the current directory.
# The delimiter is quoted so the shell leaves `$` (as in `%$%`) untouched.
# NOTE(review): the final distinct()/arrange() refer to a column `replicate`,
# but the mutate() above only creates `bio_replicate` -- confirm which is meant.
R --vanilla -q <<'EOF'
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.36/R/core_commons.R")

sheetFile <- "../originals/natalied-FC_SN678_338-2015-5-12.xls"

sampleSheet <- read_excel(sheetFile, "Fastqfiles") %>%
    select(File, SampleName) %>%
    mutate(
        bio_replicate=str_match(SampleName, "(.).*")[,2],
        sample = str_replace(SampleName, "[0-9]*", "") %>%
            str_replace_all(c("NC" = "no_culture", "NA" = "no_hormone", "ECD"="ecdysone_", "INS"="insulin_", "9"="9h", "4"="4h")),
        bio_sample=paste(sample, bio_replicate, sep="_")
    )

write_tsv(sampleSheet, path="renaming_scheme.txt")

require(ggplot2)
ggplot(sampleSheet, aes(bio_sample)) + geom_bar() + coord_flip()

sampleSheet %>% group_by(bio_sample) %>% summarise(
    zcat=paste("zcat", paste(paste0("../originals/", File), collapse=" "), "| gzip -c >", paste0(bio_sample[1], ".fastq.gz"))
) %$% zcat %>% write_lines("lane_merge.cmd")

sampleSheet %>% distinct(replicate, sample) %>% arrange(sample, replicate) %>% write_tsv(path="basic_design.txt")
EOF

# Submit every merge command as its own job; -r keeps backslashes literal.
while IFS= read -r line; do
  # eval ${line}
  jl submit -j .repmerge "$line"
done < lane_merge.cmd

jl wait --email --report

dge_fastqc *fastq.gz &


########################################################################################################################
### Alignment the reads

mcdir "${baseDir}/alignments"

# Fails with a clear message while igenome is still undefined (see TODO above).
star_align.kts "${igenome:?igenome is not defined}" "${baseDir}"/lanereps_pooled/*.fastq.gz 2>&1 | tee star_algin.log

#dge_bam_correlate . & # part of star_align.kts now

mailme "${project}: mapping done"


########################################################################################################################
### Differential Expression Analysis

mcdir "${baseDir}/dge_analysis"

## build custom design matrix
#csvcut -tc replicate,sample $baseDir/lanereps_pooled/renaming_scheme.txt | csvformat -T > design_matrix.txt

#Rscript - <<"EOF"
#devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.36/R/core_commons.R")
#file.path(Sys.getenv("baseDir"), "lanereps_pooled/renaming_scheme.txt") %>%
#    read_tsv() %>%
#    distinct(replicate, sample) %>%
#    arrange(sample, replicate) %>%
#    write_tsv("design_matrix.txt")
#EOF

## or use just some contrasts of interest
#echo "
#sample_1, sample_2
#unpolarised,liver_polar_stage3
#" | trim | csvformat -T > contrasts.txt

# NOTE(review): both invocations below are commented out; enable the one that
# matches the project design. The enrichment step further down reads
# ../plot_score_matrix.txt and ../degs_by_contrast.txt, presumably written by
# this script -- confirm.
# rend.R -e ${NGS_TOOLS}/dge_workflow/featcounts_deseq_mf.R ../alignments/star_counts_matrix.txt 2>&1 | tee featcounts_deseq.log
#rend.R -e ${NGS_TOOLS}/dge_workflow/featcounts_deseq_mf.R --design 'batch+sample' --pcutoff 0.01 --contrasts ../contrasts_wt.txt ../qseq.counts.txt ../qseq.design_matrix.txt 2>&1 | tee featcounts_deseq_mf.log


## Term enrichment
mcdir "${baseDir}/dge_analysis/dge_enrichment_analysis"

#$NGS_TOOLS/common/cp_enrichment.R --overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast
rend.R -e "${NGS_TOOLS}/common/cp_enrichment.R" --overlay_expr_data ../plot_score_matrix.txt ../degs_by_contrast.txt contrast


########################################################################################################################
### Sync back to project space

## version common tools

## todo make sure to stay in current directory
#{ (cd ${NGS_TOOLS} && git describe --tags) } >> .used_ngs_tools

# Tag the ngs_tools checkout only when the project name is set and the working
# tree has no uncommitted changes. The subshell keeps the cd local.
(
  cd "${NGS_TOOLS}" \
    && test -n "${project}" \
    && git diff --exit-code \
    && git tag "${project}__$(date +'%Y%m%d')"
) || echo "could not tag current branch" >&2

# and log it
# Must run inside the ngs_tools checkout as well: the cd above happened in a
# subshell, so the current directory is still the enrichment directory.
(cd "${NGS_TOOLS}" && git describe --tags) >> "${baseDir}/.used_ngs_tools"
## bidirectional sync with project space
# Best-effort: every step reports a failure and the script carries on.
#remoteProjectDir=/net/fileserver-nfs/stornext/snfs3/projects/ TODO define mount path on bioinfo for bidirectional synching

# Start unison only when remoteProjectDir is set and both directories exist.
{
  [[ -n "${remoteProjectDir:-}" && -d "${remoteProjectDir}" && -d "${baseDir:-}" ]] \
    && ~/bin/unison "${baseDir}" "${remoteProjectDir}/data" -fastcheck -times -perms 0 -batch
} || echo "could not sync data" >&2

## update scripts working copy in project space
#git clone "git@git.mpi-cbg.de:bioinfo/${project}.git" ${remoteProjectDir}/scripts
# The non-empty check stops an unset remoteProjectDir from resolving to
# /scripts; the subshell keeps the cd from leaking into the rest of the script.
(
  [[ -n "${remoteProjectDir:-}" && -d "${remoteProjectDir}/scripts" ]] \
    && cd "${remoteProjectDir}/scripts/" \
    && git pull
) || echo "could not sync scripts" >&2

## also update the git copy of the script

# or use a uni-directional sync
#rsync -avsn --delete ${baseDir} brandl@fileserver:/projects//file/server/path

mailme "${project}: sync done"