Commit f3ecbac4 authored by Lena Hersemann's avatar Lena Hersemann
Browse files

added scripts

parent 80c6feac
This diff is collapsed.
This diff is collapsed.
##======================================================================================================================
## Differential abundance analysis workflow
##======================================================================================================================
## Define paths to data and scripts
## All variables are exported so that child processes can read them (the Rscript
## step further down reads PRJ_DATA via Sys.getenv).
export PRJ_NAME="schuhmacher_binding_sites"
# screen -R ${PRJ_NAME}
export PRJ_DATA="/project/data/proteomics_data"
export PRJ_SCRIPTS="/project/scripts/${PRJ_NAME}"
export MS_CONDA_ENV="/project/conda/envs/ms_BioC3.8"
## files created from here on: full access for user and group, none for others
umask u=rwx,g=rwx,o=
## sanity check: ls fails if any of the three paths does not exist; its listing is
## discarded and only the failure triggers the error message on stderr.
## NOTE(review): "exit 1" ends the current shell; if these lines are pasted into an
## interactive (screen) session this closes the session -- confirm this is intended
ls "${PRJ_DATA}" "${PRJ_SCRIPTS}" "${MS_CONDA_ENV}" >/dev/null || { echo "not all project resources are well defined" 1>&2; exit 1; }
## Define functions, tools and further settings
## mcdir DIR
## Create DIR (including missing parent directories) and make it the current
## working directory.
## Returns non-zero and leaves the working directory untouched when DIR is
## missing/empty, cannot be created, or cannot be entered.
mcdir(){
  ## reject an empty argument explicitly instead of relying on how mkdir/cd
  ## treat an empty path
  if [ -z "${1:-}" ]; then
    echo "mcdir: missing directory argument" 1>&2
    return 1
  fi
  ## mkdir -p succeeds for an already existing directory, so no separate -d
  ## test is needed; cd only runs when the directory is known to exist
  mkdir -p -- "$1" && cd -- "$1"
}
export -f mcdir
## activate conda environment (done once; the path is quoted like in the
## resource check above so that it is never word-split)
conda activate "${MS_CONDA_ENV}"
##======================================================================================================================
## deposit MaxQuant files in 'originals' folder
mcdir "${PRJ_DATA}/originals"
## filter proteinGroups.txt file for protein groups with >= 1 unique peptide and generate renaming scheme and sample pre-processing file
Rscript - << "EOF"
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.45/R/core_commons.R")
library(data.table)
## Predicate used with mutate_if(): TRUE when x carries the class "integer64".
## inherits() always returns a single TRUE/FALSE, whereas class(x) == "integer64"
## returns one value per entry of the class attribute for multi-class objects.
is.integer64 <- function(x){
  inherits(x, "integer64")
}
## read in data
## PRJ_DATA comes from the environment of the calling shell; integer64 columns are
## read as numeric and any column still of class integer64 is converted as well
data <- fread(file.path(Sys.getenv("PRJ_DATA"), "originals/proteinGroups.txt"), integer64 = "numeric") %>%
  pretty_columns() %>%
  as.data.frame() %>%
  mutate_if(is.integer64, ~as.numeric(.))

## check number of proteinGroups with >= 1 unique peptides (data: 2022 >= 1)
count(data, unique_peptides >= 1)

## extract proteins with at least 1 unique peptide
prot_to_keep <- data %>%
  select(protein_ids, unique_peptides) %>%
  filter(unique_peptides >= 1) %$% protein_ids %>% unique()
length(prot_to_keep)

## export proteinGroups.txt file
## NOTE(review): the file is written to the current working directory; if that is
## the 'originals' folder the input was read from, the input file is replaced by
## the filtered table -- confirm this is intended
data %>%
  filter(protein_ids %in% prot_to_keep) %>%
  write_tsv("proteinGroups.txt")

## generate renaming scheme
## The new names are assigned by position, so the number of LFQ columns has to
## match exactly; fail with an explicit message otherwise.
## NOTE(review): the position-based mapping also assumes the LFQ columns appear in
## the order GST, P5, GST, P5, ... -- verify against the column order of the input
new_sample_names <- c("GST_1", "P5_1", "GST_2", "P5_2", "GST_3", "P5_3")
lfq_columns <- colnames(data)[str_detect(colnames(data), "lfq_intensity_")]
if (length(lfq_columns) != length(new_sample_names)) {
  stop("expected ", length(new_sample_names), " lfq_intensity_ columns in proteinGroups.txt but found ",
       length(lfq_columns), call. = FALSE)
}
renamingData <- data.frame(old = lfq_columns) %>%
  mutate(old = str_remove(old, "lfq_intensity_"), new = new_sample_names)
renamingData %>% write_tsv("renaming_scheme.txt")

## generate sample_info file (condition = new name without the _1/_2/_3 suffix)
renamingData %>%
  transmute(condition = str_remove(new, "_[1-3]$"), replicate = new) %>%
  write_tsv("sample_info.txt")
EOF
## run pre processing of MaxQuant output
## NOTE(review): the R step above writes proteinGroups.txt, renaming_scheme.txt and sample_info.txt into the directory
## it runs in, while the calls below expect them one level above the pre_processing folder (../) -- confirm that the
## files are present in ${PRJ_DATA}/proteomics_data/ms_workflow/final before running
## NOTE(review): the renaming scheme is derived from lfq_intensity_ column names while --data_type is ibaq -- confirm
## that the sample names match
mcdir "${PRJ_DATA}/proteomics_data/ms_workflow/final/pre_processing"
rend.R -e --out data_prep "${PRJ_SCRIPTS}/01-ms-dataprep-qc.R" --results_prefix data_prep --data_type ibaq --reorder_protein_ids FALSE --renaming_scheme ../renaming_scheme.txt ../ ../sample_info.txt
## run differential abundance analysis using DEP v1.4.0 with lfc = 1, qcutoff = 0.05 and mixed imputation using knn for MAR and MinDet for MNAR
mcdir "${PRJ_DATA}/proteomics_data/ms_workflow/final/dep_analysis"
rend.R -e --out ms_dep "${PRJ_SCRIPTS}/02-ms-DEP-analysis.R" --results_prefix ms_dep --lfc 1 --qcutoff 0.05 --imputation mixed ../pre_processing/data_prep.intensities_with_NAs.txt ../sample_info.txt ../pre_processing/data_prep.feature_information.txt
\ No newline at end of file
name: null
channels:
- conda-forge
- bioconda
- defaults
- ii-bioinfo
- f30a78ec8
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_llvm
- _r-mutex=1.0.1=anacondar_1
- attrs=19.3.0=py_0
- backcall=0.1.0=py_0
- binutils_impl_linux-64=2.34=h53a641e_0
- binutils_linux-64=2.34=hc952b39_20
- bioconductor-affy=1.60.0=r351h14c3975_0
- bioconductor-affyio=1.52.0=r351h14c3975_0
- bioconductor-annotationdbi=1.44.0=r351_0
- bioconductor-biobase=2.42.0=r351h14c3975_1
- bioconductor-biocgenerics=0.28.0=r351_1
- bioconductor-biocparallel=1.16.6=r351h1c2f66e_0
- bioconductor-biocstyle=2.12.0=r351_0
- bioconductor-clusterprofiler=3.10.1=r351_0
- bioconductor-complexheatmap=1.20.0=r351_0
- bioconductor-delayedarray=0.8.0=r351h14c3975_0
- bioconductor-dep=1.4.0=r351_0
- bioconductor-do.db=2.9=r351_4
- bioconductor-dose=3.8.0=r351_0
- bioconductor-enrichplot=1.2.0=r351_0
- bioconductor-fgsea=1.8.0=r351hf484d3e_0
- bioconductor-genomeinfodb=1.18.1=r351_0
- bioconductor-genomeinfodbdata=1.2.1=r351_0
- bioconductor-genomicranges=1.34.0=r351h14c3975_0
- bioconductor-go.db=3.7.0=r351_0
- bioconductor-gosemsim=2.8.0=r351hf484d3e_0
- bioconductor-graph=1.60.0=r351h14c3975_0
- bioconductor-graphite=1.28.2=r351_0
- bioconductor-impute=1.56.0=r351h9ac9557_0
- bioconductor-iranges=2.16.0=r351h14c3975_0
- bioconductor-limma=3.38.3=r351h14c3975_0
- bioconductor-msnbase=2.8.3=r351hf484d3e_0
- bioconductor-mzid=1.20.1=r351_0
- bioconductor-mzr=2.16.2=r351hdbcaa40_1
- bioconductor-org.dm.eg.db=3.7.0=r351_1
- bioconductor-org.hs.eg.db=3.7.0=r351_0
- bioconductor-org.mm.eg.db=3.7.0=r351_0
- bioconductor-pcamethods=1.74.0=r351hf484d3e_0
- bioconductor-preprocesscore=1.44.0=r351h14c3975_0
- bioconductor-protgenerics=1.14.0=r351_0
- bioconductor-qvalue=2.14.1=r351_0
- bioconductor-reactome.db=1.66.0=r351_0
- bioconductor-reactomepa=1.26.0=r351_0
- bioconductor-rhdf5lib=1.4.3=r351h1feb10b_0
- bioconductor-s4vectors=0.20.1=r351h14c3975_0
- bioconductor-summarizedexperiment=1.12.0=r351_0
- bioconductor-vsn=3.50.0=r351h14c3975_0
- bioconductor-xvector=0.22.0=r351h14c3975_0
- bioconductor-zlibbioc=1.28.0=r351h14c3975_0
- bleach=3.1.5=pyh9f0ad1d_0
- bwidget=1.9.14=0
- bzip2=1.0.8=h516909a_2
- ca-certificates=2020.6.20=hecda079_0
- cairo=1.16.0=hcf35c78_1003
- certifi=2020.6.20=py37hc8dfbb8_0
- curl=7.68.0=hf8cf82a_0
- decorator=4.4.2=py_0
- defusedxml=0.6.0=py_0
- entrypoints=0.3=py37hc8dfbb8_1001
- fontconfig=2.13.1=h86ecdb6_1001
- freetype=2.10.2=he06d7ca_0
- fribidi=1.0.9=h516909a_0
- gcc_impl_linux-64=7.5.0=hd420e75_6
- gcc_linux-64=7.5.0=h09487f9_20
- gettext=0.19.8.1=hc5be6a0_1002
- gfortran_impl_linux-64=7.5.0=hdf63c60_6
- gfortran_linux-64=7.5.0=h09487f9_20
- glib=2.64.2=h6f030ca_1
- gmp=6.2.0=he1b5a44_2
- graphite2=1.3.13=he1b5a44_1001
- gsl=2.5=h294904e_1
- gxx_impl_linux-64=7.5.0=hdf63c60_6
- gxx_linux-64=7.5.0=h09487f9_20
- harfbuzz=2.4.0=h9f30f68_3
- hdf4=4.2.13=hf30be14_1003
- hdf5=1.10.5=nompi_h3c11f04_1104
- icu=64.2=he1b5a44_1
- importlib-metadata=1.6.0=py37hc8dfbb8_0
- importlib_metadata=1.6.0=0
- ipykernel=5.3.0=py37h43977f1_0
- ipython=7.14.0=py37hc8dfbb8_0
- ipython_genutils=0.2.0=py_1
- jedi=0.17.0=py37hc8dfbb8_0
- jinja2=2.11.2=pyh9f0ad1d_0
- jpeg=9c=h14c3975_1001
- jsonschema=3.2.0=py37hc8dfbb8_1
- jupyter_client=6.1.3=py_0
- jupyter_core=4.6.3=py37hc8dfbb8_1
- krb5=1.16.4=h2fd8d38_0
- ld_impl_linux-64=2.34=h53a641e_0
- libblas=3.8.0=16_openblas
- libcblas=3.8.0=16_openblas
- libcurl=7.68.0=hda55be3_0
- libedit=3.1.20170329=hf8c457e_1001
- libffi=3.2.1=he1b5a44_1007
- libgcc-ng=9.2.0=h24d8f2e_2
- libgfortran-ng=7.5.0=hdf63c60_6
- libgomp=9.2.0=h24d8f2e_2
- libiconv=1.15=h516909a_1006
- libidn2=2.3.0=h516909a_0
- liblapack=3.8.0=16_openblas
- libnetcdf=4.7.4=nompi_h9f9fd6a_101
- libopenblas=0.3.9=h5ec1e0e_0
- libpng=1.6.37=hed695b0_1
- libsodium=1.0.17=h516909a_0
- libssh2=1.9.0=hab1572f_2
- libstdcxx-ng=9.2.0=hdf63c60_2
- libtiff=4.1.0=hc7e4089_6
- libunistring=0.9.10=h14c3975_0
- libuuid=2.32.1=h14c3975_1000
- libwebp-base=1.1.0=h516909a_3
- libxcb=1.13=h14c3975_1002
- libxml2=2.9.10=hee79883_0
- llvm-openmp=10.0.0=hc9558a2_0
- lz4-c=1.9.2=he1b5a44_1
- make=4.3=h516909a_0
- markupsafe=1.1.1=py37h8f50634_1
- mistune=0.8.4=py37h8f50634_1001
- nbconvert=5.6.1=py37hc8dfbb8_1
- nbformat=5.0.6=py_0
- ncurses=6.1=hf484d3e_1002
- notebook=6.0.3=py37hc8dfbb8_0
- openjdk=8.0.192=h516909a_1005
- openssl=1.1.1h=h516909a_0
- packaging=20.4=pyh9f0ad1d_0
- pandoc=2.9.2.1=0
- pandocfilters=1.4.2=py_1
- pango=1.42.4=h7062337_4
- parso=0.7.0=pyh9f0ad1d_0
- pcre=8.44=he1b5a44_0
- pexpect=4.8.0=py37hc8dfbb8_1
- pickleshare=0.7.5=py37hc8dfbb8_1001
- pip=20.1.1=pyh9f0ad1d_0
- pixman=0.38.0=h516909a_1003
- prometheus_client=0.7.1=py_0
- prompt-toolkit=3.0.5=py_0
- pthread-stubs=0.4=h14c3975_1001
- ptyprocess=0.6.0=py_1001
- pygments=2.6.1=py_0
- pyparsing=2.4.7=pyh9f0ad1d_0
- pyrsistent=0.16.0=py37h8f50634_0
- python=3.7.6=h8356626_5_cpython
- python-dateutil=2.8.1=py_0
- python_abi=3.7=1_cp37m
- pyzmq=19.0.1=py37hac76be4_0
- r=3.5.1=r35_1003
- r-argparser=0.6=r35_0
- r-askpass=1.1=r35hcdcec82_1
- r-assertthat=0.2.1=r35h6115d3f_1
- r-backports=1.1.6=r35hcdcec82_1
- r-base=3.5.1=hc461eb7_1012
- r-base64enc=0.1_3=r35hcdcec82_1003
- r-bh=1.72.0_3=r35h6115d3f_0
- r-biocmanager=1.30.10=r35h6115d3f_0
- r-bit=1.1_15.2=r35hcdcec82_0
- r-bit64=0.9_7=r35hcdcec82_1001
- r-bitops=1.0_6=r35hcdcec82_1003
- r-blob=1.2.1=r35h6115d3f_0
- r-bookdown=0.18=r35h6115d3f_0
- r-boot=1.3_25=r35h6115d3f_0
- r-brew=1.0_6=r35h6115d3f_1002
- r-broom=0.5.6=r35h6115d3f_0
- r-callr=3.4.3=r35h6115d3f_0
- r-caret=6.0_86=r35hcdcec82_1
- r-cellranger=1.1.0=r35h6115d3f_1002
- r-checkmate=2.0.0=r35hcdcec82_0
- r-circlize=0.4.9=r35h6115d3f_0
- r-class=7.3_17=r35hcdcec82_0
- r-cli=2.0.2=r35h6115d3f_0
- r-clipr=0.7.0=r35h6115d3f_0
- r-clue=0.3_57=r35h516909a_1
- r-cluster=2.1.0=r35h9bbef5b_2
- r-codetools=0.2_16=r35h6115d3f_1001
- r-colorspace=1.4_1=r35hcdcec82_1
- r-commonmark=1.7=r35hcdcec82_1001
- r-corrplot=0.84=r35_1002
- r-covr=3.5.0=r35h0357c0b_0
- r-cowplot=1.0.0=r35h6115d3f_1
- r-crayon=1.3.4=r35h6115d3f_1002
- r-crosstalk=1.1.0.1=r35h6115d3f_0
- r-curl=4.3=r35hcdcec82_0
- r-d3heatmap=0.6.1.2=r35h6115d3f_1002
- r-data.table=1.12.8=r35hcdcec82_0
- r-dataexplorer=0.8.0=r35h6115d3f_0
- r-dbi=1.1.0=r35h6115d3f_0
- r-dbplyr=1.4.3=r35h6115d3f_0
- r-dendextend=1.13.4=r35h6115d3f_0
- r-desc=1.2.0=r35h6115d3f_1002
- r-devtools=2.3.0=r35h6115d3f_0
- r-diffobj=0.2.4=r35hcdcec82_0
- r-digest=0.6.25=r35h0357c0b_1
- r-docopt=0.6.1=r35h6115d3f_1
- r-doparallel=1.0.15=r35h6115d3f_0
- r-dplyr=0.8.5=r35h0357c0b_0
- r-dt=0.13=r35h6115d3f_0
- r-ellipsis=0.3.0=r35hcdcec82_0
- r-essentials=3.5.1=r35_2001
- r-europepmc=0.3=r35h6115d3f_1001
- r-evaluate=0.14=r35h6115d3f_1
- r-fansi=0.4.1=r35hcdcec82_0
- r-farver=2.0.3=r35h0357c0b_0
- r-fastmap=1.0.1=r35h0357c0b_0
- r-fastmatch=1.1_0=r35hcdcec82_1004
- r-fdrtool=1.2.15=r35hcdcec82_1002
- r-forcats=0.5.0=r35h6115d3f_0
- r-foreach=1.5.0=r35h6115d3f_0
- r-foreign=0.8_76=r35hcdcec82_0
- r-formatr=1.7=r35h6115d3f_1
- r-formattable=0.2.0.1=r35_1001
- r-fs=1.4.1=r35h0357c0b_0
- r-futile.logger=1.4.3=r35h6115d3f_1002
- r-futile.options=1.0.1=r35h6115d3f_1001
- r-generics=0.0.2=r35h6115d3f_1002
- r-getoptlong=0.1.8=r35h6115d3f_0
- r-ggally=1.5.0=r35h6115d3f_0
- r-ggforce=0.3.1=r35h0357c0b_0
- r-ggplot2=3.3.0=r35h6115d3f_0
- r-ggplotify=0.0.5=r35h6115d3f_0
- r-ggraph=1.0.2=r35h0357c0b_1003
- r-ggrepel=0.8.2=r35h0357c0b_0
- r-ggridges=0.5.2=r35h6115d3f_1
- r-gh=1.1.0=r35h6115d3f_0
- r-gistr=0.5.0=r35h6115d3f_0
- r-git2r=0.26.1=r35h7253d3a_1
- r-glmnet=2.0_18=r35h9bbef5b_2
- r-globaloptions=0.1.1=r35_0
- r-glue=1.4.0=r35hcdcec82_0
- r-gmm=1.6_4=r35h9bbef5b_0
- r-gower=0.2.1=r35hcdcec82_1
- r-gridextra=2.3=r35h6115d3f_1002
- r-gridgraphics=0.5_0=r35h6115d3f_0
- r-gtable=0.3.0=r35h6115d3f_2
- r-haven=2.2.0=r35hde08347_0
- r-hdf5r=1.3.2=r35h3e93a20_0
- r-hexbin=1.28.1=r35h9bbef5b_0
- r-highr=0.8=r35h6115d3f_1
- r-hms=0.5.3=r35h6115d3f_0
- r-htmltools=0.4.0=r35h0357c0b_0
- r-htmlwidgets=1.5.1=r35h6115d3f_0
- r-httpuv=1.5.2=r35h0357c0b_1
- r-httr=1.4.1=r35h6115d3f_1
- r-igraph=1.2.5=r35hd626d4e_0
- r-imputelcmd=2.0=r351h6115d3f_0
- r-ini=0.3.1=r35h6115d3f_1002
- r-ipred=0.9_9=r35hcdcec82_1
- r-irdisplay=0.7=r35_1001
- r-irkernel=1.1=r35h6115d3f_0
- r-isoband=0.2.1=r35h0357c0b_0
- r-iterators=1.0.12=r35h6115d3f_0
- r-jsonlite=1.6.1=r35hcdcec82_0
- r-kernsmooth=2.23_17=r35hfa343cc_0
- r-knitr=1.28=r35h6115d3f_0
- r-labeling=0.3=r35h6115d3f_1002
- r-lambda.r=1.2.4=r35h6115d3f_0
- r-later=1.0.0=r35h0357c0b_0
- r-lattice=0.20_41=r35hcdcec82_1
- r-lava=1.6.7=r35h6115d3f_0
- r-lazyeval=0.2.2=r35hcdcec82_1
- r-lifecycle=0.2.0=r35h6115d3f_0
- r-lubridate=1.7.8=r35h0357c0b_0
- r-magrittr=1.5=r35h6115d3f_1002
- r-maldiquant=1.19.3=r35h516909a_1
- r-maps=3.3.0=r35hcdcec82_1003
- r-markdown=1.1=r35hcdcec82_0
- r-mass=7.3_51.6=r35hcdcec82_1
- r-matrix=1.2_18=r35h7fa42b6_2
- r-matrixstats=0.56.0=r35hcdcec82_0
- r-memoise=1.1.0=r35h6115d3f_1003
- r-mgcv=1.8_31=r35h7fa42b6_0
- r-mime=0.9=r35hcdcec82_0
- r-modelmetrics=1.2.2.2=r35h0357c0b_0
- r-modelr=0.1.7=r35h6115d3f_0
- r-munsell=0.5.0=r35h6115d3f_1002
- r-mvtnorm=1.1_0=r35h9bbef5b_0
- r-ncdf4=1.17=r35h6bc996b_2
- r-networkd3=0.4=r35h6115d3f_1003
- r-nlme=3.1_147=r35h9bbef5b_0
- r-nnet=7.3_14=r35hcdcec82_0
- r-norm=1.0_9.5=r35h9bbef5b_1002
- r-numderiv=2016.8_1.1=r35h6115d3f_1
- r-openssl=1.4.1=r35he5c4762_0
- r-patchwork=1.0.0=r35h6115d3f_0
- r-pbdzmq=0.3_3=r35h559a7a4_1002
- r-pheatmap=1.0.12=r35h6115d3f_1
- r-pillar=1.4.3=r35h6115d3f_0
- r-pkgbuild=1.0.7=r35h6115d3f_0
- r-pkgconfig=2.0.3=r35h6115d3f_0
- r-pkgload=1.0.2=r35h0357c0b_1001
- r-plogr=0.2.0=r35h6115d3f_1002
- r-plotly=4.9.2.1=r35h6115d3f_0
- r-plyr=1.8.6=r35h0357c0b_0
- r-png=0.1_7=r35hcdcec82_1003
- r-polyclip=1.10_0=r35h0357c0b_1
- r-praise=1.0.0=r35h6115d3f_1003
- r-prettyunits=1.1.1=r35h6115d3f_0
- r-proc=1.16.2=r35h0357c0b_0
- r-processx=3.4.2=r35hcdcec82_0
- r-prodlim=2019.11.13=r35h0357c0b_0
- r-progress=1.2.2=r35h6115d3f_1
- r-promises=1.1.0=r35h0357c0b_0
- r-pryr=0.1.4=r35h0357c0b_1003
- r-ps=1.3.2=r35hcdcec82_0
- r-purrr=0.3.4=r35hcdcec82_0
- r-quantmod=0.4.17=r35h6115d3f_0
- r-r6=2.4.1=r35h6115d3f_0
- r-randomforest=4.6_14=r35h9bbef5b_1002
- r-rappdirs=0.3.1=r35hcdcec82_1003
- r-rbokeh=0.5.0=r35h6115d3f_1002
- r-rcmdcheck=1.3.3=r35h6115d3f_2
- r-rcolorbrewer=1.1_2=r35h6115d3f_1002
- r-rcpp=1.0.4.6=r35h0357c0b_0
- r-rcppeigen=0.3.3.7.0=r35h0357c0b_0
- r-rcurl=1.98_1.2=r35hcdcec82_0
- r-readr=1.3.1=r35h0357c0b_1002
- r-readxl=1.3.1=r35hde08347_3
- r-recipes=0.1.12=r35h6115d3f_0
- r-recommended=3.5.1=r35_1003
- r-rematch=1.0.1=r35h6115d3f_1002
- r-rematch2=2.1.2=r35h6115d3f_0
- r-remotes=2.1.1=r35h6115d3f_0
- r-repr=1.1.0=r35h6115d3f_0
- r-reprex=0.3.0=r35h6115d3f_1
- r-reshape=0.8.8=r35hcdcec82_1
- r-reshape2=1.4.4=r35h0357c0b_0
- r-rex=1.2.0=r35h6115d3f_0
- r-rjava=0.9_12=r35hcdcec82_0
- r-rjson=0.2.20=r35h0357c0b_1001
- r-rlang=0.4.5=r35hcdcec82_2
- r-rmarkdown=2.1=r35h6115d3f_0
- r-roxygen2=7.1.0=r35h0357c0b_0
- r-rpart=4.1_15=r35hcdcec82_1
- r-rprojroot=1.3_2=r35h6115d3f_1002
- r-rsqlite=2.2.0=r35h0357c0b_0
- r-rstudioapi=0.11=r35h6115d3f_0
- r-rvcheck=0.1.8=r35h6115d3f_0
- r-rversions=2.0.1=r35h6115d3f_0
- r-rvest=0.3.5=r35h6115d3f_0
- r-sandwich=2.5_1=r35h6115d3f_1
- r-scales=1.1.0=r35h6115d3f_0
- r-selectr=0.4_2=r35h6115d3f_0
- r-session=1.0.3=r351_0
- r-sessioninfo=1.1.1=r35h6115d3f_1001
- r-shape=1.4.4=r35_1002
- r-shiny=1.4.0.2=r35h6115d3f_0
- r-shinydashboard=0.7.1=r35h6115d3f_1001
- r-snow=0.4_3=r35h6115d3f_1001
- r-sourcetools=0.1.7=r35he1b5a44_1001
- r-spatial=7.3_12=r35hcdcec82_0
- r-squarem=2020.2=r35h6115d3f_0
- r-stringi=1.4.6=r35h0e574ca_1
- r-stringr=1.4.0=r35h6115d3f_1
- r-survival=3.1_12=r35hcdcec82_0
- r-sys=3.3=r35hcdcec82_0
- r-testthat=2.3.2=r35h0357c0b_0
- r-tibble=3.0.1=r35hcdcec82_0
- r-tidyr=1.0.2=r35h0357c0b_0
- r-tidyselect=1.0.0=r35h6115d3f_0
- r-tidyverse=1.3.0=r35h6115d3f_1
- r-timedate=3043.102=r35h6115d3f_1001
- r-tinytex=0.22=r35h6115d3f_0
- r-tmvtnorm=1.4_10=r35h9bbef5b_1002
- r-triebeard=0.3.0=r35he1b5a44_1002
- r-ttr=0.23_6=r35hcdcec82_0
- r-tweenr=1.0.1=r35h0357c0b_1001
- r-upsetr=1.4.0=r35h6115d3f_1
- r-urltools=1.7.3=r35h0357c0b_1
- r-usethis=1.6.1=r35h6115d3f_0
- r-utf8=1.1.4=r35hcdcec82_1002
- r-uuid=0.1_4=r35hcdcec82_0
- r-vctrs=0.2.4=r35hcdcec82_0
- r-viridis=0.5.1=r35h6115d3f_1003
- r-viridislite=0.3.0=r35h6115d3f_1002
- r-whisker=0.4=r35h6115d3f_0
- r-withr=2.2.0=r35h6115d3f_0
- r-xfun=0.13=r35h6115d3f_0
- r-xml=3.99_0.3=r35hcdcec82_0
- r-xml2=1.3.2=r35h0357c0b_0
- r-xopen=1.0.0=r35h6115d3f_1002
- r-xtable=1.8_4=r35h6115d3f_2
- r-xts=0.12_0=r35hcdcec82_0
- r-yaml=2.2.1=r35hcdcec82_0
- r-zeallot=0.1.0=r35h6115d3f_1001
- r-zoo=1.8_7=r35hcdcec82_0
- readline=8.0=hf8c457e_0
- send2trash=1.5.0=py_0
- setuptools=46.4.0=py37hc8dfbb8_0
- six=1.14.0=py_1
- sqlite=3.30.1=hcee41ef_0
- terminado=0.8.3=py37hc8dfbb8_1
- testpath=0.4.4=py_0
- tk=8.6.10=hed695b0_0
- tktable=2.10=h555a92e_3
- tornado=6.0.4=py37h8f50634_1
- traitlets=4.3.3=py37hc8dfbb8_1
- wcwidth=0.1.9=pyh9f0ad1d_0
- webencodings=0.5.1=py_1
- wget=1.20.1=h22169c7_0
- wheel=0.34.2=py_1
- xorg-kbproto=1.0.7=h14c3975_1002
- xorg-libice=1.0.10=h516909a_0
- xorg-libsm=1.2.3=h84519dc_1000
- xorg-libx11=1.6.9=h516909a_0
- xorg-libxau=1.0.9=h14c3975_0
- xorg-libxdmcp=1.1.3=h516909a_0
- xorg-libxext=1.3.4=h516909a_0
- xorg-libxrender=0.9.10=h516909a_1002
- xorg-renderproto=0.11.1=h14c3975_1002
- xorg-xextproto=7.3.0=h14c3975_1002
- xorg-xproto=7.0.31=h14c3975_1007
- xz=5.2.5=h516909a_0
- zeromq=4.3.2=he1b5a44_2
- zipp=3.1.0=py_0
- zlib=1.2.11=h516909a_1006
- zstd=1.4.4=h6597ccf_3
prefix: /projects/conda/groups/bioinfo/envs/ms_BioC3.8
##======================================================================================================================
## Differential gene expression analysis workflow
##======================================================================================================================
## Define paths to data and scripts
## All variables are exported so that child processes started below can read them.
export PRJ_NAME="schuhmacher_binding_sites"
# screen -R ${PRJ_NAME}
export PRJ_DATA="/project/data/ngs_data"
export PRJ_SCRIPTS="/project/scripts/${PRJ_NAME}"
export NGS_TOOLS="/project/scripts/ngs_tools"
export IGENOME="/project/igenome/Homo_sapiens/Ensembl_v99/GRCh38_p13"
export DGE_CONDA_ENV="/project/conda/envs/dge_BioC3.8"
## files created from here on: full access for user and group, none for others
umask u=rwx,g=rwx,o=
## sanity check: ls fails if any of the listed paths does not exist; its listing is
## discarded and only the failure triggers the error message on stderr.
## NOTE(review): "exit 1" ends the current shell; if these lines are pasted into an
## interactive (screen) session this closes the session -- confirm this is intended
ls "${PRJ_DATA}" "${PRJ_SCRIPTS}" "${NGS_TOOLS}" "${IGENOME}" "${DGE_CONDA_ENV}" >/dev/null || { echo "not all project resources are well defined" 1>&2; exit 1; }
## Define functions, tools and further settings
## put the dge_workflow scripts first on PATH and load the shell helpers
## (presumably the source of dge_fastqc used further down -- verify in dge_utils.sh)
export PATH=${NGS_TOOLS}/dge_workflow:$PATH
source ${NGS_TOOLS}/dge_workflow/dge_utils.sh
## mcdir DIR
## Create DIR (including missing parent directories) and make it the current
## working directory.
## Returns non-zero and leaves the working directory untouched when DIR is
## missing/empty, cannot be created, or cannot be entered.
mcdir(){
  ## reject an empty argument explicitly instead of relying on how mkdir/cd
  ## treat an empty path
  if [ -z "${1:-}" ]; then
    echo "mcdir: missing directory argument" 1>&2
    return 1
  fi
  ## mkdir -p succeeds for an already existing directory, so no separate -d
  ## test is needed; cd only runs when the directory is known to exist
  mkdir -p -- "$1" && cd -- "$1"
}
export -f mcdir
## activate conda environment (path quoted like in the resource check above so
## that it is never word-split)
conda activate "${DGE_CONDA_ENV}"
##======================================================================================================================
## deposit original fastq files and sample description files in 'originals' folder
mcdir "${PRJ_DATA}/originals"
##----------------------------------------------------------------------------------------------------------------------
## rename and merge lane replicates
mcdir "${PRJ_DATA}/lanereps_pooled"
## create renaming scheme
## The R code is handed to R on stdin inside one single-quoted shell string, so it
## must not contain any single-quote character.
## Inputs:  ../originals/schuhmacher_files.txt, sample_naming.txt
## Outputs: renaming_scheme.txt, lane_merge.cmd, basic_design.txt (current directory)
echo '
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.45/R/core_commons.R")
library(data.table)

sheetFile <- "../originals/schuhmacher_files.txt"
renaming_scheme <- fread("sample_naming.txt") %>% as.data.frame()

## map every fastq file to its condition, replicate and sample type
## str_extract() is used because it returns a plain character vector, whereas
## str_match() returns a character matrix that would end up as a matrix column
## NOTE(review): the condition is built by removing the FIRST digit found in the
## new name, not necessarily the trailing replicate digit -- confirm that no
## sample name contains a digit elsewhere
sampleSheet <- read_tsv(sheetFile) %>%
  transmute(filename, sample_name = str_replace(sample_name, "-", "")) %>%
  left_join(renaming_scheme, by = c("sample_name" = "old")) %>%
  mutate(replicate = str_extract(new, "[0-9]$")) %>%
  mutate(replicate = ifelse(is.na(replicate), "1", replicate)) %>%
  transmute(filename,
    condition = str_replace(new, "[0-9]", ""),
    replicate = paste(condition, replicate, sep = "_"),
    sample_type = str_extract(condition, "^[:alpha:]+"))

## output file given by position so the call does not depend on the argument name
write_tsv(sampleSheet, "renaming_scheme.txt")

## one shell command per replicate: concatenate all of its lane files into a
## single gzipped fastq file named after the replicate
sampleSheet %>% group_by(replicate) %>% summarise(
  zcat = paste("zcat", paste(paste0("../originals/", filename), collapse = " "), "| gzip -c >", paste0(replicate[1], ".fastq.gz"))
) %$%
  zcat %>%
  write_lines("lane_merge.cmd")

## basic design table: one row per condition/replicate with batch annotations
sampleSheet %>% transmute(condition, replicate, prep_day = paste0("batch_", str_extract(replicate, "[0-9]$")), seq_batch = ifelse(prep_day == "batch_1", "test", "final")) %>%
  distinct() %>%
  write_tsv("basic_design.txt")
' | R --vanilla -q
## merge lane replicates: submit every command line of lane_merge.cmd as one job
## IFS= and read -r hand each line over unchanged (no backslash processing, no
## trimming); the file is redirected into the loop instead of being piped via cat
while IFS= read -r line; do
  # eval ${line}
  jl submit -j .repmerge "$line"
done < lane_merge.cmd
jl wait --email --report
## perform basic QC of merged fastq files using FastQC v0.11.2
## the glob is passed directly so that file names are not re-split by the shell,
## as happens with the output of $(ls ...); the job runs in the background
dge_fastqc *fastq.gz &