Commit 4827ca27 authored by Lena Hersemann's avatar Lena Hersemann

finished basic integration of dupradar

parent ceffa1b9
......@@ -33,7 +33,7 @@ if [[ ! -f $picardJar ]]; then
exit 1
fi
#export JL_FORCE_LOCAL=true
export JL_FORCE_LOCAL=true
for bamFile in $bamFiles ; do
# DEBUG bamFile=$(ls $bamFiles | head -n1)
......@@ -41,15 +41,62 @@ for bamFile in $bamFiles ; do
bamBaseName=$(basename $bamFile .bam)
jl submit -j .dupanlysis -n "dupanalysis__${bamBaseName}" "java -jar -Xmx5g $picardJar MarkDuplicates I=${bamFile} O=${outputDir}/${bamBaseName}.markdup.bam M=${outputDir}/${bamBaseName}.markdup.txt"
jl submit -j .dupanlysis -t 5 -n "dupanalysis__${bamBaseName}" "java -jar -Xmx5g $picardJar MarkDuplicates I=${bamFile} O=${outputDir}/${bamBaseName}.markdup.bam M=${outputDir}/${bamBaseName}.markdup.txt"
done
jl wait --report .dupanlysis
jl wait --report .dupanlysis || { echo "duplicate analysis failed" >&2; exit 1; }
#rend.R ${NGS_TOOLS}/misc/fastqc_summary.R $outputDir
## todo make sure data is single-end or provide pe options to dupRadar (see https://www.biostars.org/p/178730/)
rendr_snippet - <<"EOF"
print("todo")
## todo expose gtf as script parameter
gtfFile=${IGENOME}/Annotation/Genes/genes.gtf
# R --args $gtfFile ${outputDir}/*.markdup.bam
rendr_snippet "duplicate_anlysis" <<"EOF" $gtfFile ${outputDir}/*.markdup.bam
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.40/R/core_commons.R")
load_pack(dupRadar)
# source("https://bioconductor.org/biocLite.R")
# biocLite("dupRadar")
gtfFile = commandArgs(TRUE)[1]
bamFiles = commandArgs(TRUE)[-1]
# bamFiles %<>% head(2)
dupResults = bamFiles %>% map(function(bamFile){
# DEBUG bamFile=bamFiles[1]
# bamFile <- "../alignments/marked_duplicates_10_AP.bam"
# gtfFile <- "../../../igenomes/Homo_sapiens/Ensembl_v88_custom/GRCh38/Annotation/Genes/genes.gtf"
analyzeDuprates(bamFile, gtfFile, stranded = 0, paired = FALSE, threads = 10)
})
install_package("session")
session::save.session(".test_bam.dat");
# session::restore.session(".test_bam.dat");
bam_to_sample = function(x) basename(x) %>% trim_ext(".markdup.bam")
walk2(bamFiles, dupResults, ~ {
duprateExpDensPlot(.y,
pal = c("cyan", "blue", "green", "yellow", "red"),
tNoAlternative = TRUE,
tRPKM = TRUE,
tRPKMval = 0.5,
tFit = TRUE,
addLegend = TRUE,
main=paste0("duplicate results for '", bam_to_sample(.x), "'")) %>% print
})
# getDupMatStats(DupMat=dupResults[[1]]) %>% vec_as_df %>% spread(name, value)
sumStats = map2_df(bamFiles, dupResults, ~ getDupMatStats(DupMat=.y) %>% vec_as_df %>% spread(name, value) %>% mutate(sample=bam_to_sample(.x)))
sumStats %>% table_browser()
write_tsv(sumStats, path="duplicate_anlysis.summary_stats.tsv")
# sumStats %>% ggplot(aes())
EOF
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment