Skip to content
Snippets Groups Projects
Commit 37bf0348 authored by Holger Brandl's avatar Holger Brandl
Browse files

started chipseq workflow

parent d6cd88e6
No related branches found
No related tags found
No related merge requests found
...@@ -6,7 +6,7 @@ suppressMessages(library(docopt)) ...@@ -6,7 +6,7 @@ suppressMessages(library(docopt))
# retrieve and parse the command-line arguments # retrieve and parse the command-line arguments
doc <- ' doc <- '
Create a small summary for bam-files in a directory Create a small summary for bam-files in a directory
Usage: bam_qc.R <base_directory> Usage: bams_qc.R <base_directory>
Options: Options:
-c Peform correlation analysis -c Peform correlation analysis
......
...@@ -19,7 +19,7 @@ Options: ...@@ -19,7 +19,7 @@ Options:
# Stefania: opts <- docopt(doc, "--undirected --pcutoff 0.05 ..") # Stefania: opts <- docopt(doc, "--undirected --pcutoff 0.05 ..")
# opts <- docopt(doc, "--undirected ..") # opts <- docopt(doc, "--undirected ..")
# opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), "..")) # opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), ".."))
## todo use commandArgs here!!
opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" "))) opts <- docopt(doc, paste(Sys.getenv("DGE_PARAMS"), paste(commandArgs(TRUE), collapse=" ")))
#opts #opts
...@@ -38,11 +38,11 @@ if(is.na(file.info(cuffdb_directory)$isdir)){ ...@@ -38,11 +38,11 @@ if(is.na(file.info(cuffdb_directory)$isdir)){
} }
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R")
require(cummeRbund) require(cummeRbund)
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/bio/diffex_commons.R") devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/bio/diffex_commons.R")
#knitr::opts_knit$set(root.dir = getwd()) #knitr::opts_knit$set(root.dir = getwd())
...@@ -348,6 +348,7 @@ sigEnrResults %>% do({ ...@@ -348,6 +348,7 @@ sigEnrResults %>% do({
print(logPlot) print(logPlot)
}) })
ggsave2()
#+ include=FALSE #+ include=FALSE
#ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type)) #ggsave2(ggplot(sigEnrResults, aes(set)) + geom_bar() + ggtitle("enriched term frequencies")) # + facet_wrap(~site_type))
......
...@@ -162,7 +162,7 @@ ziprm tophat_logs ${project}__tophat__*.log ...@@ -162,7 +162,7 @@ ziprm tophat_logs ${project}__tophat__*.log
dge_bam_correlate . dge_bam_correlate .
## create tophat mapping report ## create tophat mapping report
spin.R $DGE_HOME/bam_qc.R . spin.R $DGE_HOME/tophat_qc.R .
mailme "$project: tophat done in $(pwd)" mailme "$project: tophat done in $(pwd)"
...@@ -194,7 +194,7 @@ export -f dge_bam_correlate ...@@ -194,7 +194,7 @@ export -f dge_bam_correlate
## Merge technical replicatews ## Merge technical replicatews
dge_merge_treps(){ dge_merge_treps(){
usage="Usage: dge_bam_correlate <bam_directory> <comma_sep_biosample_spec>" usage="Usage: dge_merge_treps <bam_directory> <comma_sep_biosample_spec>"
if [ $# -ne 2 ]; then if [ $# -ne 2 ]; then
echo $usage >&2 ; return; echo $usage >&2 ; return;
...@@ -221,6 +221,7 @@ for sample in $(echo $bio_samples | tr ",", " "); do ...@@ -221,6 +221,7 @@ for sample in $(echo $bio_samples | tr ",", " "); do
sampleBams=$(find $bam_dir -name '*bam' | grep -v unmapped | grep $sample) sampleBams=$(find $bam_dir -name '*bam' | grep -v unmapped | grep $sample)
echo "merging $sample with $sampleBams" echo "merging $sample with $sampleBams"
## todo add read-groups to bam files
if [ 1 -eq $(echo "$sampleBams" | wc -l) ]; then if [ 1 -eq $(echo "$sampleBams" | wc -l) ]; then
cp $sampleBams $sample.bam cp $sampleBams $sample.bam
else else
...@@ -239,6 +240,26 @@ export -f dge_merge_treps ...@@ -239,6 +240,26 @@ export -f dge_merge_treps
#dge_cd(){
#( ## required to work around docopt sys.exit
#usage='
#Use cuffdiff2 to performa a differntial expression analysis
#Usage: dge_cuffdiff [--exclude=<regex>] <gtfFile> <bamDir> <labels>
#
#Options:
#--exclude=<regex> Exclude patterns (grep regex matching bam file paths to be excluded)
#'
#echo "$usage" | ~/bin/docopts/docopts -h - : $*
#eval "$(echo "$usage" | ~/bin/docopts/docopts -h - : $*)"
#
#)
#}
#dge_cd $gtfFilePC $baseDir/mapped $labels
#dge_cd --help
dge_cuffdiff(){ dge_cuffdiff(){
local gtfFile=$1 local gtfFile=$1
...@@ -259,6 +280,12 @@ fi ...@@ -259,6 +280,12 @@ fi
## collect all bam-files and group them ## collect all bam-files and group them
allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | sort) allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | sort)
if [ -n "$EXCLUDE_BAMS_PATTERN" ]; then
echo "excluding bams with $EXCLUDE_BAMS_PATTERN"
allBams=$(echo "$allBams" | grep -v "$EXCLUDE_BAMS_PATTERN")
fi
## todo use optional argument here --> default "unmapped" --> allow user to add others ## todo use optional argument here --> default "unmapped" --> allow user to add others
#allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "1029" | sort) #allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "1029" | sort)
#allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "Ctrl_1029" | sort) #allBams=$(find $bamDir | grep ".bam$" | grep -v "unmapped" | grep -v "Ctrl_1029" | sort)
......
#!/usr/bin/env Rscript
#+ echo=FALSE, message=FALSE
suppressMessages(library(docopt))
# retrieve and parse the command-line arguments
doc <- '
Create a small summary alignment summary files created when running tophat2.
Usage: bams_qc.R <base_directory>
Options:
-c Peform correlation analysis
'
#print(paste("args are:", commandArgs(TRUE)))
opts <- docopt(doc, commandArgs(TRUE))
#opts <- docopt(doc, ". .")
if(!exists("opts")){ stop(doc); return }
#print("doc opts are")
#print(opts)
baseDir <- normalizePath(opts$base_directory)
if(is.na(file.info(baseDir)$isdir)){
stop(paste("base directory", baseDir, "does not exist"))
}
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
########################################################################################################################
#' # Mapping Summary for: `r baseDir`
parseAlgnSummary_T2_0_11 <- function(alignSummary){
#alignSummary="/projects/bioinfo/holger/projects/marta_rnaseq/human_leipzig/mapping/S5382_aRG_1b_rep1/align_summary.txt"
algnData <- readLines(alignSummary)
data.frame(
condition=basename(dirname(alignSummary)),
num_reads=as.numeric(str_match(algnData[2], " ([0-9]*$)")[,2]),
mapped_reads=as.numeric(str_match(algnData[3], ":[ ]*([0-9]*) ")[,2][1])
) %>% transform(mapping_efficiency=100*mapped_reads/num_reads)
}
algnSummary <- ldply(list.files(".", "align_summary.txt", full.names=TRUE, recursive=T), parseAlgnSummary_T2_0_11)
write.delim(algnSummary, file="tophat_mapping_stats.txt")
# algnSummary <- read.delim("algnSummary.txt")
#' [Tophat Mapping Statistics](tophat_mapping_stats.txt)
scale_fill_discrete <- function (...){ scale_color_brewer(..., type = "seq", palette="Set1", "fill", na.value = "grey50") }
ggplot(algnSummary, aes(condition, mapping_efficiency)) +
geom_bar(stat="identity") +
coord_flip() +
ylim(0,100) +
ggtitle("mapping efficiency")
ggplot(algnSummary, aes(condition, num_reads)) +
geom_bar(stat="identity") +
coord_flip() +
ggtitle("read counts") +scale_y_continuous(labels=comma)
ggplot(algnSummary, aes(condition, mapped_reads)) +
geom_bar(stat="identity") + coord_flip() +
ggtitle("alignments counts") +
scale_y_continuous(labels=comma)
#ggplot(melt(algnSummary), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0))
#ggsave2(w=10, h=10, p="mapstats")
##todo multimapper counting
########################################################################################################################
#' ## Alignment Correlation
#' Without using any transcriptome as reference, the genome can be binned and alignment counts per bin can be used to perform a correlation analysis.
#' Used tool [deepTools](https://github.com/fidelram/deepTools)
### todo integrate bam correlation analysis
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment