Skip to content
Snippets Groups Projects
Commit 4407c214 authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. rna-seq workflow

parent 9f749a60
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env Rscript
#+ echo=FALSE, message=FALSE
suppressMessages(library(docopt))
# retrieve and parse the command-line arguments
doc <- '
Create a small summary for bam-files in a directory
Usage: bam_qc.R <base_directory>
Options:
-c Peform correlation analysis
'
#print(paste("args are:", commandArgs(TRUE)))
opts <- docopt(doc, commandArgs(TRUE))
#opts <- docopt(doc, ". .")
if(!exists("opts")){ stop(doc); return }
#print("doc opts are")
#print(opts)
baseDir <- normalizePath(opts$base_directory)
if(is.na(file.info(baseDir)$isdir)){
stop(paste("base directory", baseDir, "does not exist"))
}
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/core_commons.R")
devtools::source_url("https://dl.dropboxusercontent.com/u/113630701/datautils/R/ggplot_commons.R")
########################################################################################################################
#' # Mapping Summary for: `r baseDir`
parseAlgnSummary_T2_0_11 <- function(alignSummary){
#alignSummary="/projects/bioinfo/holger/projects/marta_rnaseq/human_leipzig/mapping/S5382_aRG_1b_rep1/align_summary.txt"
algnData <- readLines(alignSummary)
data.frame(
condition=basename(dirname(alignSummary)),
num_reads=as.numeric(str_match(algnData[2], " ([0-9]*$)")[,2]),
mapped_reads=as.numeric(str_match(algnData[3], ":[ ]*([0-9]*) ")[,2][1])
) %>% transform(mapping_efficiency=100*mapped_reads/num_reads)
}
algnSummary <- ldply(list.files(".", "align_summary.txt", full.names=TRUE, recursive=T), parseAlgnSummary_T2_0_11)
write.delim(algnSummary, file="tophat_mapping_stats.txt")
# algnSummary <- read.delim("algnSummary.txt")
#' [Tophat Mapping Statistics](tophat_mapping_stats.txt)
scale_fill_discrete <- function (...){ scale_color_brewer(..., type = "seq", palette="Set1", "fill", na.value = "grey50") }
ggplot(algnSummary, aes(condition, mapping_efficiency)) +
geom_bar(stat="identity") +
coord_flip() +
ylim(0,100) +
ggtitle("mapping efficiency")
ggplot(algnSummary, aes(condition, num_reads)) +
geom_bar(stat="identity") +
coord_flip() +
ggtitle("read counts") +scale_y_continuous(labels=comma)
ggplot(algnSummary, aes(condition, mapped_reads)) +
geom_bar(stat="identity") + coord_flip() +
ggtitle("alignments counts") +
scale_y_continuous(labels=comma)
#ggplot(melt(algnSummary), aes(condition, value)) + geom_bar(stat="identity") +facet_wrap(~variable, scales="free") + ggtitle("mapping summary") + scale_y_continuous(labels=comma) + theme(axis.text.x=element_text(angle=90, hjust=0))
#ggsave2(w=10, h=10, p="mapstats")
########################################################################################################################
#' ## Alignment Correlation
#' Without using any transcriptome as reference, the genome can be binned and alignment counts per bin can be used to perform a correlation analysis.
#' Used tool (deepTools)[https://github.com/fidelram/deepTools]
### todo integrate bam correlation analysis
\ No newline at end of file
......@@ -38,17 +38,20 @@ Usage: dge_analysis.R <report_name>
Options:
--cuffdir <DIR> Cache results [default: .]
--dge-pairs <dgepairs> Txt files with sample pairs for which dge analysis should be performed
-S Dont do general statistics and qc analysis
'
#opts <- docopt(doc)
opts <- docopt(doc, "test_report")
cuffdb_directory=normalizePath(opts$cuffdir)
mcdir(opts$report_name)
print("options were:")
print(opts)
# print(opts)
cuffdb_directory <- normalizePath(opts$cuffdir)
mcdir(opts$report_name)
if(is.na(file.info(cuffdb_directory)$isdir)){
stop(paste("db directory does not exist", doc))
......@@ -70,8 +73,7 @@ cuff <- readCufflinks(dir=tmpdir)
cuff
runInfo(cuff)
replicates(cuff)
replicates(cuff) %>% mutate(file=basename(file)) %>% select(-c(total_mass, norm_mass, internal_scale, external_scale))
#######################################################################################################################
......@@ -108,7 +110,10 @@ csVolcanoMatrix(genes(cuff))
require.auto(edgeR)
plotMDS(repFpkmMatrix(genes(cuff)), top=500, main="top500 MDS")
MDSplot(genes(cuff)) + ggtitle("mds plot")
if(unlen(replicates(cuff)$sample)>2){ ## >2 only, because MDS will fail with just 2 samples
MDSplot(genes(cuff)) + ggtitle("mds plot")
}
MDSplot(genes(cuff), replicates=T) + ggtitle("mds plot")
......@@ -128,7 +133,9 @@ csDendro(genes(cuff),replicates=T)
#######################################################################################################################
#' ## Extract and save tables
#' ## Raw data tables
## merge in basic gene info
#xls(runInfo(cuff))
#setwd("/Volumes/project-zeigerer/Rab5_NGS/results/")
......@@ -136,17 +143,25 @@ csDendro(genes(cuff),replicates=T)
geneFpkm<-fpkm(genes(cuff))
write.delim(geneFpkm, file="geneFpkm.txt")
# gene.fpkm <- read.delim("gene.fpkm.txt")
#' [FPKMs](geneFpkm.txt)
geneCounts<-count(genes(cuff))
write.delim(geneCounts, file="geneCounts.txt")
geneCgene.matrix<-fpkmMatrix(genes(cuff))
repGeneFPKMs <- repFpkmMatrix(genes(cuff))
save(repGeneFPKMs, file="repGeneFPKMs.RData")
write.delim(repGeneFPKMs, file="repGeneFPKMs.txt")
# repGeneFPKMs <- read.delim("repGeneFPKMs.txt")
# repGeneFPKMs <- local(get(load("repGeneFPKMs.RData")))
#' [FPKMs by Replicate](repGeneFPKMs.txt)
geneCounts<-count(genes(cuff))
max(geneCounts$count)
write.delim(geneCounts, file="geneCounts.txt")
geneCgene.matrix<-fpkmMatrix(genes(cuff))
#' [Raw Counts](geneCounts.txt)
repCounts <- countMatrix(genes(cuff))
write.delim(repCounts, file="repCounts.txt")
# repCounts <- read.delim("repCounts.txt")
#' [Raw Counts by Replicate](repCounts.txt)
#######################################################################################################################
......
......@@ -26,7 +26,7 @@ export PATH=/home/brandl/bin/cufflinks-2.2.1.Linux_x86_64:$PATH
#fastqFiles=$(ls /projects/bioinfo/holger/projects/helin/all_trep_pooled/dog_*)
fastqFiles=<<TBD>>
fastqFiles=<<<<TBD>>>>
#ll $fastqFiles
## do some qc for the reads
......@@ -39,18 +39,22 @@ mailme "$project: fastqc done"
########################################################################################################################
### Map the data and do cuffdiff
### Align the reads
mcdir $baseDir/mapped
#igenome=/projects/bioinfo/igenomes/Canis_familiaris/Ensembl/CanFam3.1
igenome=<<TBD>>
igenome=<<<<TBD>>>>
dge_tophat_se -i $igenome $fastqFiles 2>&1 | tee mapped.log
mailme "$project: mapping done"
########################################################################################################################
#### Basic Alginment QC and technical replicate grouping
##TBD
########################################################################################################################
### Do the differential expression analysis
......@@ -61,7 +65,7 @@ gtfFile=$igenome/Annotation/Genes/genes.gtf
## define labels to split bam files into replicate groups
#labels="big_cyst,small_cyst"
labels=<<TBD>>
labels=<<<<TBD>>>>
dge_cuffdiff $gtfFile $baseDir/mapped $labels
MakeCuffDB $gtfFile "NAN"
......
#!/usr/bin/env Rscript
# similar http://stackoverflow.com/questions/10943695/what-is-the-knitr-equivalent-of-r-cmd-sweave-myfile-rnw
#http://stackoverflow.com/questions/3433603/parsing-command-line-arguments-in-r-scripts
#https://github.com/edwindj/docopt.R
#http://www.slideshare.net/EdwindeJonge1/docopt-user2014
......@@ -23,10 +25,10 @@ Options:
'
#docopt(doc, "-w test a b c ")$"-w"
opts <- docopt(doc)
#print(opts)
spin_opts <- docopt(doc)
#print(spin_opts)
r_script <- opts$r_script
r_script <- spin_opts$r_script
if(!file.exists(r_script)){
stop(paste("file does not exist\n", doc))
......@@ -42,9 +44,11 @@ options(width=150)
#https://groups.google.com/forum/#!topic/knitr/ojcnq5Nm298
## better table css: http://www.stat.ubc.ca/~jenny/STAT545A/topic10_tablesCSS.html
commandArgs <- function(trailingOnly = FALSE){ return(opts$script_args) }
print("trimmed args are")
print(commandArgs())
commandArgs <- function(trailingOnly = FALSE){ return(as.character(spin_opts$script_args)) }
#print("trimmed args are")
#print(commandArgs())
#print("end args")
spin(r_script, knit=F)
mdScript <- str_replace(r_script, "[.]R$", ".Rmd")
......@@ -63,10 +67,10 @@ cssHeader='
## custom title http://stackoverflow.com/questions/14124022/setting-html-meta-elements-with-knitr
opts_chunk$set(
cache = opts$"-c",
message= opts$"-m",
warning= opts$"-w",
echo= opts$"-e",
cache = spin_opts$"-c",
message= spin_opts$"-m",
warning= spin_opts$"-w",
echo= spin_opts$"-e",
fig.width=15,
width=200
)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment