Commit b485c9e1 authored by Holger Brandl's avatar Holger Brandl

cont. mf deseq

parent ffc16c40
......@@ -14,6 +14,7 @@ Options:
--pcutoff <pcutoff> Override q-value filter and filter by p-value instead
--min_count <min_count> Minimal expression in any of the sample to be included in the final result list [default: 10]
--project <project_prefix> Name to prefix all generated result files [default: ]
--design <exp_design> Name to prefix all generated result files [default: sample]
'
opts <- docopt(doc, commandArgs(TRUE))
......@@ -111,7 +112,7 @@ if(!is.null(contrasts_file)){
#' The used design matrix is
contrasts %>% kable()
replicates2samples %>% kable()
#' The contrasts of interest are
contrasts %>% kable()
......@@ -130,23 +131,33 @@ contrasts %>% kable()
#colData <- data.frame(condition=colnames(countMatrix) %>% get_sample_from_replicate)
## new mf-approach
colData <- replicates2samples %>%data.frame(replciate=colnames(countMatrix) %>% get_sample_from_replicate) %>%
arrange(replicate)
colData %<>% rename(sample, sample_2ndwt)
colData <- data_frame(replicate=colnames(countMatrix)) %>% mutate(col_index=row_number()) %>%
full_join(replicates2samples, by="replicate") %>%
arrange(col_index) #%>% transmute(condition=sample_2ndwt) %>% fac2char
## infer the sample to be used from the formula
#designFormula <- "sample_2ndwt + batch"
designFormula <- opts$design
## consider last one as sample attribute
sampleAttribute <- str_split(designFormula, " ")
colData
#colData %<>% rename(sample, sample_2ndwt)
#colData <- replicates2samples %>% arrange(colnames(countMatrix))
# valiadate that contrasts model is compatible with data
%<>%
if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
echo("column model: ",colData$condition)
echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
stop("column model is not consistent with contrasts")
}
#if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
# echo("column model: ",colData$condition)
# echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
# stop("column model is not consistent with contrasts")
#}
#stopifnot(all((contrasts %>% gather %$% value %>% unique) %in% colData$condition))
dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(~ condition))
#http://www.cookbook-r.com/Formulas/Creating_a_formula_from_a_string/
dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(as.formula("~ batch + sample_2ndwt")))
#dds <- estimateSizeFactors(dds)
#dds <- estimateDispersions(dds)
dds <- DESeq(dds)
......@@ -177,7 +188,7 @@ plotDispEsts(dds, main="Dispersion plot")
# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
plotPCA(rld, intgroup = "condition")
plotPCA(rld, intgroup = "sample")
#' The Euclidean distance between samples are calculated after performing the regularized log transformation.
......
#!/usr/bin/env kscript
//DEPS com.offbytwo:docopt:0.6.0.20150202 de.mpicbg.scicomp:joblist:0.6 de.mpicbg.scicomp.bioinfo:kutils:0.1-SNAPSHOT
//DEPS de.mpicbg.scicomp:joblist:0.6 de.mpicbg.scicomp.bioinfo:kutils:0.1-SNAPSHOT
// add docopts to local m2 index
......@@ -92,6 +92,9 @@ for (fastqFile in fastqFiles) {
val fastqBaseName = fastqFile.name.removeSuffix(".gz").removeSuffix(".fastq")
val optionalZcat = if (fastqFile.name.endsWith("gz")) "--readFilesCommand zcat" else ""
// todo consider to use --outTmpDir which defaults to outFileNamePrefix STARtmp and which is deleted automatically
// or use process substiutation in case of zipped reads (see https://github.com/alexdobin/STAR/issues/143#issuecomment-216597465)
val cmd = """
STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 ${optionalZcat} --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts --outFilterMultimapNmax 1 --outSJfilterCountUniqueMin 8 3 3 3
mv ${fastqBaseName}.Aligned.sortedByCoord.out.bam ${fastqBaseName}.bam
......@@ -101,7 +104,7 @@ for (fastqFile in fastqFiles) {
// todo provide proper walltime here
// slurm memory limit https://rc.fas.harvard.edu/resources/documentation/slurm-memory/
// sacct -o MaxRSS -j JOBID
jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "", "medium", 5, 0, "", better.files.File(File(".").toPath())))
jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "10:00", "", 5, 40000, "", better.files.File(File(".").toPath())))
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment