Skip to content
Snippets Groups Projects
Commit b485c9e1 authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. mf deseq

parent ffc16c40
No related branches found
No related tags found
No related merge requests found
......@@ -14,6 +14,7 @@ Options:
--pcutoff <pcutoff> Override q-value filter and filter by p-value instead
--min_count <min_count> Minimal expression in any of the sample to be included in the final result list [default: 10]
--project <project_prefix> Name to prefix all generated result files [default: ]
--design <exp_design> Name to prefix all generated result files [default: sample]
'
opts <- docopt(doc, commandArgs(TRUE))
......@@ -111,7 +112,7 @@ if(!is.null(contrasts_file)){
#' The used design matrix is
contrasts %>% kable()
replicates2samples %>% kable()
#' The contrasts of interest are
contrasts %>% kable()
......@@ -130,23 +131,33 @@ contrasts %>% kable()
#colData <- data.frame(condition=colnames(countMatrix) %>% get_sample_from_replicate)
## new mf-approach
colData <- replicates2samples %>%data.frame(replciate=colnames(countMatrix) %>% get_sample_from_replicate) %>%
arrange(replicate)
colData %<>% rename(sample, sample_2ndwt)
colData <- data_frame(replicate=colnames(countMatrix)) %>% mutate(col_index=row_number()) %>%
full_join(replicates2samples, by="replicate") %>%
arrange(col_index) #%>% transmute(condition=sample_2ndwt) %>% fac2char
## infer the sample to be used from the formula
#designFormula <- "sample_2ndwt + batch"
designFormula <- opts$design
## consider last one as sample attribute
sampleAttribute <- str_split(designFormula, " ")
colData
#colData %<>% rename(sample, sample_2ndwt)
#colData <- replicates2samples %>% arrange(colnames(countMatrix))
# valiadate that contrasts model is compatible with data
%<>%
if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
echo("column model: ",colData$condition)
echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
stop("column model is not consistent with contrasts")
}
#if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
# echo("column model: ",colData$condition)
# echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
# stop("column model is not consistent with contrasts")
#}
#stopifnot(all((contrasts %>% gather %$% value %>% unique) %in% colData$condition))
dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(~ condition))
#http://www.cookbook-r.com/Formulas/Creating_a_formula_from_a_string/
dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(as.formula("~ batch + sample_2ndwt")))
#dds <- estimateSizeFactors(dds)
#dds <- estimateDispersions(dds)
dds <- DESeq(dds)
......@@ -177,7 +188,7 @@ plotDispEsts(dds, main="Dispersion plot")
# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
plotPCA(rld, intgroup = "condition")
plotPCA(rld, intgroup = "sample")
#' The Euclidean distance between samples are calculated after performing the regularized log transformation.
......
#!/usr/bin/env kscript
//DEPS com.offbytwo:docopt:0.6.0.20150202 de.mpicbg.scicomp:joblist:0.6 de.mpicbg.scicomp.bioinfo:kutils:0.1-SNAPSHOT
//DEPS de.mpicbg.scicomp:joblist:0.6 de.mpicbg.scicomp.bioinfo:kutils:0.1-SNAPSHOT
// add docopts to local m2 index
......@@ -92,6 +92,9 @@ for (fastqFile in fastqFiles) {
val fastqBaseName = fastqFile.name.removeSuffix(".gz").removeSuffix(".fastq")
val optionalZcat = if (fastqFile.name.endsWith("gz")) "--readFilesCommand zcat" else ""
// todo consider to use --outTmpDir which defaults to outFileNamePrefix STARtmp and which is deleted automatically
// or use process substiutation in case of zipped reads (see https://github.com/alexdobin/STAR/issues/143#issuecomment-216597465)
val cmd = """
STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 ${optionalZcat} --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts --outFilterMultimapNmax 1 --outSJfilterCountUniqueMin 8 3 3 3
mv ${fastqBaseName}.Aligned.sortedByCoord.out.bam ${fastqBaseName}.bam
......@@ -101,7 +104,7 @@ for (fastqFile in fastqFiles) {
// todo provide proper walltime here
// slurm memory limit https://rc.fas.harvard.edu/resources/documentation/slurm-memory/
// sacct -o MaxRSS -j JOBID
jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "", "medium", 5, 0, "", better.files.File(File(".").toPath())))
jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "10:00", "", 5, 40000, "", better.files.File(File(".").toPath())))
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment