Commit b485c9e1 authored by Holger Brandl's avatar Holger Brandl

cont. mf deseq

parent ffc16c40
@@ -14,6 +14,7 @@ Options:
--pcutoff <pcutoff> Override q-value filter and filter by p-value instead
--min_count <min_count> Minimal expression in any of the samples to be included in the final result list [default: 10]
--project <project_prefix> Name to prefix all generated result files [default: ]
+--design <exp_design> Design formula for the DESeq2 model [default: sample]
'
opts <- docopt(doc, commandArgs(TRUE))
@@ -111,7 +112,7 @@ if(!is.null(contrasts_file)){
#' The design matrix used is
-contrasts %>% kable()
+replicates2samples %>% kable()
#' The contrasts of interest are
contrasts %>% kable()
@@ -130,23 +131,33 @@ contrasts %>% kable()
#colData <- data.frame(condition=colnames(countMatrix) %>% get_sample_from_replicate)
## new mf-approach
-colData <- replicates2samples %>% data.frame(replciate=colnames(countMatrix) %>% get_sample_from_replicate) %>%
-    arrange(replicate)
-colData %<>% rename(sample, sample_2ndwt)
+colData <- data_frame(replicate=colnames(countMatrix)) %>% mutate(col_index=row_number()) %>%
+    full_join(replicates2samples, by="replicate") %>%
+    arrange(col_index) #%>% transmute(condition=sample_2ndwt) %>% fac2char
+## infer the sample attribute to be used from the design formula
+#designFormula <- "sample_2ndwt + batch"
+designFormula <- opts$design
+## consider the last term of the formula as the sample attribute
+sampleAttribute <- str_split(designFormula, " ") %>% unlist() %>% tail(1)
+colData
+#colData %<>% rename(sample, sample_2ndwt)
#colData <- replicates2samples %>% arrange(colnames(countMatrix))
# validate that contrasts model is compatible with data
-%<>%
-if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
-    echo("column model: ",colData$condition)
-    echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
-    stop("column model is not consistent with contrasts")
-}
+#if(!all((contrasts %>% gather %$% value %>% unique) %in% colData$condition)){
+# echo("column model: ",colData$condition)
+# echo("contrasts: ", contrasts %>% gather %$% value %>% unique)
+# stop("column model is not consistent with contrasts")
+#}
#stopifnot(all((contrasts %>% gather %$% value %>% unique) %in% colData$condition))
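The consistency check above is commented out because the multi-factor colData no longer has a fixed condition column. A minimal sketch of how it could be restored against the column named by sampleAttribute (modelLevels and contrastLevels are illustrative names, not part of this commit):

    ## verify that every sample referenced in the contrasts table occurs in the model column
    modelLevels <- colData[[sampleAttribute]] %>% unique()
    contrastLevels <- contrasts %>% gather() %$% value %>% unique()
    if (!all(contrastLevels %in% modelLevels)) {
        echo("column model: ", modelLevels)
        echo("contrasts: ", contrastLevels)
        stop("column model is not consistent with contrasts")
    }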
-dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(~ condition))
+#http://www.cookbook-r.com/Formulas/Creating_a_formula_from_a_string/
+dds <- DESeqDataSetFromMatrix(countMatrix, colData, as.formula("~ batch + sample_2ndwt"))
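As the cookbook-r link above suggests, the hard-coded design string could instead be built from the --design option. A minimal sketch, assuming designFormula holds the formula terms (e.g. "batch + sample_2ndwt"); designString is an illustrative name:

    ## build the model formula from the command-line design string
    designString <- paste("~", designFormula)
    dds <- DESeqDataSetFromMatrix(countMatrix, colData, as.formula(designString))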
#dds <- estimateSizeFactors(dds)
#dds <- estimateDispersions(dds)
dds <- DESeq(dds)
@@ -177,7 +188,7 @@ plotDispEsts(dds, main="Dispersion plot")
# Regularized log transformation for clustering/heatmaps, etc
rld <- rlogTransformation(dds)
-plotPCA(rld, intgroup = "condition")
+plotPCA(rld, intgroup = "sample")
#' The Euclidean distances between samples are calculated after performing the regularized log transformation.
...
#!/usr/bin/env kscript
-//DEPS com.offbytwo:docopt:0.6.0.20150202 de.mpicbg.scicomp:joblist:0.6 de.mpicbg.scicomp.bioinfo:kutils:0.1-SNAPSHOT
+//DEPS de.mpicbg.scicomp:joblist:0.6 de.mpicbg.scicomp.bioinfo:kutils:0.1-SNAPSHOT
// add docopts to local m2 index
@@ -92,6 +92,9 @@ for (fastqFile in fastqFiles) {
val fastqBaseName = fastqFile.name.removeSuffix(".gz").removeSuffix(".fastq")
val optionalZcat = if (fastqFile.name.endsWith("gz")) "--readFilesCommand zcat" else ""
+// todo consider using --outTmpDir, which defaults to outFileNamePrefix STARtmp and which is deleted automatically,
+// or use process substitution in case of zipped reads (see https://github.com/alexdobin/STAR/issues/143#issuecomment-216597465)
val cmd = """
STAR --genomeDir $star_index --readFilesIn $fastqFile --runThreadN 6 ${optionalZcat} --outFileNamePrefix ${fastqBaseName}. --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --sjdbGTFfile $gtfFile --outFilterIntronMotifs RemoveNoncanonicalUnannotated --outFilterType BySJout --quantMode GeneCounts --outFilterMultimapNmax 1 --outSJfilterCountUniqueMin 8 3 3 3
mv ${fastqBaseName}.Aligned.sortedByCoord.out.bam ${fastqBaseName}.bam
@@ -101,7 +104,7 @@ for (fastqFile in fastqFiles) {
// todo provide proper walltime here
// slurm memory limit https://rc.fas.harvard.edu/resources/documentation/slurm-memory/
// sacct -o MaxRSS -j JOBID
-jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "", "medium", 5, 0, "", better.files.File(File(".").toPath())))
+jl.run(JobConfiguration(cmd, "star__${fastqBaseName}", "10:00", "", 5, 40000, "", better.files.File(File(".").toPath())))
}
...