Commit eaada955 authored by domingue's avatar domingue

Merge branch 'master' of https://git.mpi-cbg.de/bioinfo/ngs_tools

Updating to changes made recently.
parents 2428e2a9 69b4edfa
......@@ -121,6 +121,10 @@ dge_analyze_duplicates.sh $(ls *.bam)
mailme "${PRJ_NAME}: mapping done"
## multiQC - STAR log files summary
multiqc *.Log.final.out
## Kallisto
###TODO build kallisto index if not present
......
......@@ -61,6 +61,9 @@ library(IHW)
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.45/R/core_commons.R")
devtools::source_url("https://git.mpi-cbg.de/bioinfo/datautils/raw/v1.45/R/ggplot_commons.R")
load_pack(ggpubr)
load_pack(gridExtra)
## also load common helper for expression data analysis
# source(interp_from_env("${NGS_TOOLS}/dge_workflow/diffex_commons.R"))
......@@ -537,7 +540,7 @@ normCounts %>% write_tsv(paste0(resultsBase, "sizefac_normalized_counts_by_repli
#' ### MA and Volcano plots
#' MA-plot: The log2 fold change for a particular comparison is plotted on the y-axis and the average of the log2-transformed normalized counts of the two conditions is shown on the x-axis ("M" for minus, because a log ratio is equal to log minus log, and "A" for average). Each gene is represented by a dot. Genes with an adjusted p-value below a certain threshold are shown in cyan (True).
#' This plot demonstrates that only genes with a large average normalized count contain sufficient information to yield a significant call.
#' This plot demonstrates that only genes with a large average normalized count contain sufficient information to yield a significant call. The [ma_plots.pdf](ma_plots.pdf) file contains MA plots colored according to up- and down-regulated genes.
# deseq approach
# plotMA(deResults, main="DESeq2", ylim=c(-2,2))
......@@ -573,6 +576,31 @@ deResults %>% ggplot(aes(0.5 * log2(mean_norm_count_1 * mean_norm_count_2), log2
facet_grid(condition_1 ~ condition_2)
## MA plots using ggmaplot() from the ggpubr package
## (https://rpkgs.datanovia.com/ggpubr/reference/ggmaplot.html).
## One plot is built per condition_1/condition_2 comparison found in deResults
## and all plots are written into a single multi-page PDF (ma_plots.pdf).
## NOTE(review): ggmaplot() is documented to need baseMean, log2FoldChange and
## padj columns; only log2FoldChange is created here -- confirm that deResults
## already provides the other two.
comparisonLabels <- paste0(deResults$condition_1, "_vs_", deResults$condition_2)

maPlots <- lapply(unique(comparisonLabels), function(x) {
  plotData <- deResults %>%
    filter(paste0(condition_1, "_vs_", condition_2) == x) %>%
    rename(log2FoldChange = c1_over_c2_logfc)

  # Prefer the q-value cutoff when it is set, otherwise fall back to the
  # p-value cutoff. Plain if/else instead of ifelse(): the test is a scalar.
  fdrCutoff <- if (!is.null(qcutoff)) qcutoff else pcutoff

  plotData %>% ggmaplot(
    main = paste0(unique(plotData$condition_1), " -> ", unique(plotData$condition_2)),
    fdr = fdrCutoff,
    # lfc_cutoff is on the log2 scale; ggmaplot() takes a linear fold change
    fc = 2^lfc_cutoff,
    size = 0.4,
    #palette = c("#B31B21", "#1465AC", "darkgray"),
    genenames = as.vector(plotData$ensembl_gene_id),
    legend = "top", top = 20,
    font.label = c("bold", 11), label.rectangle = TRUE,
    font.legend = "bold",
    font.main = "bold",
    ggtheme = ggplot2::theme_minimal()
  )
})

pdf("ma_plots.pdf", onefile = TRUE)
# Iterate over the list itself: seq(length(maPlots)) evaluates to c(1, 0) for
# an empty list and would fail with "subscript out of bounds".
for (maPlot in maPlots) {
  print(maPlot)
}
dev.off()
#deResults %$% pvalue %>% log10() %>% quantile(0.05, na.rm=T)
#' A volcano plot displays unstandardized signal (e.g. log-fold-change) against noise-adjusted/standardized signal (e.g. t-statistic or -log10(p-value) from the t-test).
......
......@@ -377,3 +377,20 @@ cd ~/bin/
wget http://www.bioinf.uni-freiburg.de/Software/LocalFold/LocalFold-1.0.tar.gz
tar -zxf LocalFold-1.0.tar.gz
########################################################################################################################
### SalmonTE
# Clone the SalmonTE repository into ~/bin/SalmonTE (no tag or commit is pinned,
# so this fetches whatever the default branch currently points to).
cd ~/bin/
git clone https://github.com/hyunhwaj/SalmonTE
########################################################################################################################
### Salmon
# Download and unpack the prebuilt Salmon v0.14.0 Linux x86_64 release into ~/bin.
cd ~/bin/
wget https://github.com/COMBINE-lab/salmon/releases/download/v0.14.0/salmon-0.14.0_linux_x86_64.tar.gz
tar -zxf salmon-0.14.0_linux_x86_64.tar.gz
# Rename the unpacked directory so that it carries the version number.
# NOTE(review): assumes the archive unpacks into salmon-latest_linux_x86_64 -- confirm.
mv salmon-latest_linux_x86_64 salmon-0.14.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment