Commit 41281865 authored by Holger Brandl's avatar Holger Brandl

merged with remote

parents 703ce35f 13ce56ad
......@@ -13,6 +13,7 @@ Options:
--overlay_expr_data Tsv with overlay data for the kegg pathways
--project <project_prefix> Name to prefix all generated result files [default: ]
--qcutoff <qcutoff> Use a q-value cutoff of 0.01 instead of a q-value cutoff [default: 0.01]
--biomart_database Name of biomart database - if not provided it will be guessed by the ensembl gene ids [default: ]
'
# commandArgs <- function(x) c("--overlay_expr_data", "../plot_score_matrix.txt", "../degs_by_contrast.txt", "contrast")
......@@ -522,13 +523,16 @@ stopifnot(map_lgl(pathwayPlots, ~ file.exists(.$plotfile)) %>% all)
install_package("biomaRt")
## prepare tooltips with expression scores
biomart_db <- ifelse(!is.null(opts$biomart_database), opts$biomart_database, guess_mart(geneLists$ensembl_gene_id))
ens2entrez <- quote({
# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ensembl"))
## todo fix this https://support.bioconductor.org/p/74322/
# mart <- biomaRt::useDataset(guess_mart(geneLists$ensembl_gene_id), mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", host = "www.ensembl.org"))
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL",
dataset = guess_mart(geneLists$ensembl_gene_id),
# dataset = guess_mart(geneLists$ensembl_gene_id),
dataset = biomart_db,
host = "aug2017.archive.ensembl.org",
path = "/biomart/martservice",
archive = FALSE
......
......@@ -118,7 +118,7 @@ expMatrix = countData %>% column_to_rownames("gene_id") %>% as.matrix
expMatrix = expMatrix[complete.cases(expMatrix),]
group_labels = extract_col(colnames(expMatrix))
group_labels = data_frame(replicate = colnames(expMatrix)) %>% left_join(expDesign) %>% pull(condition)
names(group_labels) = colnames(expMatrix)
makePcaPlot(t(expMatrix), color_by = group_labels, title = "PCA of quantifiable proteins in all conditions")
......@@ -166,12 +166,6 @@ orderMatcheExpDesign = data_frame(replicate = colnames(expMatrix)) %>%
right_join(expDesign, by = "replicate") %>%
arrange(col_index)
table_browser(orderMatcheExpDesign)
#' The used design formula is.
designFormula
## build design matrix
#A key strength of limma’s linear modelling approach, is the ability accommodate arbitrary experimental complexity. Simple designs, such as the one in this workflow, with cell type and batch, through to more complicated factorial designs and models with interaction terms can be handled relatively easily
......@@ -190,7 +184,7 @@ exp_study = DGEList(counts = expMatrix, group = orderMatcheExpDesign$condition)
# par(mfrow=c(1,2)) ## 2panel plot for mean-var relationship before and after boom
## Removing heteroscedasticity from count data
#' Removing heteroscedasticity from count data
voomNorm <- voom(exp_study, design, plot = TRUE)
str(voomNorm)
# exp_study$counts is equilvaent to voomNorm$E see https://www.bioconductor.org/help/workflows/RNAseq123/
......@@ -208,7 +202,7 @@ str(voomNorm)
contr.matrix = makeContrasts(contrasts = with(contrasts, paste0("condition", condition_1, "-", "condition", condition_2)), levels = colnames(design) )
contr.matrix
#' ## Model Fitting & Moderated t-test
#' Linear modelling in limma is carried out using the lmFit and contrasts.fit functions originally written for application to microarrays. The functions can be used for both microarray and RNA-seq data and fit a separate model to the expression values for each gene. Next, empirical Bayes moderation is carried out by borrowing information across all the genes to obtain more precise estimates of gene-wise variability (source: RNAseq123)
vfit <- lmFit(voomNorm, design)
......@@ -234,7 +228,8 @@ summary(dt) %>% as_df %>% kable
#' plot MA per contrast
numContrasts = length(colnames(tfit))
par(mfrow=c(1,numContrasts)) ## 2panel plot for mean-var relationship before and after boom
colnames(tfit) %>% iwalk(~ plotMD(tfit, column=.y, status=dt[,.y], main=.x, xlim=c(-8,13)))
# colnames(tfit) %>% iwalk(~ plotMD(tfit, column=.y, status=dt[,.y], main=.x, xlim=c(-8,13)))
colnames(tfit) %>% iwalk(~ plotMD(tfit, column=.y, status=dt[,.y], main=.x))
# load_pack("Glimma")
......@@ -293,10 +288,10 @@ deResults %>% ggplot(aes(adj_p_val)) + geom_histogram()
#+ results='asis'
if (! is.null(qcutoff)) {
echo("Using q-value cutoff of", qcutoff)
deResults %<>% transform(is_hit = adj_p_val < qcutoff)
deResults %<>% transform(is_hit = adj_p_val <= qcutoff)
}else {
echo("Using p-value cutoff of", pcutoff)
deResults %<>% transform(is_hit = p_value < pcutoff)
deResults %<>% transform(is_hit = p_value <= pcutoff)
}
#+
......@@ -321,20 +316,22 @@ if (is.null(gene_info_file)) {
mart = biomaRt::useMart("ENSEMBL_MART_ENSEMBL", dataset = ensembl_dataset, host = "aug2017.archive.ensembl.org", path = "/biomart/martservice", archive = FALSE)
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
c("ensembl_gene_id", "external_gene_name", "description", "chromosome_name", "start_position", "end_position") %>%
biomaRt::getBM(mart = mart) %>%
tbl_df %>% rename(gene_id=ensembl_gene_id)
}) %>% cache_it("geneInfo")
# geneLengths = transmute(geneInfo, gene_id, gene_length = end_position - start_position)
geneDescs = transmute(geneInfo, gene_id, gene_name = external_gene_name, gene_description = description)
} else {
geneInfo = read_tsv(gene_info_file)
colnames(geneInfo)[1] = "gene_id"
geneDescs = geneInfo
}
write_tsv(geneInfo, path = add_prefix("geneInfo.txt"))
# geneLengths = transmute(geneInfo, gene_id, gene_length = end_position - start_position)
geneDescs = transmute(geneInfo, gene_id, gene_name = external_gene_name, gene_description = description)
deAnnot = deResults %>% left_join(geneInfo)
......@@ -361,7 +358,7 @@ deResults %>% ggplot(aes(logfc)) +
facet_grid(condition_1 ~ condition_2) +
geom_vline(xintercept = 0, color = "blue") +
# xlim(-2,2) +
ggtitle("sample1 over sampl2 logFC ")
ggtitle("condition_1 over condition_2 logFC ")
## Extract hits from deseq results
......@@ -381,15 +378,12 @@ degs %>%
spread(c1_overex, n) %>%
kable()
degs %>% mutate(contrast=paste(condition_1, "vs", condition_2))%>%
count(contrast, c1_overex) %>%
complete(contrast, c1_overex, fill = list(n = 0)) %>%
ggplot(aes(contrast, n , fill=c1_overex)) +
geom_col(position = "dodge") +
xlab(NULL) + ylab("# DGEs") +
ggtitle("DEGs by contrast") +
coord_flip()
ggplot(degs, aes(paste(condition_1, "vs", condition_2), fill = (c1_overex))) +
geom_bar(position = "dodge") +
xlab(NULL) +
ylab("# DGEs") +
ggtitle("DEGs by contrast") +
coord_flip()
#with(degs, as.data.frame(table(condition_1, condition_2, c1_overex))) %>% filter(Freq >0) %>% kable()
......@@ -505,5 +499,5 @@ voomNorm$E %>% as_df %>% rownames_to_column("gene_id") %>%
writeLines(capture.output(devtools::session_info()), ".sessionInfo.txt")
session::save.session(".dpa_limma.R.dat");
## session::restore.session(".dpa_limma.R.dat")
session::save.session(".dge_limma.R.dat");
## session::restore.session(".dge_limma.R.dat")
......@@ -69,4 +69,18 @@ srun --partition gpu --pty --gres=gpu:0 bash
jl submit -O "--gres=gpu:0" -q gpu "singularity exec docker://marcusczi/cellranger_clean cellranger --help"
jl wait
```
\ No newline at end of file
```
<br><br>
## UMI-tools
https://github.com/CGATOxford/UMI-tools
````
docker pull genomicpariscentre/umi-tools
docker run --rm -ti --name umi_tools genomicpariscentre/umi-tools:latest
````
\ No newline at end of file
This diff is collapsed.
......@@ -316,4 +316,18 @@ make
./flash --help
## https://sourceforge.net/projects/flashpage/files/
\ No newline at end of file
## https://sourceforge.net/projects/flashpage/files/
########################################################################################################################
### last
cd ~/bin/
wget http://last.cbrc.jp/last/index.cgi/archive/tip.tar.gz
tar -xvzf tip.tar.gz
rm tip.tar.gz
mkdir last
mv last-bc08832db1a1 last/
cd last/last-bc08832db1a1
make install prefix=/home/brandl/bin/last/last-bc08832db1a1/
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment