Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
ngs_tools
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bioinfo
ngs_tools
Commits
a7cb6336
Commit
a7cb6336
authored
9 years ago
by
Holger Brandl
Browse files
Options
Downloads
Patches
Plain Diff
smoothed deseq workflow
parent
9c606c16
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
dge_workflow/featcounts_deseq.R
+24
-55
24 additions, 55 deletions
dge_workflow/featcounts_deseq.R
with
24 additions
and
55 deletions
dge_workflow/featcounts_deseq.R
+
24
−
55
View file @
a7cb6336
...
...
@@ -33,25 +33,6 @@ require.auto(DT)
require.auto
(
gplots
)
##+ results='asis', echo=FALSE
#cat('
#<link rel="stylesheet" type="text/css" href="http://cdn.datatables.net/1.10.5/css/jquery.dataTables.min.css">
#<script type="text/javascript" charset="utf8" src="http://code.jquery.com/jquery-2.1.2.min.js"></script>
#<script type="text/javascript" charset="utf8" src="http://cdn.datatables.net/1.10.5/js/jquery.dataTables.min.js"></script>
#
#<script type="text/javascript">
# $(document).ready(function() {
# // alert("test")
# //$("table").DataTable();
# //$("table").DataTable();
# //$("#tab_id").DataTable();
# $(".dtable").DataTable();
# } );
#</script>
#')
count_matrix_file
<-
opts
$
count_matrix
contrasts_file
<-
opts
$
contrasts
...
...
@@ -61,8 +42,6 @@ if(is.numeric(pcutoff)) opts$qcutoff <- NULL
########################################################################################################################
#' # Differential Expresssion Analysis
#' ## Data preparation
#' Working Directory: `r getwd()`
...
...
@@ -74,41 +53,27 @@ countData <- read.delim(count_matrix_file)
head
(
countData
)
%>%
kable
()
countMatrix
<-
countData
%>%
column2rownames
(
"ensembl_gene_id"
)
%>%
as.matrix
()
#' Apply expression filter
nrow
(
countMatrix
)
countMatrix
%<>%
filterByExpression
()
nrow
(
countMatrix
)
# Expression Filtering
genesBefore
<-
nrow
(
countMatrix
)
countMatrix
%<>%
filterByExpression
(
opts
$
min_count
)
genesAfter
<-
nrow
(
countMatrix
)
#' Counts were filtered to only retain genes which had more that `opts$min_count` alignments in at least one replicates. This reduced the number of genes from `r genesBefore' to `r genesAfter`.
########################################################################################################################
#' # Load annotation data
## load transcriptome annotations needed for results annotation
geneInfo
<-
quote
({
## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
mart
<-
biomaRt
::
useDataset
(
guess_mart
(
countData
$
ensembl_gene_id
),
mart
=
biomaRt
::
useMart
(
"ensembl"
))
c
(
"ensembl_gene_id"
,
"external_gene_name"
,
"description"
,
"chromosome_name"
,
"start_position"
,
"end_position"
)
%>%
biomaRt
::
getBM
(
mart
=
mart
)
})
%>%
cache_it
(
"geneInfo"
)
#filterByExpression <- function(fpkmMat, min_count=1, logMode=F){
# if(logMode) fpkmMat<-log10(fpkmMat+1) ## add a pseudocount
#
# geneMax <- apply(fpkmMat, 1, max)
#
# fpkmMat[geneMax>min_count,]
#}
#countMatrixFilt <- filterByExpression(countMatrix, min_count=50)
#' ## Perform Differential Expression Analysis
#' See deseq reference [docs](http://master.bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf) for details
#' To understand fold-change shrinking and estimation check out
#' [Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2](http://genomebiology.com/2014/15/12/550/abstract)
## todo expose as argument
# TODO add general info about the deseq process/model here
get_sample_from_replicate
<-
function
(
repName
)
str_match
(
repName
,
"(.*)_[0-9]{1}$"
)[,
2
]
#
'
Define or load a contrasts matrix
# Define or load a contrasts matrix
if
(
!
is.null
(
contrasts_file
)){
contrasts
<-
read.delim
(
contrasts_file
,
header
=
T
)
%>%
fac2char
()
}
else
{
...
...
@@ -120,12 +85,9 @@ if(!is.null(contrasts_file)){
write.delim
(
contrasts
,
"dba_contrasts.txt"
)
}
#' The used contrasts model is
contrasts
%>%
kable
()
########################################################################################################################
#' ## Perform Differential Expression Analysis
## gene specific factors
#normalizationFactors(dds)
...
...
@@ -145,9 +107,7 @@ dds <- DESeqDataSetFromMatrix(countMatrix, colData, formula(~ condition))
dds
<-
estimateSizeFactors
(
dds
)
#' # Custom Lambda Normalization
#' See https://www.biostars.org/p/79978/
#' For details about size factor normalziation and calculation see https://www.biostars.org/p/79978/
##' Size Factors estimated by DESeq
sizeFactors
(
dds
)
%>%
set_names
(
colnames
(
countMatrix
))
%>%
melt
()
%>%
rownames2column
(
"sample"
)
%>%
ggplot
(
aes
(
sample
,
value
))
+
geom_bar
(
stat
=
"identity"
)
+
ggtitle
(
"deseq size factors"
)
...
...
@@ -167,7 +127,6 @@ dds <- DESeq(dds)
#resultsNames(dds)
#' Model Overview
#+ results='asis'
summary
(
results
(
dds
))
########################################################################################################################
...
...
@@ -339,6 +298,15 @@ deResults %>% ggplot(aes(s1_over_s2_logfc, -log10(pvalue), color=is_hit)) +
## load transcriptome annotations needed for results annotation
geneInfo
<-
quote
({
## mart <- biomaRt::useDataset("drerio_gene_ensembl", mart = biomaRt::useMart("ensembl"))§
mart
<-
biomaRt
::
useDataset
(
guess_mart
(
countData
$
ensembl_gene_id
),
mart
=
biomaRt
::
useMart
(
"ensembl"
))
c
(
"ensembl_gene_id"
,
"external_gene_name"
,
"description"
,
"chromosome_name"
,
"start_position"
,
"end_position"
)
%>%
biomaRt
::
getBM
(
mart
=
mart
)
})
%>%
cache_it
(
"geneInfo"
)
# Export the complete dataset for later analysis
deAnnot
<-
deResults
%>%
#inner_join(normCounts) %>%
...
...
@@ -430,7 +398,8 @@ save(degs, file=".degs.RData")
########################################################################################################################
#' ### MA Plot
#' ## MA Plots
#' Redo MA plots but now including hit overlays
maPlots
<-
deResults
%>%
group_by
(
sample_1
,
sample_2
)
%>%
do
(
gg
=
{
maData
<-
.
...
...
@@ -441,7 +410,7 @@ maData %>% ggplot(aes(0.5*log2(norm_count_1*norm_count_2), log2(norm_count_1/nor
ggtitle
(
with
(
maData
[
1
,],
paste
(
sample_1
,
"vs"
,
sample_2
)))
})
%$%
gg
#+ fig.height=
10
*ceiling(length(maPlots)/2)
#+ fig.height=
7
*ceiling(length(maPlots)/2)
multiplot
(
plotlist
=
maPlots
,
cols
=
min
(
2
,
length
(
maPlots
)))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment