Skip to content
Snippets Groups Projects
Commit e827b0df authored by Holger Brandl's avatar Holger Brandl
Browse files

cont. chipseq analysis

parent 820b69ac
No related branches found
No related tags found
No related merge requests found
########################################################################################################################
### Picard ResortSam
sort -k1V $bowtieIndex.fa.dict_tmp > $bowtieIndex.dict
samtools view -h $bamFile | removeMultiMappers > ${bamBaseName}_mmf.tmp.bam
samtools index ${bamBaseName}_mmf.tmp.bam # because ReorderSam runs substantially faster if the input is an indexed BAM file.
picard ReorderSam I=${bamBaseName}_mmf.tmp.bam O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa
## samtools view -h $bamFile | removeMultiMappers | picard ReorderSam I=/dev/stdin O=${bamBaseName}_mmf.bam REFERENCE=$bowtieIndex.fa
samtools index ${bamBaseName}_mmf.bam
## extract chromosome from bam file
samtools view -bo test.bam /home/brandl/mnt/chip-seq_study/ChIPSeq_February_2014/alignments_trimmed_nomulti_pooled/H2Az.bam 1
......
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R")
require(knitr)
require.auto(tidyr)
require(tidyr)
if (!require("DT")) devtools::install_github("rstudio/DT") ## see http://rstudio.github.io/DT/
## setwd("/projects/bioinfo/holger/projects/krause_chipseq/macs2__OLD_SORTING")
########################################################################################################################
#' # Called Peaks Overview
## https://github.com/taoliu/MACS/
readMacsPeaks <- function(peakFile){
read.delim(peakFile, header=F) %>% mutate(file=basename(peakFile))
}
logSuffix="*.narrowPeak.*"
peaks <- list.files(".", logSuffix, full.names=TRUE, recursive=T) %>% ldply(readMacsPeaks)
#5th column: -10*log10pvalue of peak region" (see http://seqanswers.com/forums/showthread.php?t=18674)
## for spec see https://github.com/taoliu/MACS/#output-files
peaks %<>% set_names(c("chromosome_name", "start_position", "end_position", "peak_id", "score", "strand", "fold_change", "pvalue", "qvalue", "sum_rel_to_start", "file"))
## todo remove this
peaks %<>% mutate(file=str_replace(file, "_narrow",".narrow"))
peaks %<>% separate(file, c("sample", "peak_type", "ext"), sep="[.]") %>% select(-ext)
splitProtTime <- . %>% separate(sample, c("protein", "timpoint"), sep="_", remove=F)
#peaks %>% splitProtTime %>% head
peaks %<>% mutate(peak_width=end_position-start_position)
peaks %>%
# sample_frac(0.3) %>%
ggplot(aes(end_position-start_position)) +
geom_histogram() +
facet_grid(sample ~ peak_type) +
# + geom_vline(yintercept=0, color="blue")
ggtitle("peak width distribution") +
xlim(0,1000)
peaks %>%
# sample_frac(0.3) %>%
ggplot(aes(sample, peak_width)) +
geom_jitter(alpha=0.05, data=sample_frac(peaks, 0.1)) +
geom_violin(alpha=0.8) +
facet_wrap(~ peak_type) +
# + geom_vline(yintercept=0, color="blue")
ggtitle("peak width distribution") +
ylim(0,1000)
#peaks %>% group_by(sample, type) %>% summarize(num_peaks=n(), mean_width=median(end_position-start_position))
peaks %>% ggplot(aes(sample, fill=peak_type)) + geom_bar() + ggtitle("peak counts")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment