Skip to content
Snippets Groups Projects
Commit 7bd3e86b authored by Holger Brandl's avatar Holger Brandl
Browse files

fixed for pe alignments

parent db33dee0
No related branches found
No related tags found
No related merge requests found
......@@ -24,8 +24,8 @@ if(is.na(file.info(baseDir)$isdir)){
stop(paste("base directory", baseDir, "does not exist"))
}
# Load shared helper scripts from the holgerbrandl/datautils GitHub repository.
# devtools::source_url() downloads each script and sources it into the session.
# NOTE(review): this text comes from a diff view whose +/- markers were lost.
# The two v1.4 lines look like the pre-commit versions and the two v1.26 lines
# like their replacements -- confirm against the repository before running,
# since sourcing both versions of core_commons.R is probably not intended.
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/core_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.4/R/ggplot_commons.R")
devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.26/R/core_commons.R")
# The v1.26 ggplot helpers are deliberately left disabled (commented out) here.
#devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.26/R/ggplot_commons.R")
########################################################################################################################
......@@ -35,19 +35,29 @@ devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v
# File-name suffix stripped from the log's basename to obtain the sample name.
# NOTE(review): the example paths below end in ".bowtie.logs" (with a trailing
# "s"); confirm that this suffix matches the files actually produced.
logSuffix=".bowtie.log"

#' Parse one aligner summary log into a data frame of mapping statistics.
#'
#' The relevant lines are located by their text rather than by line number, so
#' the position of a line within the log does not matter (hard-coded line
#' indices only fit one log layout).
#'
#' @param alignSummary Path to the summary log file (presumably written by
#'   bowtie2 -- TODO confirm).
#' @return A data frame with the columns `sample`, `num_reads`,
#'   `mapping_efficiency`, `unique_mappers`, `unique_mapper_prop`,
#'   `multi_mappers` and `multi_mappers_prop`. It has a single row when each of
#'   the three searched phrases occurs on exactly one line of the log.
parseAlgnSummary <- function(alignSummary){
    # pe example
    #alignSummary="/lustre/projects/plantx/smed_genome/schMed2/alignments_SMED_P297/CP-1318_S1_L001.bowtie.logs"
    # se example
    #alignSummary="/lustre/projects/plantx/smed_genome/schMed2/alignments_SMED_P297/SRR959562.bowtie.logs"

    algnData <- readLines(alignSummary)

    # All lines containing `pattern`, with surrounding whitespace removed.
    findLine <- function(lines, pattern){
        lines[str_detect(lines, pattern)] %>% str_trim()
    }

    # Text before the first space of the (first) line, e.g. a count or "96.70%".
    leadingToken <- function(line){
        str_split_fixed(line, " ", 2)[1]
    }

    # Number inside the first "(...)" group of a line, e.g. 93.69 for "(93.69%)".
    parenPercent <- function(line){
        str_match(line, "[(]([0-9.]*)[%)]*") %>% subset(select=2) %>% as.numeric()
    }

    overallLine <- findLine(algnData, "overall alignment rate")
    uniqueLine <- findLine(algnData, "aligned exactly 1 time")
    multiLine <- findLine(algnData, "aligned >1 times")

    data.frame(
        # trimEnd() is not defined in this file; presumably provided by the
        # sourced core_commons.R helpers -- TODO confirm.
        sample=trimEnd(basename(alignSummary), logSuffix),
        # The read count is taken from the very first line of the log.
        num_reads=as.numeric(leadingToken(algnData[1])),
        mapping_efficiency=as.numeric(str_replace(leadingToken(overallLine), "%", "")),
        unique_mappers=as.numeric(leadingToken(uniqueLine)),
        unique_mapper_prop=parenPercent(uniqueLine),
        multi_mappers=as.numeric(leadingToken(multiLine)),
        multi_mappers_prop=parenPercent(multiLine)
    )
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment