From 0cebbcb3e22ae8a14d06f5d6632b2e20e84f3c98 Mon Sep 17 00:00:00 2001
From: Holger Brandl <holgerbrandl@gmail.com>
Date: Mon, 7 Sep 2015 16:59:18 +0200
Subject: [PATCH] cont star integration

---
 dge_workflow/dge_utils.sh | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index d3eb835..dadb9ce 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -430,3 +430,53 @@ dge_create_star_index(){
     mailme "created star index for $igenome"
 }
 export -f dge_create_star_index
+
+
+# Merge the per-sample STAR gene counts in the current directory into one
+# gene-by-sample count matrix.
+#
+# Input:  all files in the working directory whose name ends with
+#         ReadsPerGene.out.tab
+# Output: star_count_matrix.txt with one row per gene_id and one column per
+#         sample (named after the count file via trim_ext), filled with the
+#         unstranded counts
+# Takes no arguments. The embedded R script is piped into a vanilla R session
+# and sources core_commons.R from GitHub, so it needs network access.
+# NOTE: the script is passed to echo in single quotes and therefore must not
+# contain a single quote character itself.
+dge_star_counts2matrix(){
+echo '
+devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.13/R/core_commons.R")
+
+## STAR count file format is
+#column 1: gene ID
+#column 2: counts for unstranded RNA-seq
+#column 3: counts for the 1st read strand aligned with RNA (htseq-count option -s yes)
+#column 4: counts for the 2nd read strand aligned with RNA (htseq-count option -s reverse)
+
+## anchor the pattern and escape its dots so that only count files match
+countFiles <- list.files(".", "ReadsPerGene[.]out[.]tab$")
+
+## fail early with a clear message when there is nothing to merge
+if (length(countFiles) == 0) {
+    stop("no STAR ReadsPerGene.out.tab files found in ", getwd())
+}
+
+exprCounts <- countFiles %>% ldply(function(countFile){
+    ## keep gene id and unstranded counts; rows with an N_ id are the STAR
+    ## summary counters (e.g. N_unmapped) and not genes
+    read.delim(countFile, header=FALSE) %>%
+        select(V1, V2) %>%
+        set_names("gene_id", "num_alignments") %>%
+        filter(!str_detect(gene_id, "^N_")) %>%
+        mutate(sample=trim_ext(countFile, ".ReadsPerGene.out.tab"))
+}, .progress="text")
+
+## reshape from long to wide: one row per gene_id, one column per sample
+countMatrix <- spread(exprCounts, sample, num_alignments)
+
+write.delim(countMatrix, "star_count_matrix.txt")
+' | R --vanilla -q
+
+}
+export -f dge_star_counts2matrix
-- 
GitLab