From 0cebbcb3e22ae8a14d06f5d6632b2e20e84f3c98 Mon Sep 17 00:00:00 2001
From: Holger Brandl <holgerbrandl@gmail.com>
Date: Mon, 7 Sep 2015 16:59:18 +0200
Subject: [PATCH] cont star integration

---
 dge_workflow/dge_utils.sh | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index d3eb835..dadb9ce 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -430,3 +430,29 @@ dge_create_star_index(){
     mailme "created star index for $igenome"
 }
 export -f dge_create_star_index
+
+
+dge_star_counts2matrix(){
+## Merges all STAR *ReadsPerGene.out.tab files of the current directory into star_count_matrix.txt
+echo '
+devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.13/R/core_commons.R")
+
+## STAR count file format is (only columns 1 and 2 are used below)
+#column 1: gene ID
+#column 2: counts for unstranded RNA-seq
+#column 3: counts for the 1st read strand aligned with RNA (htseq-count option -s yes)
+#column 4: counts for the 2nd read strand aligned with RNA (htseq-count option -s reverse)
+
+## the pattern is a regex: escape the dots and anchor it so that only real count tables are read
+exprCounts <- list.files(".", "ReadsPerGene[.]out[.]tab$") %>% ldply(function(countFile){
+    read.delim(countFile, header=FALSE) %>%
+        select(V1, V2) %>%
+        set_names("gene_id", "num_alignments") %>%
+        filter(!str_detect(gene_id, "^N_")) %>%
+        mutate(sample=trim_ext(countFile, ".ReadsPerGene.out.tab"))
+}, .progress="text")
+
+countMatrix <- spread(exprCounts, sample, num_alignments)
+write.delim(countMatrix, "star_count_matrix.txt")
+' | R --vanilla -q
+}
+export -f dge_star_counts2matrix
-- 
GitLab