diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index d3eb83528e6809b4ff1306d7e45a97a2c9367dbe..dadb9ce674dab4a856d70dc2d7c823dd2beb1e05 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -430,3 +430,59 @@ dge_create_star_index(){
     mailme "created star index for $igenome"
 }
 export -f dge_create_star_index
+
+
+## Combine the per-sample STAR gene counts found in the current directory into
+## a single table (one row per gene, one column per sample) and write it to
+## star_count_matrix.txt.
+##
+## Usage: dge_star_counts2matrix [count_column]
+##   count_column  column of the ReadsPerGene.out.tab files to use: 2 (default),
+##                 3 or 4; see the format notes in the embedded R script
+##
+## Returns 1 for an invalid count_column, otherwise the exit status of R.
+dge_star_counts2matrix(){
+    local countColumn="${1:-2}"
+
+    case "$countColumn" in
+        2|3|4) ;;
+        *)
+            echo "Usage: dge_star_counts2matrix [2|3|4]" >&2
+            return 1
+            ;;
+    esac
+
+    # the R script below is single-quoted, so it must not contain single quotes
+    echo '
+devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.13/R/core_commons.R")
+
+## STAR count file format is
+#column 1: gene ID
+#column 2: counts for unstranded RNA-seq
+#column 3: counts for the 1st read strand aligned with RNA (htseq-count option -s yes)
+#column 4: counts for the 2nd read strand aligned with RNA (htseq-count option -s reverse)
+
+# count column to use, handed over by the shell wrapper after --args
+countColumn <- as.integer(commandArgs(trailingOnly=TRUE)[1])
+
+countFiles <- list.files(".", "ReadsPerGene.out.tab")
+
+# stop early with a clear message rather than reshaping an empty table below
+if(length(countFiles) == 0) stop("no ReadsPerGene.out.tab files found in ", getwd())
+
+# rows whose gene_id starts with N_ are dropped (STAR summary counters, not genes);
+# trim_ext comes from core_commons.R and presumably strips the given suffix
+exprCounts <- countFiles %>% ldply(function(countFile){
+    read.delim(countFile, header=FALSE)[, c(1, countColumn)] %>%
+        set_names("gene_id", "num_alignments") %>%
+        filter(!str_detect(gene_id, "^N_")) %>%
+        mutate(sample=trim_ext(countFile, ".ReadsPerGene.out.tab"))
+}, .progress="text")
+
+countMatrix <- spread(exprCounts, sample, num_alignments)
+
+write.delim(countMatrix, "star_count_matrix.txt")
+' | R --vanilla -q --args "$countColumn"
+
+}
+export -f dge_star_counts2matrix