From e17cdaf8f869894a105103bea082ddf834757a95 Mon Sep 17 00:00:00 2001
From: Holger Brandl <holgerbrandl@gmail.com>
Date: Wed, 28 Oct 2015 16:34:36 +0100
Subject: [PATCH] added expression tracking to simone analysis

---
 dge_workflow/dge_utils.sh | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index 1aa79e4..9e25660 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -450,6 +450,37 @@ dge_create_star_index(){
 export -f dge_create_star_index
 
 
+dge_get_pc_isoforms(){
+    # Print the Ensembl transcript ids of all protein-coding transcripts of a species to stdout (one id per line).
+    # $1: Ensembl species prefix (e.g. hsapiens, mmusculus); the R code appends "_gene_ensembl" to select the dataset.
+    # Returns 1 on wrong usage, otherwise the exit status of Rscript. R diagnostics on stderr are discarded.
+    # todo write a more generic version that also filters a provided gtf and/or allows for ccds filtering as well
+    if [ $# -ne 1 ] || [ -z "$1" ]; then
+        echo "Usage: dge_get_pc_isoforms <hsapiens/mmusculus/other_ensembl_species_identifier>" >&2
+        return 1
+    fi
+
+    echo '
+    library(biomaRt)
+    library(dplyr)
+
+    # the species prefix is handed over as the first trailing command line argument
+    mart <- useDataset(paste0(commandArgs(TRUE)[1], "_gene_ensembl"), mart = useMart("ensembl"))
+    #mart <- useDataset("hsapiens_gene_ensembl", mart = useMart("ensembl"))
+    #mart <- useDataset("mmusculus_gene_ensembl", mart = useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
+
+    pcTx <- getBM(attributes = c("ensembl_gene_id", "ensembl_transcript_id", "gene_biotype", "transcript_biotype"), mart = mart) %>%
+        filter(transcript_biotype == "protein_coding")
+
+    #write.table(with(pcTx, data.frame(ensembl_transcript_id)), col.names=F, file="mm10_pc_tx.txt",quote=F,row.names=F)
+    # just print results to stdout
+    write.table(with(pcTx, data.frame(ensembl_transcript_id)), col.names = FALSE, file = stdout(), quote = FALSE, row.names = FALSE)
+    ' | Rscript --vanilla - "$1" 2>/dev/null
+}
+export -f dge_get_pc_isoforms
+
+
+
 dge_star_counts2matrix(){
 echo '
 devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.13/R/core_commons.R")
-- 
GitLab