From e17cdaf8f869894a105103bea082ddf834757a95 Mon Sep 17 00:00:00 2001
From: Holger Brandl <holgerbrandl@gmail.com>
Date: Wed, 28 Oct 2015 16:34:36 +0100
Subject: [PATCH] added expression tracking to simone analysis

---
 dge_workflow/dge_utils.sh | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index 1aa79e4..9e25660 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -450,6 +450,55 @@ dge_create_star_index(){
 
 export -f dge_create_star_index
 
+# Print the Ensembl transcript IDs of all protein-coding transcripts of a
+# species to stdout, one ID per line.
+#
+# Usage:   dge_get_pc_isoforms <species>
+# Args:    $1  Ensembl species identifier; the biomaRt dataset queried is
+#              "<species>_gene_ensembl" (e.g. hsapiens, mmusculus)
+# Returns: 1 on a wrong number of arguments, otherwise the exit status of
+#          the Rscript call
+# Needs:   Rscript with the biomaRt and dplyr packages installed
+dge_get_pc_isoforms(){
+    # todo write a more generic version that also filters a provided gtf and/or allows for ccds filtering as well
+
+    if [ $# -ne 1 ]; then
+        echo "Usage: dge_get_pc_isoforms <hsapiens/mmusculus/other_ensembl_species_identifier>" >&2
+        # a bare return would pass on the exit status of echo (0) and hide the misuse
+        return 1
+    fi
+
+    # The R script is wrapped in single quotes, so it must not contain any itself.
+    # stderr is not discarded, so R errors reach the caller; only the package
+    # start-up messages are silenced inside the script.
+    echo '
+    # library() aborts on a missing package, whereas require() only returns FALSE
+    suppressPackageStartupMessages({
+        library(biomaRt)
+        library(dplyr)
+    })
+
+    species <- commandArgs(trailingOnly = TRUE)[1]
+    mart <- useDataset(paste0(species, "_gene_ensembl"), mart = useMart("ensembl"))
+    #mart <- useDataset("hsapiens_gene_ensembl", mart = useMart("ensembl"))
+    #mart <- useDataset("mmusculus_gene_ensembl", mart = useMart("ENSEMBL_MART_ENSEMBL", host="www.ensembl.org"))
+
+    pcTx <- getBM(attributes = c("ensembl_gene_id", "ensembl_transcript_id", "gene_biotype", "transcript_biotype"), mart = mart) %>%
+        filter(transcript_biotype == "protein_coding")
+
+    # optional sanity plots; they need library(ggplot2), which is not loaded by this script
+    #ggplot(pcTx, aes(gene_biotype)) + geom_bar() + coord_flip()
+    #ggplot(pcTx, aes(transcript_biotype)) + geom_bar() + coord_flip()
+
+    #write.table(with(pcTx, data.frame(ensembl_transcript_id)), col.names=F, file="mm10_pc_tx.txt",quote=F,row.names=F)
+    # just print results to stdout
+    write.table(pcTx["ensembl_transcript_id"], col.names = FALSE, file = stdout(), quote = FALSE, row.names = FALSE)
+    ' | Rscript --vanilla - "$1"
+}
+export -f dge_get_pc_isoforms
+
+
+
 dge_star_counts2matrix(){
 echo '
 devtools::source_url("https://raw.githubusercontent.com/holgerbrandl/datautils/v1.13/R/core_commons.R")
-- 
GitLab