From 5a33481ced8b4e44ef8d5b225f8c342f902e8dfa Mon Sep 17 00:00:00 2001
From: Holger Brandl <holgerbrandl@gmail.com>
Date: Mon, 2 Nov 2015 15:19:07 +0100
Subject: [PATCH] improved genome guessing

---
 dge_workflow/dge_utils.sh          | 15 ++++++++++--
 misc/guess_species_from_gtf.scalah | 37 ++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100755 misc/guess_species_from_gtf.scalah

diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index 9e25660..d2dd44f 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -384,12 +384,23 @@ mysub "${project}__cuffdiff" "$cdCmd"  -q long -n 4 -R span[hosts=1] | blockScri
 tmpDbDir=$(mktemp -d)
 cp -r . $tmpDbDir
 
-## todo remove this hack
-genome=$(echo $gtfFile | cut -f8 -d'/' | tr '[:upper:]' '[:lower:]'); echo "genome is $genome"
+
+#genome=$(echo $gtfFile | cut -f8 -d'/' | tr '[:upper:]' '[:lower:]'); echo "genome is $genome"
 
 ## make sure to use temp-r to avoid file locking problems
 #export R_LIBS=/tmp/r_index
 
+## guess the genome build (mm10, mm9, hg19 or zv9) from the resolved gtf path; empty if none matches
+genome=$(scala -e '
+val gtfFile = args(0); //val gtfFile="mm10_igenomes_pc.gtf"
+val pattern = "mm10|mm9|hg19|zv9".r
+println(pattern.findFirstIn(gtfFile).getOrElse(""))
+' "$(readlink -f "$gtfFile")"
+)
+echo "$genome"
+
+
+
 echo '
 require(cummeRbund)
 dbDir=commandArgs(T)[1]
diff --git a/misc/guess_species_from_gtf.scalah b/misc/guess_species_from_gtf.scalah
new file mode 100755
index 0000000..70188fd
--- /dev/null
+++ b/misc/guess_species_from_gtf.scalah
@@ -0,0 +1,37 @@
+#!/bin/sh
+exec scalas "$0" "$@"
+!#
+
+/** Work in progress: Guess the species from a gtf file, by name first and then by file content. A simpler approach is already implemented in dge_workflow/dge_utils.sh
+  */
+
+import java.io.File
+
+import scala.io.Source
+
+// http://alvinalexander.com/scala/scala-shell-script-command-line-arguments-args
+// NOTE(review): the inline guesser in dge_utils.sh reads args(0); confirm index 1 is right under scalas.
+val gtfFile = args(1)
+//val gtfFile="mm10_igenomes_pc.gtf"
+
+// Known assembly names, kept identical to the list in dge_workflow/dge_utils.sh (human is "hg19").
+val pattern = "mm10|mm9|hg19|zv9".r
+val genomeByName = pattern.findFirstIn(gtfFile)
+// No known assembly name in the path: stop with exit status 1 without printing anything.
+if (genomeByName.isEmpty) System.exit(1)
+/** Guess the species from file content: "mouse" if any line contains ENSMUSG, else "dog" if any contains ENSCAFG, else None. */
+def guessFromContent(gtfFile: File): Option[String] = {
+  // True if any line of the file contains `tag`; the Source is closed even when reading fails.
+  def mentions(tag: String): Boolean = {
+    val lines = Source.fromFile(gtfFile)
+    try lines.getLines().exists(_.contains(tag)) finally lines.close()
+  }
+  // ENSMUSG takes precedence over ENSCAFG; the second scan only runs when the first finds nothing.
+  if (mentions("ENSMUSG")) Some("mouse") else if (mentions("ENSCAFG")) Some("dog") else None
+}
+
+
+// Emit the bare genome name (e.g. "mm10"), matching what the inline guesser in dge_utils.sh prints.
+// `foreach` avoids Option.get and a non-exhaustive match that would throw MatchError for all but "mm9";
+// an empty Option prints nothing (that case exits with status 1 earlier in the script).
+genomeByName.foreach(println)
\ No newline at end of file
-- 
GitLab