diff --git a/dge_workflow/dge_utils.sh b/dge_workflow/dge_utils.sh
index 9e25660740ee62ecbfef529264e724c6487181d8..d2dd44f7e7c83033eb6113a459a420648bbd9200 100755
--- a/dge_workflow/dge_utils.sh
+++ b/dge_workflow/dge_utils.sh
@@ -384,12 +384,23 @@ mysub "${project}__cuffdiff" "$cdCmd" -q long -n 4 -R span[hosts=1] | blockScri
 tmpDbDir=$(mktemp -d)
 cp -r . $tmpDbDir
 
-## todo remove this hack
-genome=$(echo $gtfFile | cut -f8 -d'/' | tr '[:upper:]' '[:lower:]'); echo "genome is $genome"
+
+#genome=$(echo $gtfFile | cut -f8 -d'/' | tr '[:upper:]' '[:lower:]'); echo "genome is $genome"
 
 ## make sure to use temp-r to avoid file locking problems
 #export R_LIBS=/tmp/r_index
 
+## guess the genome tag (mm10, mm9, hg19 or zv9) from the resolved gtf path; empty if none matches
+genome=$(scala -e '
+val gtfFile = args(0); //val gtfFile="mm10_igenomes_pc.gtf"
+val pattern = "mm10|mm9|hg19|zv9".r
+println(pattern.findFirstIn(gtfFile).getOrElse(""))
+' "$(readlink -f "$gtfFile")"
+)
+echo "$genome"
+
+
+
 echo '
 require(cummeRbund)
 dbDir=commandArgs(T)[1]
diff --git a/misc/guess_species_from_gtf.scalah b/misc/guess_species_from_gtf.scalah
new file mode 100755
index 0000000000000000000000000000000000000000..70188fdef2ad388b6b0745fcab5e84836422a859
--- /dev/null
+++ b/misc/guess_species_from_gtf.scalah
@@ -0,0 +1,51 @@
+#!/bin/sh
+exec scalas "$0" "$@"
+!#
+
+/** Work in progress: Guess the species from a gtf file. By name first and then by file content.
+  * A more simplistic approach is already implemented in dge_workflow/dge_utils.sh
+  *
+  * Prints the genome tag found in the file name; exits with status 1 when none is found.
+  */
+
+import java.io.File
+
+import scala.io.Source
+
+// http://alvinalexander.com/scala/scala-shell-script-command-line-arguments-args
+// NOTE(review): index 1 is kept as-is; the `scala -e` call in dge_utils.sh reads args(0) -- confirm which index scalas uses
+val gtfFile = args(1)
+//val gtfFile="mm10_igenomes_pc.gtf"
+
+// same tags as the pattern in dge_workflow/dge_utils.sh ("h19" can never match an hg19 file name)
+val pattern = "mm10|mm9|hg19|zv9".r
+val genomeByName = pattern.findFirstIn(gtfFile)
+
+if (genomeByName.isEmpty) {
+  System.exit(1)
+}
+
+/** Guesses the species from marker strings in the gtf: ENSMUSG => mouse, otherwise ENSCAFG => dog. */
+def guessFromContent(gtfFile: File): Option[String] = {
+  // Source.fromString(s"grep ENSMUSG $gtfFile | "!!).getLines().hasNext
+  // Bash.evalCapture(s"grep ENSMUSG $gtfFile | wc -l")
+
+  // true if any line contains the marker; the source is closed after every scan
+  def hasMarker(marker: String): Boolean = {
+    val src = Source.fromFile(gtfFile)
+    try src.getLines().exists(_.contains(marker)) finally src.close()
+  }
+
+  if (hasMarker("ENSMUSG")) Some("mouse")
+  else if (hasMarker("ENSCAFG")) Some("dog")
+  else None
+}
+
+
+// stub for tag-specific handling; the default case avoids a MatchError for tags other than mm9
+genomeByName.get match {
+  case "mm9" =>
+  case _ =>
+}
+// print the bare tag rather than the Option wrapper
+println(genomeByName.get)
\ No newline at end of file