From 8cf68c24a74b7e7b59400fb9b729199584169c42 Mon Sep 17 00:00:00 2001
From: Holger Brandl <brandl@mpi-cbg.de>
Date: Fri, 12 Sep 2014 14:59:13 +0200
Subject: [PATCH] cont. expression data prep

---
 R/core_commons.R      |  2 +-
 bash/bioinfo_utils.sh | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/R/core_commons.R b/R/core_commons.R
index 40a5a27..6731b9e 100644
--- a/R/core_commons.R
+++ b/R/core_commons.R
@@ -98,7 +98,7 @@ rownames2column <- function(df, colname){
     df <- as.df(df)
     df$tttt <- rownames(df);
     rownames(df) <- NULL;
-    rename(df, c(tttt=colname))
+    plyr::rename(df, c(tttt=colname))
 }
 
 
diff --git a/bash/bioinfo_utils.sh b/bash/bioinfo_utils.sh
index b8c7493..64ba9c3 100644
--- a/bash/bioinfo_utils.sh
+++ b/bash/bioinfo_utils.sh
@@ -181,3 +181,25 @@ BlastProgress(){
     done
 }
 export -f BlastProgress
+
+
+## retains only the sequences whose id is listed in the id file (format: one id per line)
+FilterFastaByIDFile(){
+# usage: FilterFastaByIDFile <id_file> < in.fasta > out.fasta   (needs Biopython)
+python -c '
+from Bio import SeqIO
+import sys
+
+# keep the wanted ids in a set: O(1) membership test instead of a list scan per record
+with open(sys.argv[1]) as f:
+    wanted_ids = set(f.read().splitlines())
+
+for record in SeqIO.parse(sys.stdin, "fasta"):
+    # exact match of the parsed record id against the lines of the id file
+    if record.id in wanted_ids:
+        # sys.stdout.write runs on python 2 and 3 (the print statement is py2-only);
+        # the extra newline keeps the blank separator line that print used to emit
+        sys.stdout.write(record.format("fasta") + "\n")
+' "$1";
+}
+export -f FilterFastaByIDFile
-- 
GitLab