From 8cf68c24a74b7e7b59400fb9b729199584169c42 Mon Sep 17 00:00:00 2001
From: Holger Brandl <brandl@mpi-cbg.de>
Date: Fri, 12 Sep 2014 14:59:13 +0200
Subject: [PATCH] cont. expression data prep

---
 R/core_commons.R      |  2 +-
 bash/bioinfo_utils.sh | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/R/core_commons.R b/R/core_commons.R
index 40a5a27..6731b9e 100644
--- a/R/core_commons.R
+++ b/R/core_commons.R
@@ -98,7 +98,7 @@ rownames2column <- function(df, colname){
     df <- as.df(df)
     df$tttt <- rownames(df);
     rownames(df) <- NULL;
-    rename(df, c(tttt=colname))
+    plyr::rename(df, c(tttt=colname))
 }
 
 
diff --git a/bash/bioinfo_utils.sh b/bash/bioinfo_utils.sh
index b8c7493..64ba9c3 100644
--- a/bash/bioinfo_utils.sh
+++ b/bash/bioinfo_utils.sh
@@ -181,3 +181,25 @@ BlastProgress(){
     done
 }
 export -f BlastProgress
+
+
+## Keeps only the FASTA records whose id is listed in an id file.
+## usage: FilterFastaByIDFile <id_file> < in.fasta > out.fasta
+## The id file holds one id per line; the embedded script imports Biopython.
+FilterFastaByIDFile(){
+python -c '
+from Bio import SeqIO
+import sys
+
+# load the ids into a set so each membership test is a hash lookup
+with open(sys.argv[1]) as f:
+    wanted_ids = set(f.read().splitlines())
+
+for record in SeqIO.parse(sys.stdin, "fasta"):
+    if record.id in wanted_ids:
+        # format() output already ends in a newline; print would add a
+        # second one and leave a blank line after every record
+        sys.stdout.write(record.format("fasta"))
+' "$1";
+}
+export -f FilterFastaByIDFile
-- 
GitLab