diff --git a/R/core_commons.R b/R/core_commons.R
index 40a5a2784c1eba912e9c45900c5fdddd000f5360..6731b9ebff9527ca435f3c2e6d626199d2dcdf95 100644
--- a/R/core_commons.R
+++ b/R/core_commons.R
@@ -98,7 +98,7 @@ rownames2column <- function(df, colname){
     df <- as.df(df)
     df$tttt <- rownames(df);
     rownames(df) <- NULL;
-    rename(df, c(tttt=colname))
+    plyr::rename(df, c(tttt=colname))
 }
 
 
diff --git a/bash/bioinfo_utils.sh b/bash/bioinfo_utils.sh
index b8c74939255b6aaf138dbab452473a6618ee00c1..64ba9c3c0e6f32f964b8a3315268e99b17dabacc 100644
--- a/bash/bioinfo_utils.sh
+++ b/bash/bioinfo_utils.sh
@@ -181,3 +181,26 @@ BlastProgress(){
     done
 }
 export -f BlastProgress
+
+
+## Keep only the FASTA records (read from stdin) whose id is listed in an id file.
+## Usage: FilterFastaByIDFile <id_file> < in.fasta > out.fasta
+##   id_file: plain text, one sequence id per line
+## Requires Biopython (Bio.SeqIO) for the python interpreter found on PATH.
+FilterFastaByIDFile(){
+python -c '
+from Bio import SeqIO
+import sys
+
+# Load the wanted ids into a set so each membership test is O(1).
+with open(sys.argv[1]) as f:
+    wanted_ids = set(f.read().splitlines())
+
+for record in SeqIO.parse(sys.stdin, "fasta"):
+    # Match on record.id only, not on the full description line.
+    if record.id in wanted_ids:
+        # print(...) with a single argument emits the same bytes on Python 2 and 3.
+        print(record.format("fasta"))
+' "$1";
+}
+export -f FilterFastaByIDFile