Commit a0052f9c authored by Lena Hersemann

fixed bug: now the full gene name is extracted and reported from the fasta header

parent 99429f00
......@@ -331,8 +331,8 @@ write_tsv(sample_info, paste0(results_prefix, ".feature_sample_information.txt")
# Build `protein_info`: one row per `protein_ids` value, annotated with the
# gene name(s) and protein accession(s) parsed from the identifier strings.
# Start from the unique (protein_ids, fasta_headers) pairs of `sample_info`.
protein_info <- sample_info %>% distinct(protein_ids, fasta_headers) %>% group_by(protein_ids) %>%
# Within each protein_ids group keep the row(s) with the longest fasta_headers
# string; slice(1) then keeps only the first of them if several tie.
filter(max(nchar(fasta_headers))==nchar(fasta_headers)) %>% slice(1) %>% ungroup() %>%
# Row-wise evaluation so that paste(..., collapse = "; ") below joins the
# matches of a single row rather than of the whole column.
rowwise() %>%
# NOTE(review): this is a diff view without +/- markers. The next two lines
# look like the pre-commit (removed) version and the two lines after them the
# post-commit (added) version -- presumably only one mutate() exists in the
# actual script; confirm against the repository.
# Previous version: gene_name = every "GN=<alphanumeric run>" match in
# fasta_headers, joined with "; ", with the "GN=" prefix stripped afterwards.
mutate(gene_name = paste(unlist(str_extract_all(fasta_headers, "GN=([:alnum:]+)")), collapse = "; ") %>% str_replace_all(., "GN=", ""),
# Previous version: protein_acc = every match of one or more of the characters
# t/r/s/p followed by "|" and an alphanumeric run in protein_ids, joined with
# "; "; every occurrence of "sp", "tr" or "|" is then removed from the result.
protein_acc = paste(unlist(str_extract_all(protein_ids, "[trsp]+\\|([:alnum:]+)")), collapse = "; ") %>% str_replace_all(., "sp|tr|[|]", ""))
# New version: the pattern additionally accepts up to two optional hyphens,
# each followed by an optional single alphanumeric character.
# NOTE(review): `[:alnum:]?` matches at most ONE character, so a name with a
# longer hyphenated part (e.g. "GN=ABC-DEF1") would be cut after the first
# character following the hyphen -- verify whether `[:alnum:]+` or `*` was
# intended.
mutate(gene_name = paste(unlist(str_extract_all(fasta_headers, "GN=([:alnum:]+\\-?[:alnum:]?\\-?[:alnum:]?)")), collapse = "; ") %>% str_replace_all(., "GN=", ""),
# New version of protein_acc with the same hyphen extension. The trailing
# str_replace_all() is unchanged, so "sp"/"tr"/"|" are removed wherever they
# occur in the joined string, not only as the leading database prefix.
protein_acc = paste(unlist(str_extract_all(protein_ids, "[trsp]+\\|([:alnum:]+\\-?[:alnum:]?\\-?[:alnum:]?)")), collapse = "; ") %>% str_replace_all(., "sp|tr|[|]", ""))
# Write the table as tab-separated text to
# "<results_prefix>.feature_information.txt".
write_tsv(protein_info, paste0(results_prefix, ".feature_information.txt"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment