Skip to content
Snippets Groups Projects

Allow masking character

Merged chow requested to merge 1-version-release-workflow-maskingfix into 1-version-release-workflow
2 files
+ 8
- 3
Compare changes
  • Side-by-side
  • Inline
Files
2
@@ -45,7 +45,10 @@ class SeqPairSimilarity:
kmer_count_map = {}
for i in range(len(sequence) - k_value + 1):
kmer = sequence[i: i + k_value]
if kmer in kmer_count_map:
# Residues can be masked with the '*' character; any k-mer that contains a masked residue is skipped and not counted.
if '*' in kmer:
continue
elif kmer in kmer_count_map:
kmer_count_map[kmer] += 1
else:
kmer_count_map[kmer] = 1
@@ -211,7 +214,7 @@ class SeqPairSimilarity:
column_scores = self.get_axis_score(self.kmer_count_map_2, self.kmer_count_map_1, self.similarity_matrix,
reverse=True)
final_score = (sum(row_scores) + sum(column_scores)) / (
len(self.sequence1) + len(self.sequence2) - 2 * (self.k - 1))
sum(self.kmer_count_map_1.values()) + sum(self.kmer_count_map_2.values()))
return final_score
def get_similarity_score__sparse(self):
Loading