Skip to content
Snippets Groups Projects
Commit 5b0bf6dc authored by Maxim Scheremetjew's avatar Maxim Scheremetjew
Browse files

feat: Implemented client to access UniProt REST API to fetch protein sequences...

feat: Implemented client to access UniProt REST API to fetch protein sequences by UniProt accession.
parent 4984d369
No related branches found
No related tags found
1 merge request: !1 feat: Implemented client to access UniProt REST API to fetch protein sequences by UniProt accession.
"""app module"""
biopython==1.81
# via -r requirements.in
black==22.3.0 black==22.3.0
# via -r requirements.in # via -r requirements.in
certifi==2022.12.7
# via requests
charset-normalizer==3.1.0
# via requests
click==8.1.3 click==8.1.3
# via black # via black
coverage[toml]==7.2.3 coverage[toml]==7.2.3
...@@ -23,6 +29,8 @@ flake8-print==5.0.0 ...@@ -23,6 +29,8 @@ flake8-print==5.0.0
# via -r requirements.in # via -r requirements.in
flake8-todo==0.7 flake8-todo==0.7
# via -r requirements.in # via -r requirements.in
idna==3.4
# via requests
iniconfig==2.0.0 iniconfig==2.0.0
# via pytest # via pytest
isort==4.3.21 isort==4.3.21
...@@ -39,6 +47,8 @@ mock==5.0.2 ...@@ -39,6 +47,8 @@ mock==5.0.2
# via -r requirements.in # via -r requirements.in
mypy-extensions==1.0.0 mypy-extensions==1.0.0
# via black # via black
numpy==1.24.2
# via biopython
packaging==23.1 packaging==23.1
# via # via
# marshmallow # marshmallow
...@@ -76,6 +86,8 @@ python-dotenv==1.0.0 ...@@ -76,6 +86,8 @@ python-dotenv==1.0.0
# via environs # via environs
rapidfuzz==3.0.0 rapidfuzz==3.0.0
# via levenshtein # via levenshtein
requests==2.28.2
# via -r requirements.in
retry-decorator==1.1.1 retry-decorator==1.1.1
# via -r requirements.in # via -r requirements.in
six==1.16.0 six==1.16.0
...@@ -84,3 +96,5 @@ six==1.16.0 ...@@ -84,3 +96,5 @@ six==1.16.0
# python-dateutil # python-dateutil
snowballstemmer==2.2.0 snowballstemmer==2.2.0
# via pydocstyle # via pydocstyle
urllib3==1.26.15
# via requests
"""Settings for the Levenshtein distance service.""" """Settings for the Levenshtein distance service."""
import logging.config import logging.config
from typing import Dict
from environs import Env from environs import Env
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
env = Env() env = Env()
def get_logging_config(env, loggers_to_silent=None): def get_logging_config(env, loggers_to_silent=None):
"""Return the python logging configuration based on environment variables. """Return the python logging configuration based on environment variables.
The log level for the given loggers_to_silent will be set to INFO. The log level for the given loggers_to_silent will be set to INFO.
Use this for loggers that at DEBUG level put too much entries that we never take care. Use this for loggers that at DEBUG level put too much entries that we never take care.
The log level for specific loggers can be more customized with the CUSTOM_LOGGING environment The log level for specific loggers can be more customized with the CUSTOM_LOGGING environment
variable, providing a list of logger and level in the form: CUSTOM_LOGGING=<logger>=<LEVEL> variable, providing a list of logger and level in the form: CUSTOM_LOGGING=<logger>=<LEVEL>
This will overwrite the configuration set because of loggers_to_silent (so can be used to put This will overwrite the configuration set because of loggers_to_silent (so can be used to put
...@@ -85,3 +88,5 @@ LOGGING = get_logging_config( ...@@ -85,3 +88,5 @@ LOGGING = get_logging_config(
], ],
) )
logging.config.dictConfig(LOGGING) logging.config.dictConfig(LOGGING)
# Base URL of the UniProt REST API. Read from the UNIPROT_RESTAPI_ENDPOINT
# environment variable; falls back to https://rest.uniprot.org when unset.
UNIPROT_RESTAPI_ENDPOINT = env.str("UNIPROT_RESTAPI_ENDPOINT", default="https://rest.uniprot.org")
"""tests module"""
"""End to end tests."""
"""Integration tests."""
"""Unit tests."""
"""Fixtures for app unit tests."""
import pytest
from faker import Faker
fake = Faker()
@pytest.fixture
def mock_protein_sequence():
    """Return the raw amino-acid sequence used as the expected parse result."""
    sequence = "MIDVLRTSLDECKNEKGLKILTQEDALEYLMTKMRVIKKYSETDKNVRQQQKRLHLKTLLETGFIPHVENDM"
    return sequence
@pytest.fixture
def mock_fasta_formatted_sequence(mock_protein_sequence):
    """Return a single-record FASTA document: one header line, then the sequence."""
    header_line = (
        ">tr|A0A3G5A511|A0A3G5A511_9VIRU DNA-directed RNA polymerase "
        "OS=Harvfovirus sp OX=2487768 GN=Harvfovirus44_2 PE=3 SV=1"
    )
    # Header and sequence are separated by exactly one newline.
    return "\n".join((header_line, mock_protein_sequence))
@pytest.fixture
def mock_valid_uniprot_client_request(mocker, mock_fasta_formatted_sequence):
    """Mock requests.get method to return 200 status code and valid text.

    Patches ``app.uniprot_api.requests.get`` so every call returns a stub
    response carrying the FASTA fixture as ``text`` and 200 as ``status_code``.

    :return: the object returned by ``mocker.patch`` for ``requests.get``
    """

    class MockResponse:
        """Stub response exposing only ``text`` and ``status_code``."""

        def __init__(self, text, status_code):
            # Plain attributes are enough. The former ``text()`` method was
            # shadowed by this instance attribute and could never be called.
            self.text = text
            self.status_code = status_code

    return mocker.patch(
        "app.uniprot_api.requests.get",
        return_value=MockResponse(mock_fasta_formatted_sequence, 200),
    )
@pytest.fixture
def mock_invalid_uniprot_client_request(mocker):
    """Mock requests.get method to return 500 status code.

    Patches ``app.uniprot_api.requests.get`` so every call returns a stub
    response with an empty ``text`` and 500 as ``status_code``.

    :return: the object returned by ``mocker.patch`` for ``requests.get``
    """

    class MockResponse:
        """Stub response exposing only ``text`` and ``status_code``."""

        def __init__(self, text, status_code):
            # Plain attributes are enough. The former ``text()`` method was
            # shadowed by this instance attribute and could never be called.
            self.text = text
            self.status_code = status_code

    return mocker.patch(
        "app.uniprot_api.requests.get",
        return_value=MockResponse("", 500),
    )
@pytest.fixture
def mock_logger_uniprot_api(mocker):
    """Patch the ``app.uniprot_api`` module logger and return the patch result."""
    patched_logger = mocker.patch("app.uniprot_api.logger")
    return patched_logger
"""Tests for UniProt api client."""
from unittest.mock import call
import pytest
from app.uniprot_api import uniprot_client
@pytest.mark.parametrize(
    "uniprot_id, expected_call_kwargs, expected_result",
    [
        (
            "A0A3G5A511",
            {"params": {"format": "fasta"}},
            "MIDVLRTSLDECKNEKGLKILTQEDALEYLMTKMRVIKKYSETDKNVRQQQKRLHLKTLLETGFIPHVENDM",
        ),
    ],
)
def test_fetch_sequence_by_uniprot_id_should_succeed(
    mock_valid_uniprot_client_request,
    uniprot_id,
    expected_call_kwargs,
    expected_result,
):
    """Check that a 200 response yields the parsed sequence.

    ``requests.get`` is patched by the fixture, so no network access happens.
    """
    # Act: fetch the sequence through the module-level client.
    sequence = uniprot_client.fetch_sequence_by_uniprot_id(uniprot_id)

    # Assert: the patched ``requests.get`` received the expected keyword arguments ...
    request_kwargs = mock_valid_uniprot_client_request.call_args.kwargs
    assert request_kwargs == expected_call_kwargs

    # ... and the FASTA payload was reduced to the bare sequence string.
    assert sequence == expected_result
@pytest.mark.parametrize(
    "uniprot_id",
    [
        "A0A3G5A511",
    ],
)
def test_fetch_sequence_by_uniprot_id_should_fail(
    mock_invalid_uniprot_client_request,
    mock_logger_uniprot_api,
    uniprot_id,
):
    """Test 'fetch_sequence_by_uniprot_id' method should fail and log error message.

    ``requests.get`` is patched to answer with status code 500 and the module
    logger is replaced by a mock so the emitted messages can be inspected.
    """
    # ... given
    # ... a UniProt API client

    # when ... we call `fetch_sequence_by_uniprot_id`
    actual = uniprot_client.fetch_sequence_by_uniprot_id(uniprot_id)

    # then the result should be None
    # (an identity check, so an empty string or other falsy value does not pass)
    assert actual is None

    # and the expected log messages should be called, in this order
    mock_logger_uniprot_api.error.assert_has_calls(
        [
            call("Request failed with an internal server error!"),
            call("Request failed with the following status code: 500"),
        ]
    )
import logging
from io import StringIO
from typing import Optional

import requests
from Bio import SeqIO
from requests import RequestException
from retry_decorator import retry

from app import settings
logger = logging.getLogger(__name__)
class _UniprotClient:
    """Wrapper around UniProt REST API.

    The base URL is taken from ``settings.UNIPROT_RESTAPI_ENDPOINT`` when the
    client is instantiated.
    """

    def __init__(self):
        self.base_url: str = settings.UNIPROT_RESTAPI_ENDPOINT

    @retry((RequestException,), tries=3)
    def fetch_sequence_by_uniprot_id(self, uniprot_id: str) -> Optional[str]:
        """Fetch and return protein sequence for the given UniProt Id.

        The entry is requested in FASTA format and parsed with
        ``Bio.SeqIO``. The call is wrapped in ``retry`` for
        ``RequestException`` with ``tries=3``.

        :param uniprot_id: UniProt accession, e.g. A0A3G5A511
        :return: protein sequence, or ``None`` when the response status is not
            200 or the payload does not contain exactly one FASTA record
        :raises Exception: errors raised while parsing the payload are logged
            and re-raised unchanged
        """
        request_url = self.base_url + f"/uniprotkb/{uniprot_id}"
        response = self._send_request(request_url, result_format="fasta")

        if response.status_code != requests.codes.ok:
            logger.error(f"Request failed with the following status code: {response.status_code}")
            return None

        try:
            with StringIO(response.text) as handler:
                records = list(SeqIO.parse(handler, "fasta"))
        except Exception as err:
            # Log with traceback for diagnosis, then let the caller decide.
            logger.exception(f"Unexpected {err=}, {type(err)=}")
            raise

        if len(records) != 1:
            # An empty or multi-record payload cannot be mapped to a single
            # sequence; report it instead of returning None silently.
            logger.warning(
                f"Expected exactly one FASTA record for {uniprot_id}, got {len(records)}"
            )
            return None

        return str(records[0].seq)

    @staticmethod
    def _send_request(request_url: str, result_format: str) -> requests.Response:
        """Send a GET request and log well-known failure status codes.

        The response is returned unchanged for every status code; the caller
        is responsible for checking ``status_code``.

        :param request_url: full URL of the resource to fetch
        :param result_format: value sent as the ``format`` query parameter
        :return: the response object produced by ``requests.get``
        """
        # NOTE(review): no ``timeout`` is passed, so a stalled connection may
        # block indefinitely. Adding one changes the keyword arguments that
        # the unit tests assert on, so it is left for a coordinated change.
        response = requests.get(
            request_url,
            params={
                "format": result_format,
            },
        )

        status = response.status_code
        if status in (requests.codes.no_content, requests.codes.not_found):
            logger.warning("Entry not found in REST API")
        elif status in (requests.codes.unauthorized, requests.codes.forbidden):
            logger.error("Request not authorized.")
        elif status == requests.codes.internal_server_error:
            logger.error("Request failed with an internal server error!")

        return response
# Module-level client instance; other modules import this name directly.
uniprot_client = _UniprotClient()


if __name__ == "__main__":
    # Manual smoke run: fetch the example accession and log the result.
    fetched_sequence = uniprot_client.fetch_sequence_by_uniprot_id("A0A3G5A511")
    logger.info(fetched_sequence)
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
environs~=4.1.0 environs~=4.1.0
retry-decorator==1.1.1 retry-decorator==1.1.1
Levenshtein~=0.21.0 Levenshtein~=0.21.0
requests~=2.28.0
biopython~=1.81
# QA # QA
flake8==4.0.1 flake8==4.0.1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment