
Add mark to tests which require network access

Open · mcepl opened this issue 2 years ago · 1 comment

We are in the process of packaging an upgraded version of this package for openSUSE/Factory, and our build environment is sandboxed with no network access, to ensure we don't leak any impurities into the build process.

When running the test suite, it fails like this (the other failing test is similar); the test calls fast_text_detect_languages, which tries to download the fastText lid.176.ftz model from dl.fbaipublicfiles.com:

[   55s] ______ CustomLangDetectParserTest.test_custom_language_detect_fast_text_1 ______
[   55s]
[   55s] self = <urllib.request.HTTPSHandler object at 0x7f7dab3f3250>
[   55s] http_class = <class 'http.client.HTTPSConnection'>
[   55s] req = <urllib.request.Request object at 0x7f7dae34a1f0>
[   55s] http_conn_args = {'check_hostname': None, 'context': None}
[   55s] host = 'dl.fbaipublicfiles.com'
[   55s] h = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s]
[   55s]     def do_open(self, http_class, req, **http_conn_args):
[   55s]         """Return an HTTPResponse object for the request, using http_class.
[   55s]
[   55s]         http_class must implement the HTTPConnection API from http.client.
[   55s]         """
[   55s]         host = req.host
[   55s]         if not host:
[   55s]             raise URLError('no host given')
[   55s]
[   55s]         # will parse host:port
[   55s]         h = http_class(host, timeout=req.timeout, **http_conn_args)
[   55s]         h.set_debuglevel(self._debuglevel)
[   55s]
[   55s]         headers = dict(req.unredirected_hdrs)
[   55s]         headers.update({k: v for k, v in req.headers.items()
[   55s]                         if k not in headers})
[   55s]
[   55s]         # TODO(jhylton): Should this be redesigned to handle
[   55s]         # persistent connections?
[   55s]
[   55s]         # We want to make an HTTP/1.1 request, but the addinfourl
[   55s]         # class isn't prepared to deal with a persistent connection.
[   55s]         # It will try to read all remaining data from the socket,
[   55s]         # which will block while the server waits for the next request.
[   55s]         # So make sure the connection gets closed after the (only)
[   55s]         # request.
[   55s]         headers["Connection"] = "close"
[   55s]         headers = {name.title(): val for name, val in headers.items()}
[   55s]
[   55s]         if req._tunnel_host:
[   55s]             tunnel_headers = {}
[   55s]             proxy_auth_hdr = "Proxy-Authorization"
[   55s]             if proxy_auth_hdr in headers:
[   55s]                 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
[   55s]                 # Proxy-Authorization should not be sent to origin
[   55s]                 # server.
[   55s]                 del headers[proxy_auth_hdr]
[   55s]             h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
[   55s]
[   55s]         try:
[   55s]             try:
[   55s] >               h.request(req.get_method(), req.selector, req.data, headers,
[   55s]                           encode_chunked=req.has_header('Transfer-encoding'))
[   55s]
[   55s] /usr/lib64/python3.9/urllib/request.py:1346:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>, method = 'GET'
[   55s] url = '/fasttext/supervised-models/lid.176.ftz', body = None
[   55s] headers = {'Connection': 'close', 'Host': 'dl.fbaipublicfiles.com', 'User-Agent': 'Python-urllib/3.9'}
[   55s]
[   55s]     def request(self, method, url, body=None, headers={}, *,
[   55s]                 encode_chunked=False):
[   55s]         """Send a complete request to the server."""
[   55s] >       self._send_request(method, url, body, headers, encode_chunked)
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:1285:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>, method = 'GET'
[   55s] url = '/fasttext/supervised-models/lid.176.ftz', body = None
[   55s] headers = {'Connection': 'close', 'Host': 'dl.fbaipublicfiles.com', 'User-Agent': 'Python-urllib/3.9'}
[   55s] encode_chunked = False
[   55s]
[   55s]     def _send_request(self, method, url, body, headers, encode_chunked):
[   55s]         # Honor explicitly requested Host: and Accept-Encoding: headers.
[   55s]         header_names = frozenset(k.lower() for k in headers)
[   55s]         skips = {}
[   55s]         if 'host' in header_names:
[   55s]             skips['skip_host'] = 1
[   55s]         if 'accept-encoding' in header_names:
[   55s]             skips['skip_accept_encoding'] = 1
[   55s]
[   55s]         self.putrequest(method, url, **skips)
[   55s]
[   55s]         # chunked encoding will happen if HTTP/1.1 is used and either
[   55s]         # the caller passes encode_chunked=True or the following
[   55s]         # conditions hold:
[   55s]         # 1. content-length has not been explicitly set
[   55s]         # 2. the body is a file or iterable, but not a str or bytes-like
[   55s]         # 3. Transfer-Encoding has NOT been explicitly set by the caller
[   55s]
[   55s]         if 'content-length' not in header_names:
[   55s]             # only chunk body if not explicitly set for backwards
[   55s]             # compatibility, assuming the client code is already handling the
[   55s]             # chunking
[   55s]             if 'transfer-encoding' not in header_names:
[   55s]                 # if content-length cannot be automatically determined, fall
[   55s]                 # back to chunked encoding
[   55s]                 encode_chunked = False
[   55s]                 content_length = self._get_content_length(body, method)
[   55s]                 if content_length is None:
[   55s]                     if body is not None:
[   55s]                         if self.debuglevel > 0:
[   55s]                             print('Unable to determine size of %r' % body)
[   55s]                         encode_chunked = True
[   55s]                         self.putheader('Transfer-Encoding', 'chunked')
[   55s]                 else:
[   55s]                     self.putheader('Content-Length', str(content_length))
[   55s]         else:
[   55s]             encode_chunked = False
[   55s]
[   55s]         for hdr, value in headers.items():
[   55s]             self.putheader(hdr, value)
[   55s]         if isinstance(body, str):
[   55s]             # RFC 2616 Section 3.7.1 says that text default has a
[   55s]             # default charset of iso-8859-1.
[   55s]             body = _encode(body, 'body')
[   55s] >       self.endheaders(body, encode_chunked=encode_chunked)
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:1331:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s] message_body = None
[   55s]
[   55s]     def endheaders(self, message_body=None, *, encode_chunked=False):
[   55s]         """Indicate that the last header line has been sent to the server.
[   55s]
[   55s]         This method sends the request to the server.  The optional message_body
[   55s]         argument can be used to pass a message body associated with the
[   55s]         request.
[   55s]         """
[   55s]         if self.__state == _CS_REQ_STARTED:
[   55s]             self.__state = _CS_REQ_SENT
[   55s]         else:
[   55s]             raise CannotSendHeader()
[   55s] >       self._send_output(message_body, encode_chunked=encode_chunked)
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:1280:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s] message_body = None, encode_chunked = False
[   55s]
[   55s]     def _send_output(self, message_body=None, encode_chunked=False):
[   55s]         """Send the currently buffered request and clear the buffer.
[   55s]
[   55s]         Appends an extra \\r\\n to the buffer.
[   55s]         A message_body may be specified, to be appended to the request.
[   55s]         """
[   55s]         self._buffer.extend((b"", b""))
[   55s]         msg = b"\r\n".join(self._buffer)
[   55s]         del self._buffer[:]
[   55s] >       self.send(msg)
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:1040:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s] data = b'GET /fasttext/supervised-models/lid.176.ftz HTTP/1.1\r\nAccept-Encoding: identity\r\nHost: dl.fbaipublicfiles.com\r\nUser-Agent: Python-urllib/3.9\r\nConnection: close\r\n\r\n'
[   55s]
[   55s]     def send(self, data):
[   55s]         """Send `data' to the server.
[   55s]         ``data`` can be a string object, a bytes object, an array object, a
[   55s]         file-like object that supports a .read() method, or an iterable object.
[   55s]         """
[   55s]
[   55s]         if self.sock is None:
[   55s]             if self.auto_open:
[   55s] >               self.connect()
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:980:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s]
[   55s]     def connect(self):
[   55s]         "Connect to a host on a given (SSL) port."
[   55s]
[   55s] >       super().connect()
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:1447:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s]
[   55s]     def connect(self):
[   55s]         """Connect to the host and port specified in __init__."""
[   55s] >       self.sock = self._create_connection(
[   55s]             (self.host,self.port), self.timeout, self.source_address)
[   55s]
[   55s] /usr/lib64/python3.9/http/client.py:946:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] address = ('dl.fbaipublicfiles.com', 443)
[   55s] timeout = <object object at 0x7f7db0857fb0>, source_address = None
[   55s]
[   55s]     def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT,
[   55s]                           source_address=None):
[   55s]         """Connect to *address* and return the socket object.
[   55s]
[   55s]         Convenience function.  Connect to *address* (a 2-tuple ``(host,
[   55s]         port)``) and return the socket object.  Passing the optional
[   55s]         *timeout* parameter will set the timeout on the socket instance
[   55s]         before attempting to connect.  If no *timeout* is supplied, the
[   55s]         global default timeout setting returned by :func:`getdefaulttimeout`
[   55s]         is used.  If *source_address* is set it must be a tuple of (host, port)
[   55s]         for the socket to bind as a source address before making the connection.
[   55s]         A host of '' or port 0 tells the OS to use the default.
[   55s]         """
[   55s]
[   55s]         host, port = address
[   55s]         err = None
[   55s] >       for res in getaddrinfo(host, port, 0, SOCK_STREAM):
[   55s]
[   55s] /usr/lib64/python3.9/socket.py:823:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] host = 'dl.fbaipublicfiles.com', port = 443, family = 0
[   55s] type = <SocketKind.SOCK_STREAM: 1>, proto = 0, flags = 0
[   55s]
[   55s]     def getaddrinfo(host, port, family=0, type=0, proto=0, flags=0):
[   55s]         """Resolve host and port into list of address info entries.
[   55s]
[   55s]         Translate the host/port argument into a sequence of 5-tuples that contain
[   55s]         all the necessary arguments for creating a socket connected to that service.
[   55s]         host is a domain name, a string representation of an IPv4/v6 address or
[   55s]         None. port is a string service name such as 'http', a numeric port number or
[   55s]         None. By passing None as the value of host and port, you can pass NULL to
[   55s]         the underlying C API.
[   55s]
[   55s]         The family, type and proto arguments can be optionally specified in order to
[   55s]         narrow the list of addresses returned. Passing zero as a value for each of
[   55s]         these arguments selects the full range of results.
[   55s]         """
[   55s]         # We override this function since we want to translate the numeric family
[   55s]         # and socket type values to enum constants.
[   55s]         addrlist = []
[   55s] >       for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
[   55s] E       socket.gaierror: [Errno -3] Temporary failure in name resolution
[   55s]
[   55s] /usr/lib64/python3.9/socket.py:954: gaierror
[   55s]
[   55s] During handling of the above exception, another exception occurred:
[   55s]
[   55s] a = (<tests.test_language_detect.CustomLangDetectParserTest testMethod=test_custom_language_detect_fast_text_1>,)
[   55s]
[   55s]     @wraps(func)
[   55s]     def standalone_func(*a):
[   55s] >       return func(*(a + p.args), **p.kwargs)
[   55s]
[   55s] /usr/lib/python3.9/site-packages/parameterized/parameterized.py:637:
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s] tests/test_language_detect.py:26: in test_custom_language_detect_fast_text
[   55s]     self.result = fast_text_detect_languages(dt_string, confidence_threshold)
[   55s] dateparser/custom_language_detection/fasttext.py:37: in detect_languages
[   55s]     _language_parser = _load_fasttext_model()
[   55s] dateparser/custom_language_detection/fasttext.py:27: in _load_fasttext_model
[   55s]     fasttext_downloader(_DEFAULT_MODEL)
[   55s] dateparser_cli/fasttext_manager.py:28: in fasttext_downloader
[   55s]     urllib.request.urlretrieve(model_url, models_directory_path)
[   55s] /usr/lib64/python3.9/urllib/request.py:239: in urlretrieve
[   55s]     with contextlib.closing(urlopen(url, data)) as fp:
[   55s] /usr/lib64/python3.9/urllib/request.py:214: in urlopen
[   55s]     return opener.open(url, data, timeout)
[   55s] /usr/lib64/python3.9/urllib/request.py:517: in open
[   55s]     response = self._open(req, data)
[   55s] /usr/lib64/python3.9/urllib/request.py:534: in _open
[   55s]     result = self._call_chain(self.handle_open, protocol, protocol +
[   55s] /usr/lib64/python3.9/urllib/request.py:494: in _call_chain
[   55s]     result = func(*args)
[   55s] /usr/lib64/python3.9/urllib/request.py:1389: in https_open
[   55s]     return self.do_open(http.client.HTTPSConnection, req,
[   55s] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[   55s]
[   55s] self = <urllib.request.HTTPSHandler object at 0x7f7dab3f3250>
[   55s] http_class = <class 'http.client.HTTPSConnection'>
[   55s] req = <urllib.request.Request object at 0x7f7dae34a1f0>
[   55s] http_conn_args = {'check_hostname': None, 'context': None}
[   55s] host = 'dl.fbaipublicfiles.com'
[   55s] h = <http.client.HTTPSConnection object at 0x7f7dae34aa30>
[   55s]
[   55s]     def do_open(self, http_class, req, **http_conn_args):
[   55s]         """Return an HTTPResponse object for the request, using http_class.
[   55s]
[   55s]         http_class must implement the HTTPConnection API from http.client.
[   55s]         """
[   55s]         host = req.host
[   55s]         if not host:
[   55s]             raise URLError('no host given')
[   55s]
[   55s]         # will parse host:port
[   55s]         h = http_class(host, timeout=req.timeout, **http_conn_args)
[   55s]         h.set_debuglevel(self._debuglevel)
[   55s]
[   55s]         headers = dict(req.unredirected_hdrs)
[   55s]         headers.update({k: v for k, v in req.headers.items()
[   55s]                         if k not in headers})
[   55s]
[   55s]         # TODO(jhylton): Should this be redesigned to handle
[   55s]         # persistent connections?
[   55s]
[   55s]         # We want to make an HTTP/1.1 request, but the addinfourl
[   55s]         # class isn't prepared to deal with a persistent connection.
[   55s]         # It will try to read all remaining data from the socket,
[   55s]         # which will block while the server waits for the next request.
[   55s]         # So make sure the connection gets closed after the (only)
[   55s]         # request.
[   55s]         headers["Connection"] = "close"
[   55s]         headers = {name.title(): val for name, val in headers.items()}
[   55s]
[   55s]         if req._tunnel_host:
[   55s]             tunnel_headers = {}
[   55s]             proxy_auth_hdr = "Proxy-Authorization"
[   55s]             if proxy_auth_hdr in headers:
[   55s]                 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
[   55s]                 # Proxy-Authorization should not be sent to origin
[   55s]                 # server.
[   55s]                 del headers[proxy_auth_hdr]
[   55s]             h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
[   55s]
[   55s]         try:
[   55s]             try:
[   55s]                 h.request(req.get_method(), req.selector, req.data, headers,
[   55s]                           encode_chunked=req.has_header('Transfer-encoding'))
[   55s]             except OSError as err: # timeout error
[   55s] >               raise URLError(err)
[   55s] E               urllib.error.URLError: <urlopen error [Errno -3] Temporary failure in name resolution>
[   55s]
[   55s] /usr/lib64/python3.9/urllib/request.py:1349: URLError

The complete build log shows all packages used and the specific steps taken.

With this patch we are at least able to mark these tests as requiring network access and skip them en masse:

---
 pytest.ini                    |    3 +++
 tests/test_language_detect.py |    2 ++
 2 files changed, 5 insertions(+)

--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    network: tests requiring network connection
--- a/tests/test_language_detect.py
+++ b/tests/test_language_detect.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 from unittest.mock import Mock
 import unittest
+import pytest

 from parameterized import parameterized, param

@@ -18,6 +19,7 @@ class CustomLangDetectParserTest(unittes
     def check_is_returned_list(self):
         self.assertEqual(type(self.result), list)

+    @pytest.mark.network
     @parameterized.expand([
         param(dt_string="14 June 2020", confidence_threshold=0.0),
         param(dt_string="26 July 2021", confidence_threshold=0.0)
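
For reference, once the marker is registered in pytest.ini as above, the network-dependent tests can be deselected wholesale via pytest's marker expression (a sketch; the exact invocation in the sandboxed build may differ):

    # run the suite while deselecting everything marked with @pytest.mark.network
    python -m pytest -m "not network"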

Unfortunately, that makes this package hard-depend on pytest, and I cannot find a purely unittest-based way of marking tests (we could probably use unittest.skipIf based on an environment variable).

mcepl · May 24 '22 21:05

Actually, this version of the patch doesn't require pytest:

---
 tests/test_language_detect.py |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

--- a/tests/test_language_detect.py
+++ b/tests/test_language_detect.py
@@ -1,6 +1,8 @@
+import os
+import unittest
+
 from datetime import datetime
 from unittest.mock import Mock
-import unittest

 from parameterized import parameterized, param

@@ -13,6 +15,7 @@ from dateparser.search import search_dat
 detect_languages = Mock()
 detect_languages.return_value = ["en"]

+NO_NETWORK = int(os.environ.get('NO_NETWORK', 0)) == 1

 class CustomLangDetectParserTest(unittest.TestCase):
     def check_is_returned_list(self):
@@ -22,6 +25,7 @@ class CustomLangDetectParserTest(unittes
         param(dt_string="14 June 2020", confidence_threshold=0.0),
         param(dt_string="26 July 2021", confidence_threshold=0.0)
     ])
+    @unittest.skipIf(NO_NETWORK, "Test requires network access")
     def test_custom_language_detect_fast_text(self, dt_string, confidence_threshold):
         self.result = fast_text_detect_languages(dt_string, confidence_threshold)
         self.check_is_returned_list()
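
With that variant, the sandboxed build only needs to export the variable before running the tests. NO_NETWORK is the name introduced in the patch above; the exact runner invocations below are illustrative:

    # skip the download-dependent tests in an offline build
    NO_NETWORK=1 python -m pytest tests/test_language_detect.py
    # or, without pytest:
    NO_NETWORK=1 python -m unittest discover tests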

mcepl · May 24 '22 22:05