actinia-core
actinia-core copied to clipboard
Better catching of Redis connection errors
When the Redis database is not reachable during processing, the process fails (which is OK for now from my point of view), and sometimes actinia itself stops responding, even after reconnection. This should not be the case, and a restart should not be necessary. The log shows a lot of uncaught exceptions. Some of them are duplicated here, but I am leaving them all in because the line numbers differ slightly and this was the whole log up to the point at which actinia_core was no longer reachable.
{"time": "2021-08-04T08:13:43.919840Z", "level": "INFO", "component": "actinia-core", "module": "redis_fluentd_logger_base", "message": "Running executable sleep with parameters ['3700'] for 55.2963 seconds", "pathname": "/usr/lib/python3.8/site-packages/actinia_core/resources/common/redis_fluentd_logger_base.py", "lineno": 91, "processName": "Process-1:1", "threadName": "MainThread", "node": "1532eff09edb", "status": "running", "user_id": "actinia-gdi", "resource_id": "resource_id-dd81df9f-340c-4537-a990-95b0dfb31815", "accept_timestamp": 1628064768.3832097, "accept_datetime": "2021-08-04 08:12:48.383211", "timestamp": 1628064823.9184768, "datetime": "2021-08-04 08:13:43.918479", "time_delta": 55.535290479660034, "progress": {"step": 1, "num_of_steps": 1}, "process_chain_list": [], "http_code": 200, "urls": {"resources": [], "status": "http://0.0.0.0:8088/api/v1/resources/actinia-gdi/resource_id-dd81df9f-340c-4537-a990-95b0dfb31815"}, "api_info": {"endpoint": "asyncephemeralexportresource", "method": "POST", "path": "/api/v1/locations/utm32n/processing_async_export", "request_url": "http://0.0.0.0:8088/api/v1/locations/utm32n/processing_async_export"}, "logger": "resources_logger"}
Process Process-1:1:
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 559, in connect
sock = self._connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 615, in _connect
raise err
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 603, in _connect
sock.connect(socket_address)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/ephemeral_processing_with_export.py", line 176, in start_job
processing.run()
File "/usr/lib/python3.8/site-packages/actinia_core/resources/ephemeral_processing.py", line 1400, in run
self._send_resource_error(message=self.run_state["error"], exception=self.run_state["exception"])
File "/usr/lib/python3.8/site-packages/actinia_core/resources/ephemeral_processing.py", line 422, in _send_resource_error
self._send_to_database(document=data, final=True)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/ephemeral_processing.py", line 437, in _send_to_database
self.resource_logger.commit(user_id=self.user_id, resource_id=self.resource_id, document=document,
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/resources_logger.py", line 72, in commit
redis_return = bool(self.db.set(db_resource_id, document, expiration))
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/redis_resources.py", line 127, in set
return self.redis_server.setex(self.resource_id_prefix + resource_id,
File "/usr/lib/python3.8/site-packages/redis/client.py", line 1822, in setex
return self.execute_command('SETEX', name, time, value)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 898, in execute_command
conn = self.connection or pool.get_connection(command_name, **options)
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 1192, in get_connection
connection.connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 563, in connect
raise ConnectionError(self._error_message(e))
redis.exceptions.ConnectionError: Error 111 connecting to redis:6379. Connection refused.
[2021-08-04 08:13:54,096] ERROR in app: Exception on /api/v1/resources/actinia-gdi/resource_id-dd81df9f-340c-4537-a990-95b0dfb31815 [GET]
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 1198, in get_connection
if connection.can_read():
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 734, in can_read
return self._parser.can_read(timeout)
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 321, in can_read
return self._buffer and self._buffer.can_read(timeout)
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 230, in can_read
self._read_from_socket(timeout=timeout,
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 201, in _read_from_socket
raise ConnectionError(SERVER_CLOSED_CONNECTION_ERROR)
redis.exceptions.ConnectionError: Connection closed by server.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 559, in connect
sock = self._connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 615, in _connect
raise err
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 603, in _connect
sock.connect(socket_address)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/flask/app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/lib/python3.8/site-packages/flask/app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/usr/lib/python3.8/site-packages/flask_restful/__init__.py", line 468, in wrapper
resp = resource(*args, **kwargs)
File "/usr/lib/python3.8/site-packages/flask_httpauth.py", line 148, in decorated
user = self.authenticate(auth, password)
File "/usr/lib/python3.8/site-packages/flask_httpauth.py", line 219, in authenticate
return self.verify_password_callback(username, client_password)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/user_auth.py", line 70, in verify_password
if not user.exists() or not user.verify_password(password):
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/user.py", line 374, in exists
return self.db.exists(self.user_id)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/redis_user.py", line 225, in exists
return self.redis_server.exists(self.user_id_hash_prefix + user_id)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 1581, in exists
return self.execute_command('EXISTS', *names)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 898, in execute_command
conn = self.connection or pool.get_connection(command_name, **options)
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 1202, in get_connection
connection.connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 563, in connect
raise ConnectionError(self._error_message(e))
redis.exceptions.ConnectionError: Error 111 connecting to redis:6379. Connection refused.
[2021-08-04 08:14:41,466] ERROR in app: Exception on /api/v1/resources/actinia-gdi/resource_id-dd81df9f-340c-4537-a990-95b0dfb31815 [GET]
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 559, in connect
sock = self._connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 615, in _connect
raise err
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 603, in _connect
sock.connect(socket_address)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/flask/app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/lib/python3.8/site-packages/flask/app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/usr/lib/python3.8/site-packages/flask_restful/__init__.py", line 468, in wrapper
resp = resource(*args, **kwargs)
File "/usr/lib/python3.8/site-packages/flask_httpauth.py", line 148, in decorated
user = self.authenticate(auth, password)
File "/usr/lib/python3.8/site-packages/flask_httpauth.py", line 219, in authenticate
return self.verify_password_callback(username, client_password)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/user_auth.py", line 70, in verify_password
if not user.exists() or not user.verify_password(password):
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/user.py", line 374, in exists
return self.db.exists(self.user_id)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/redis_user.py", line 225, in exists
return self.redis_server.exists(self.user_id_hash_prefix + user_id)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 1581, in exists
return self.execute_command('EXISTS', *names)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 898, in execute_command
conn = self.connection or pool.get_connection(command_name, **options)
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 1192, in get_connection
connection.connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 563, in connect
raise ConnectionError(self._error_message(e))
redis.exceptions.ConnectionError: Error 111 connecting to redis:6379. Connection refused.
[2021-08-04 08:21:36,664] ERROR in app: Exception on /api/v1/resources/actinia-gdi/resource_id-dd81df9f-340c-4537-a990-95b0dfb31815 [GET]
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 559, in connect
sock = self._connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 615, in _connect
raise err
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 603, in _connect
sock.connect(socket_address)
ConnectionRefusedError: [Errno 111] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.8/site-packages/flask/app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/lib/python3.8/site-packages/flask/app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/usr/lib/python3.8/site-packages/flask_restful/__init__.py", line 468, in wrapper
resp = resource(*args, **kwargs)
File "/usr/lib/python3.8/site-packages/flask_httpauth.py", line 148, in decorated
user = self.authenticate(auth, password)
File "/usr/lib/python3.8/site-packages/flask_httpauth.py", line 219, in authenticate
return self.verify_password_callback(username, client_password)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/user_auth.py", line 70, in verify_password
if not user.exists() or not user.verify_password(password):
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/user.py", line 374, in exists
return self.db.exists(self.user_id)
File "/usr/lib/python3.8/site-packages/actinia_core/resources/common/redis_user.py", line 225, in exists
return self.redis_server.exists(self.user_id_hash_prefix + user_id)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 1581, in exists
return self.execute_command('EXISTS', *names)
File "/usr/lib/python3.8/site-packages/redis/client.py", line 898, in execute_command
conn = self.connection or pool.get_connection(command_name, **options)
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 1192, in get_connection
connection.connect()
File "/usr/lib/python3.8/site-packages/redis/connection.py", line 563, in connect
raise ConnectionError(self._error_message(e))
And one more exception, as reported in the resource response:
exception: {
message: "Error while reading from socket: (104, 'Connection reset by peer')",
traceback: [
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing.py", line 1747, in run
self._execute()
",
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing_with_export.py", line 604, in _execute
EphemeralProcessing._execute(self)
",
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing.py", line 1545, in _execute
self._execute_process_list(process_list=process_list)
",
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing.py", line 1714, in _execute_process_list
self._run_module(process)
",
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing.py", line 1354, in _run_module
return self._run_executable(process, poll_time)
",
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing.py", line 1419, in _run_executable
run_time = self._wait_for_process(process.executable,
",
" File "/usr/lib/python3.8/site-packages/actinia_core/rest/ephemeral_processing.py", line 1229, in _wait_for_process
if self.resource_logger.get_termination(
",
" File "/usr/lib/python3.8/site-packages/actinia_core/core/resources_logger.py", line 251, in get_termination
return self.db.get_termination(db_resource_id)
",
" File "/usr/lib/python3.8/site-packages/actinia_core/core/redis_resources.py", line 209, in get_termination
return bool(self.redis_server.get(
",
" File "/usr/lib/python3.8/site-packages/redis/client.py", line 1606, in get
return self.execute_command('GET', name)
",
" File "/usr/lib/python3.8/site-packages/redis/client.py", line 901, in execute_command
return self.parse_response(conn, command_name, **options)
",
" File "/usr/lib/python3.8/site-packages/redis/client.py", line 915, in parse_response
response = connection.read_response()
",
" File "/usr/lib/python3.8/site-packages/redis/connection.py", line 739, in read_response
response = self._parser.read_response()
",
" File "/usr/lib/python3.8/site-packages/redis/connection.py", line 324, in read_response
raw = self._buffer.readline()
",
" File "/usr/lib/python3.8/site-packages/redis/connection.py", line 256, in readline
self._read_from_socket()
",
" File "/usr/lib/python3.8/site-packages/redis/connection.py", line 222, in _read_from_socket
raise ConnectionError("Error while reading from socket: %s" %
"
],
type: "<class 'redis.exceptions.ConnectionError'>"
},
http_code: 400,