krkn icon indicating copy to clipboard operation
krkn copied to clipboard

etcd container kill scenario doesn't wait for the apiserver to become available

Open Noreen21 opened this issue 1 year ago • 1 comments

2023-04-21 16:44:27,328 [INFO] Executing scenarios for iteration 0
2023-04-21 16:44:27,328 [INFO] connection set up
127.0.0.1 - - [21/Apr/2023 16:44:27] "GET / HTTP/1.1" 200 -
2023-04-21 16:44:27,329 [INFO] response RUN
2023-04-21 16:44:27,330 [INFO] Running container scenarios
2023-04-21 16:44:27,843 [INFO] Killing container etcd in pod etcd-e28-h35-r750 (ns openshift-etcd)
2023-04-21 16:44:28,140 [INFO] Scenario kill etcd container successfully injected
2023-04-21 16:45:13,931 [WARNING] Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))': /api/v1/namespaces/openshift-etcd/pods?pretty=True
2023-04-21 16:45:14,033 [WARNING] Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))': /api/v1/namespaces/openshift-etcd/pods?pretty=True
2023-04-21 16:45:14,034 [WARNING] Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f991506afd0>: Failed to establish a new connection: [Errno 111] Connection refused')': /api/v1/namespaces/openshift-etcd/pods?pretty=True
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 174, in _new_conn
conn = connection.create_connection(
File "/usr/local/lib/python3.9/site-packages/urllib3/util/connection.py", line 95, in create_connection
raise err
File "/usr/local/lib/python3.9/site-packages/urllib3/util/connection.py", line 85, in create_connection
sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 363, in connect
self.sock = conn = self._new_conn()
File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 186, in _new_conn
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x7f9915029520>: Failed to establish a new connection: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/root/sno/e28-h35-r750/krkn/run_kraken.py", line 403, in <module>
main(options.cfg)
File "/root/sno/e28-h35-r750/krkn/run_kraken.py", line 214, in main
failed_post_scenarios = pod_scenarios.container_run(
File "/root/sno/e28-h35-r750/krkn/kraken/pod_scenarios/setup.py", line 92, in container_run
failed_post_scenarios = check_failed_containers(
File "/root/sno/e28-h35-r750/krkn/kraken/pod_scenarios/setup.py", line 191, in check_failed_containers
pod_output = kubecli.get_pod_info(killed_container[0], killed_container[1])
File "/root/sno/e28-h35-r750/krkn/kraken/kubernetes/client.py", line 544, in get_pod_info
pod_exists = check_if_pod_exists(name=name, namespace=namespace)
File "/root/sno/e28-h35-r750/krkn/kraken/kubernetes/client.py", line 721, in check_if_pod_exists
pod_list = list_pods(namespace=namespace)
File "/root/sno/e28-h35-r750/krkn/kraken/kubernetes/client.py", line 209, in list_pods
ret = cli.list_namespaced_pod(namespace, pretty=True)
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/api/core_v1_api.py", line 15697, in list_namespaced_pod
return self.list_namespaced_pod_with_http_info(namespace, **kwargs) # noqa: E501
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/api/core_v1_api.py", line 15812, in list_namespaced_pod_with_http_info
return self.api_client.call_api(
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/api_client.py", line 348, in call_api
return self.__call_api(resource_path, method,
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/api_client.py", line 180, in __call_api
response_data = self.request(
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/api_client.py", line 373, in request
return self.rest_client.GET(url,
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/rest.py", line 241, in GET
return self.request("GET", url,
File "/usr/local/lib/python3.9/site-packages/kubernetes/client/rest.py", line 214, in request
r = self.pool_manager.request(method, url,
File "/usr/local/lib/python3.9/site-packages/urllib3/request.py", line 74, in request
return self.request_encode_url(
File "/usr/local/lib/python3.9/site-packages/urllib3/request.py", line 96, in request_encode_url
return self.urlopen(method, url, **extra_kw)
File "/usr/local/lib/python3.9/site-packages/urllib3/poolmanager.py", line 376, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 815, in urlopen
return self.urlopen(
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 815, in urlopen
return self.urlopen(
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 815, in urlopen
return self.urlopen(
File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 787, in urlopen
retries = retries.increment(
File "/usr/local/lib/python3.9/site-packages/urllib3/util/retry.py", line 592, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='api.e28-h35-r750.example.com', port=6443): Max retries exceeded with url: /api/v1/namespaces/openshift-etcd/pods?pretty=True (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f9915029520>: Failed to establish a new connection: [Errno 111] Connection refused'))

Noreen21 avatar Apr 24 '23 05:04 Noreen21

Also, Cerberus doesn't publish the apiserver failure status.

Noreen21 avatar Apr 24 '23 05:04 Noreen21