bundle-kubeflow
bundle-kubeflow copied to clipboard
`kserve-integration` UAT fails in the CI on AKS and EKS with `Notebook execution failed with KeyError: 'status'`
Bug Description
In the scheduled CI runs on AKS and EKS, the `kserve-integration` UAT failed with `KeyError: 'status'`.
Logs are attached in the Relevant Log Output section below.
To Reproduce
Run the `Create AKS cluster, deploy CKF and run bundle test` action
or the `Create EKS cluster, deploy CKF and run bundle test` action
in the CI for latest/edge.
Environment
AKS 1.29
EKS 1.29
kubeflow bundle latest/edge
juju 3.4/stable
Relevant Log Output
=================================== FAILURES ===================================
______________________ test_notebook[kserve-integration] _______________________
test_notebook = '/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/notebooks/kserve/kserve-integration.ipynb'
@pytest.mark.ipynb
@pytest.mark.parametrize(
# notebook - ipynb file to execute
"test_notebook",
NOTEBOOKS.values(),
ids=NOTEBOOKS.keys(),
)
def test_notebook(test_notebook):
"""Test Notebook Generic Wrapper."""
os.chdir(os.path.dirname(test_notebook))
with open(test_notebook) as nb:
notebook = nbformat.read(nb, as_version=nbformat.NO_CONVERT)
ep = ExecutePreprocessor(
timeout=-1, kernel_name="python3", on_notebook_start=install_python_requirements
)
ep.skip_cells_with_tag = "pytest-skip"
try:
log.info(f"Running {os.path.basename(test_notebook)}...")
> output_notebook, _ = ep.preprocess(notebook, {"metadata": {"path": "./"}})
/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/test_notebooks.py:45:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/opt/conda/lib/python3.11/site-packages/nbconvert/preprocessors/execute.py:103: in preprocess
self.preprocess_cell(cell, resources, index)
/opt/conda/lib/python3.11/site-packages/nbconvert/preprocessors/execute.py:124: in preprocess_cell
cell = self.execute_cell(cell, index, store_history=True)
/opt/conda/lib/python3.11/site-packages/jupyter_core/utils/__init__.py:165: in wrapped
return loop.run_until_complete(inner)
/opt/conda/lib/python3.11/asyncio/base_events.py:654: in run_until_complete
return future.result()
/opt/conda/lib/python3.11/site-packages/nbclient/client.py:1062: in async_execute_cell
await self._check_raise_for_error(cell, cell_index, exec_reply)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <nbconvert.preprocessors.execute.ExecutePreprocessor object at 0x7fc7cc2e6650>
cell = {'cell_type': 'code', 'execution_count': 8, 'id': '8522c4e9-07b7-4bff-9b49-3675ff19bacc', 'metadata': {'execution': {'...sp = client.get(ISVC_NAME)\nisvc_url = isvc_resp[\'status\'][\'address\'][\'url\']\nprint("Inference URL:", isvc_url)'}
cell_index = 16
exec_reply = {'buffers': [], 'content': {'ename': 'KeyError', 'engine_info': {'engine_id': -1, 'engine_uuid': 'd5e7bdef-d6d9-4ef2-9...e, 'engine': 'd5e7bdef-d6d9-4ef2-9700-5284518715ee', 'started': '2024-10-01T01:07:25.404728Z', 'status': 'error'}, ...}
async def _check_raise_for_error(
self, cell: NotebookNode, cell_index: int, exec_reply: dict[str, t.Any] | None
) -> None:
if exec_reply is None:
return None
exec_reply_content = exec_reply["content"]
if exec_reply_content["status"] != "error":
return None
cell_allows_errors = (not self.force_raise_errors) and (
self.allow_errors
or exec_reply_content.get("ename") in self.allow_error_names
or "raises-exception" in cell.metadata.get("tags", [])
)
await run_hook(
self.on_cell_error, cell=cell, cell_index=cell_index, execute_reply=exec_reply
)
if not cell_allows_errors:
> raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content)
E nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
E ------------------
E isvc_resp = client.get(ISVC_NAME)
E isvc_url = isvc_resp['status']['address']['url']
E print("Inference URL:", isvc_url)
E ------------------
E
E
E ---------------------------------------------------------------------------
E KeyError Traceback (most recent call last)
E Cell In[8], line 2
E 1 isvc_resp = client.get(ISVC_NAME)
E ----> 2 isvc_url = isvc_resp['status']['address']['url']
E 3 print("Inference URL:", isvc_url)
E
E KeyError: 'status'
/opt/conda/lib/python3.11/site-packages/nbclient/client.py:918: CellExecutionError
During handling of the above exception, another exception occurred:
test_notebook = '/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/notebooks/kserve/kserve-integration.ipynb'
@pytest.mark.ipynb
@pytest.mark.parametrize(
# notebook - ipynb file to execute
"test_notebook",
NOTEBOOKS.values(),
ids=NOTEBOOKS.keys(),
)
def test_notebook(test_notebook):
"""Test Notebook Generic Wrapper."""
os.chdir(os.path.dirname(test_notebook))
with open(test_notebook) as nb:
notebook = nbformat.read(nb, as_version=nbformat.NO_CONVERT)
ep = ExecutePreprocessor(
timeout=-1, kernel_name="python3", on_notebook_start=install_python_requirements
)
ep.skip_cells_with_tag = "pytest-skip"
try:
log.info(f"Running {os.path.basename(test_notebook)}...")
output_notebook, _ = ep.preprocess(notebook, {"metadata": {"path": "./"}})
# persist the notebook output to the original file for debugging purposes
save_notebook(output_notebook, test_notebook)
except CellExecutionError as e:
# handle underlying error
> pytest.fail(f"Notebook execution failed with {e.ename}: {e.evalue}")
E Failed: Notebook execution failed with KeyError: 'status'
/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/test_notebooks.py:50: Failed
Additional Context
No response