bundle-kubeflow icon indicating copy to clipboard operation
bundle-kubeflow copied to clipboard

`kserve-integration` UAT fails in the CI on AKS and EKS with `Notebook execution failed with KeyError: 'status'`

Open NohaIhab opened this issue 4 months ago • 2 comments

Bug Description

The `kserve-integration` UAT failed in the scheduled CI runs on AKS and EKS. The logs are included under Relevant Log Output below.

To Reproduce

Run the "Create AKS cluster, deploy CKF and run bundle test" action or the "Create EKS cluster, deploy CKF and run bundle test" action in the CI for `latest/edge`.

Environment

AKS 1.29, EKS 1.29, kubeflow bundle latest/edge, juju 3.4/stable

Relevant Log Output

=================================== FAILURES ===================================
______________________ test_notebook[kserve-integration] _______________________

test_notebook = '/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/notebooks/kserve/kserve-integration.ipynb'

    @pytest.mark.ipynb
    @pytest.mark.parametrize(
        # notebook - ipynb file to execute
        "test_notebook",
        NOTEBOOKS.values(),
        ids=NOTEBOOKS.keys(),
    )
    def test_notebook(test_notebook):
        """Test Notebook Generic Wrapper."""
        os.chdir(os.path.dirname(test_notebook))
    
        with open(test_notebook) as nb:
            notebook = nbformat.read(nb, as_version=nbformat.NO_CONVERT)
    
        ep = ExecutePreprocessor(
            timeout=-1, kernel_name="python3", on_notebook_start=install_python_requirements
        )
        ep.skip_cells_with_tag = "pytest-skip"
    
        try:
            log.info(f"Running {os.path.basename(test_notebook)}...")
>           output_notebook, _ = ep.preprocess(notebook, {"metadata": {"path": "./"}})

/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/test_notebooks.py:45: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/opt/conda/lib/python3.11/site-packages/nbconvert/preprocessors/execute.py:103: in preprocess
    self.preprocess_cell(cell, resources, index)
/opt/conda/lib/python3.11/site-packages/nbconvert/preprocessors/execute.py:124: in preprocess_cell
    cell = self.execute_cell(cell, index, store_history=True)
/opt/conda/lib/python3.11/site-packages/jupyter_core/utils/__init__.py:165: in wrapped
    return loop.run_until_complete(inner)
/opt/conda/lib/python3.11/asyncio/base_events.py:654: in run_until_complete
    return future.result()
/opt/conda/lib/python3.11/site-packages/nbclient/client.py:1062: in async_execute_cell
    await self._check_raise_for_error(cell, cell_index, exec_reply)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <nbconvert.preprocessors.execute.ExecutePreprocessor object at 0x7fc7cc2e6650>
cell = {'cell_type': 'code', 'execution_count': 8, 'id': '8522c4e9-07b7-4bff-9b49-3675ff19bacc', 'metadata': {'execution': {'...sp = client.get(ISVC_NAME)\nisvc_url = isvc_resp[\'status\'][\'address\'][\'url\']\nprint("Inference URL:", isvc_url)'}
cell_index = 16
exec_reply = {'buffers': [], 'content': {'ename': 'KeyError', 'engine_info': {'engine_id': -1, 'engine_uuid': 'd5e7bdef-d6d9-4ef2-9...e, 'engine': 'd5e7bdef-d6d9-4ef2-9700-5284518715ee', 'started': '2024-10-01T01:07:25.404728Z', 'status': 'error'}, ...}

    async def _check_raise_for_error(
        self, cell: NotebookNode, cell_index: int, exec_reply: dict[str, t.Any] | None
    ) -> None:
        if exec_reply is None:
            return None
    
        exec_reply_content = exec_reply["content"]
        if exec_reply_content["status"] != "error":
            return None
    
        cell_allows_errors = (not self.force_raise_errors) and (
            self.allow_errors
            or exec_reply_content.get("ename") in self.allow_error_names
            or "raises-exception" in cell.metadata.get("tags", [])
        )
        await run_hook(
            self.on_cell_error, cell=cell, cell_index=cell_index, execute_reply=exec_reply
        )
        if not cell_allows_errors:
>           raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content)
E           nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
E           ------------------
E           isvc_resp = client.get(ISVC_NAME)
E           isvc_url = isvc_resp['status']['address']['url']
E           print("Inference URL:", isvc_url)
E           ------------------
E           
E           
E           ---------------------------------------------------------------------------
E           KeyError                                  Traceback (most recent call last)
E           Cell In[8], line 2
E                 1 isvc_resp = client.get(ISVC_NAME)
E           ----> 2 isvc_url = isvc_resp['status']['address']['url']
E                 3 print("Inference URL:", isvc_url)
E           
E           KeyError: 'status'

/opt/conda/lib/python3.11/site-packages/nbclient/client.py:918: CellExecutionError

During handling of the above exception, another exception occurred:

test_notebook = '/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/notebooks/kserve/kserve-integration.ipynb'

    @pytest.mark.ipynb
    @pytest.mark.parametrize(
        # notebook - ipynb file to execute
        "test_notebook",
        NOTEBOOKS.values(),
        ids=NOTEBOOKS.keys(),
    )
    def test_notebook(test_notebook):
        """Test Notebook Generic Wrapper."""
        os.chdir(os.path.dirname(test_notebook))
    
        with open(test_notebook) as nb:
            notebook = nbformat.read(nb, as_version=nbformat.NO_CONVERT)
    
        ep = ExecutePreprocessor(
            timeout=-1, kernel_name="python3", on_notebook_start=install_python_requirements
        )
        ep.skip_cells_with_tag = "pytest-skip"
    
        try:
            log.info(f"Running {os.path.basename(test_notebook)}...")
            output_notebook, _ = ep.preprocess(notebook, {"metadata": {"path": "./"}})
            # persist the notebook output to the original file for debugging purposes
            save_notebook(output_notebook, test_notebook)
        except CellExecutionError as e:
            # handle underlying error
>           pytest.fail(f"Notebook execution failed with {e.ename}: {e.evalue}")
E           Failed: Notebook execution failed with KeyError: 'status'

/tests/.worktrees/b9848a5695a361eba1d9b0cfb2fddc99460b304e/tests/test_notebooks.py:50: Failed

Additional Context

No response

NohaIhab avatar Oct 02 '24 10:10 NohaIhab