set_planning_config doesn't seem to be shared with Session
Describe the bug
I am trying Daft with the OneLake Iceberg catalog. DuckDB and PyIceberg both work, and Daft can list the tables, but scanning a table generates an error:
DaftCoreException: DaftError::External Unauthorized to access store: AzureBlob for file: abfs://[email protected]/4be4416-cd333a14a84e/Tables/xxxx/delta/part-00001-71e1d097-eeb4-48be-83bf-f42a2a708d21-c000.snappy.parquet
You may need to set valid Credentials
server returned error status which will not be retried: 401
To Reproduce
https://drive.google.com/file/d/1o_SyIDZF9CIbVZxOr1cX38pm9bXlp2w2/view?usp=sharing
Expected behavior
No response
Component(s)
Other
Additional context
No response
Hey @djouallah, could you share how you are setting the io_config?
Are you using sessions with a default io_config?
from daft import Session
from daft.io import IOConfig, AzureConfig
import daft
io_config = IOConfig( azure=AzureConfig(storage_account="onelake",use_fabric_endpoint=True,bearer_token= os.environ.get("AZURE_STORAGE_TOKEN") ))
daft.context.set_planning_config(default_io_config=io_config)
sess = Session()
sess.attach_catalog(catalog)
sess.sql(f" use onelake.{schema}")
sess.sql(f" show tables LIKE '{schema}' ").show()
@desmondcheongzx @rchowell do either of you have context here and can triage appropriately?
Hey @djouallah,
Does it work if you run `tbl = catalog.load_table("tbl")` and then `daft.read_iceberg(tbl)`?
For the team's reference, the SQL implementation should be using the same path as read_iceberg, which should handle the default IOConfig. This check would help us narrow down whether the issue is isolated to SQL or lies in the next layer.
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
[/usr/local/lib/python3.12/dist-packages/pyiceberg/catalog/rest/__init__.py](https://localhost:8080/#) in load_table(self, identifier)
628 try:
--> 629 response.raise_for_status()
630 except HTTPError as exc:
10 frames
[/usr/local/lib/python3.12/dist-packages/requests/models.py](https://localhost:8080/#) in raise_for_status(self)
1025 if http_error_msg:
-> 1026 raise HTTPError(http_error_msg, response=self)
1027
HTTPError: 500 Server Error: Internal Server Error for url: https://onelake.table.fabric.microsoft.com/iceberg/v1/tmp/data.lakehouse/namespaces/%7Bschema%7D/tables/%7Btbl%7D
The above exception was the direct cause of the following exception:
ServerError Traceback (most recent call last)
[/tmp/ipython-input-455834275.py](https://localhost:8080/#) in <cell line: 0>()
4 io_config = IOConfig( azure=AzureConfig(storage_account="onelake",use_fabric_endpoint=True,bearer_token= os.environ.get("AZURE_STORAGE_TOKEN") ))
5 daft.context.set_planning_config(default_io_config=io_config)
----> 6 tbl = catalog.load_table("{schema}.{tbl}")
7 daft.read_iceberg(tbl)
[/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py](https://localhost:8080/#) in wrapped_f(*args, **kw)
334 copy = self.copy()
335 wrapped_f.statistics = copy.statistics # type: ignore[attr-defined]
--> 336 return copy(f, *args, **kw)
337
338 def retry_with(*args: t.Any, **kwargs: t.Any) -> WrappedFn:
[/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py](https://localhost:8080/#) in __call__(self, fn, *args, **kwargs)
473 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
474 while True:
--> 475 do = self.iter(retry_state=retry_state)
476 if isinstance(do, DoAttempt):
477 try:
[/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py](https://localhost:8080/#) in iter(self, retry_state)
374 result = None
375 for action in self.iter_state.actions:
--> 376 result = action(retry_state)
377 return result
378
[/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py](https://localhost:8080/#) in <lambda>(rs)
396 def _post_retry_check_actions(self, retry_state: "RetryCallState") -> None:
397 if not (self.iter_state.is_explicit_retry or self.iter_state.retry_run_result):
--> 398 self._add_action_func(lambda rs: rs.outcome.result())
399 return
400
[/usr/lib/python3.12/concurrent/futures/_base.py](https://localhost:8080/#) in result(self, timeout)
447 raise CancelledError()
448 elif self._state == FINISHED:
--> 449 return self.__get_result()
450
451 self._condition.wait(timeout)
[/usr/lib/python3.12/concurrent/futures/_base.py](https://localhost:8080/#) in __get_result(self)
399 if self._exception:
400 try:
--> 401 raise self._exception
402 finally:
403 # Break a reference cycle with the exception in self._exception
[/usr/local/lib/python3.12/dist-packages/tenacity/__init__.py](https://localhost:8080/#) in __call__(self, fn, *args, **kwargs)
476 if isinstance(do, DoAttempt):
477 try:
--> 478 result = fn(*args, **kwargs)
479 except BaseException: # noqa: B902
480 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]
[/usr/local/lib/python3.12/dist-packages/pyiceberg/catalog/rest/__init__.py](https://localhost:8080/#) in load_table(self, identifier)
629 response.raise_for_status()
630 except HTTPError as exc:
--> 631 _handle_non_200_response(exc, {404: NoSuchTableError})
632
633 table_response = TableResponse.model_validate_json(response.text)
[/usr/local/lib/python3.12/dist-packages/pyiceberg/catalog/rest/response.py](https://localhost:8080/#) in _handle_non_200_response(exc, error_handler)
109 response = f"RESTError {exc.response.status_code}: Received unexpected JSON Payload: {exc.response.text}, errors: {errs}"
110
--> 111 raise exception(response) from exc
ServerError: RESTError 500: Received unexpected JSON Payload: {
"error": {
"code": "InternalServerError",
"message": "Input string was not in a correct format.",
"target": null,
"details": null
}
}, errors: Field required, Input should be a valid integer, unable to parse string as an integer