python-documentai-toolbox icon indicating copy to clipboard operation
python-documentai-toolbox copied to clipboard

`Document.from_batch_process_operation()` fails on running operation

Open holtskinner opened this issue 11 months ago • 2 comments

Output from a Jupyter Notebook

	"name": "TypeError",
	"message": "'NoneType' object is not iterable",
	"stack": "---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[29], line 1
----> 1 wrapped_documents = Document.from_batch_process_operation(location=\"us\",

File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:623, in Document.from_batch_process_operation(cls, location, operation_name, timeout)
    586 @classmethod
    587 def from_batch_process_operation(
    588     cls: Type[\"Document\"],
    591     timeout: Optional[float] = None,
    592 ) -> List[\"Document\"]:
    593     r\"\"\"Loads Documents from Cloud Storage, using the operation name returned from `batch_process_documents()`.
    595         .. code-block:: python
    620             A list of wrapped documents from gcs. Each document corresponds to an input file.
    621     \"\"\"
    622     return cls.from_batch_process_metadata(
--> 623         metadata=_get_batch_process_metadata(
    624             operation_name=operation_name,
    625             location=location,
    626             timeout=timeout,
    627         )
    628     )

File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:190, in _get_batch_process_metadata(operation_name, location, timeout)
    182 # Poll Operation until complete.
    183 operation = operation_from_gapic(
    184     operation=client.get_operation(
    185         request=GetOperationRequest(name=operation_name),
    188     result_type=documentai.BatchProcessResponse,
    189 )
--> 190 operation.result(timeout=timeout)
    192 operation_pb = operation.operation
    194 # Get Operation metadata.

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:256, in PollingFuture.result(self, timeout, retry, polling)
    144 def result(self, timeout=_DEFAULT_VALUE, retry=None, polling=None):
    145     \"\"\"Get the result of the operation.
    147     This method will poll for operation status periodically, blocking if
    253             the timeout is reached before the operation completes.
    254     \"\"\"
--> 256     self._blocking_poll(timeout=timeout, retry=retry, polling=polling)
    258     if self._exception is not None:
    259         # pylint: disable=raising-bad-type
    260         # Pylint doesn't recognize that this is valid in this case.
    261         raise self._exception

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:137, in PollingFuture._blocking_poll(self, timeout, retry, polling)
    134     polling = polling.with_timeout(timeout)
    136 try:
--> 137     polling(self._done_or_raise)(retry=retry)
    138 except exceptions.RetryError:
    139     raise concurrent.futures.TimeoutError(
    140         f\"Operation did not complete within the designated timeout of \"
    141         f\"{polling.timeout} seconds.\"
    142     )

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:293, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
    289 target = functools.partial(func, *args, **kwargs)
    290 sleep_generator = exponential_sleep_generator(
    291     self._initial, self._maximum, multiplier=self._multiplier
    292 )
--> 293 return retry_target(
    294     target,
    295     self._predicate,
    296     sleep_generator,
    297     timeout=self._timeout,
    298     on_error=on_error,
    299 )

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:153, in retry_target(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)
    149 # pylint: disable=broad-except
    150 # This function explicitly must deal with broad exceptions.
    151 except Exception as exc:
    152     # defer to shared logic for handling errors
--> 153     _retry_error_helper(
    154         exc,
    155         deadline,
    156         sleep,
    157         error_list,
    158         predicate,
    159         on_error,
    160         exception_factory,
    161         timeout,
    162     )
    163     # if exception not raised, sleep before next attempt
    164     time.sleep(sleep)

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_base.py:212, in _retry_error_helper(exc, deadline, next_sleep, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)
    206 if not predicate_fn(exc):
    207     final_exc, source_exc = exc_factory_fn(
    208         error_list,
    209         RetryFailureReason.NON_RETRYABLE_ERROR,
    210         original_timeout,
    211     )
--> 212     raise final_exc from source_exc
    213 if on_error_fn is not None:
    214     on_error_fn(exc)

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:144, in retry_target(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)
    142 for sleep in sleep_generator:
    143     try:
--> 144         result = target()
    145         if inspect.isawaitable(result):
    146             warnings.warn(_ASYNC_RETRY_WARNING)

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:119, in PollingFuture._done_or_raise(self, retry)
    117 def _done_or_raise(self, retry=None):
    118     \"\"\"Check if the future is done and raise if it's not.\"\"\"
--> 119     if not self.done(retry=retry):
    120         raise _OperationNotComplete()

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/operation.py:174, in Operation.done(self, retry)
    165 def done(self, retry=None):
    166     \"\"\"Checks to see if the operation is complete.
    168     Args:
    172         bool: True if the operation is complete, False otherwise.
    173     \"\"\"
--> 174     self._refresh_and_update(retry)
    175     return self._operation.done

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/operation.py:162, in Operation._refresh_and_update(self, retry)
    159 # If the currently cached operation is done, no need to make another
    160 # RPC as it will not change once done.
    161 if not self._operation.done:
--> 162     self._operation = self._refresh(retry=retry) if retry else self._refresh()
    163     self._set_result_from_operation()

File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_v1/services/document_processor_service/client.py:3572, in DocumentProcessorServiceClient.get_operation(self, request, retry, timeout, metadata)
   3564 rpc = gapic_v1.method.wrap_method(
   3565     self._transport.get_operation,
   3566     default_timeout=None,
   3567     client_info=DEFAULT_CLIENT_INFO,
   3568 )
   3570 # Certain fields should be provided within the metadata header;
   3571 # add these here.
-> 3572 metadata = tuple(metadata) + (
   3573     gapic_v1.routing_header.to_grpc_metadata(((\"name\", request.name),)),
   3574 )
   3576 # Validate the universe domain.
   3577 self._validate_universe_domain()

TypeError: 'NoneType' object is not iterable"

holtskinner avatar Mar 14 '24 18:03 holtskinner

Hi, wondering if there is any update or fix here?

spencerbraun avatar Apr 24 '24 01:04 spencerbraun

Workaround for now: use Document.from_batch_process_metadata() instead.

from google.cloud import documentai
from google.cloud.documentai_toolbox import document

operation = client.batch_process_documents(request)
metadata = documentai.BatchProcessMetadata(operation.metadata)
wrapped_documents = document.Document.from_batch_process_metadata(metadata)

holtskinner avatar Jul 02 '24 18:07 holtskinner