python-documentai-toolbox
python-documentai-toolbox copied to clipboard
`Document.from_batch_process_operation()` fails on running operation
Error output from a Jupyter Notebook:
{
"name": "TypeError",
"message": "'NoneType' object is not iterable",
"stack": "---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[29], line 1
----> 1 wrapped_documents = Document.from_batch_process_operation(location=\"us\", operation_name=operation.operation.name)
File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:623, in Document.from_batch_process_operation(cls, location, operation_name, timeout)
586 @classmethod
587 def from_batch_process_operation(
588 cls: Type[\"Document\"],
(...)
591 timeout: Optional[float] = None,
592 ) -> List[\"Document\"]:
593 r\"\"\"Loads Documents from Cloud Storage, using the operation name returned from `batch_process_documents()`.
594
595 .. code-block:: python
(...)
620 A list of wrapped documents from gcs. Each document corresponds to an input file.
621 \"\"\"
622 return cls.from_batch_process_metadata(
--> 623 metadata=_get_batch_process_metadata(
624 operation_name=operation_name,
625 location=location,
626 timeout=timeout,
627 )
628 )
File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:190, in _get_batch_process_metadata(operation_name, location, timeout)
182 # Poll Operation until complete.
183 operation = operation_from_gapic(
184 operation=client.get_operation(
185 request=GetOperationRequest(name=operation_name),
(...)
188 result_type=documentai.BatchProcessResponse,
189 )
--> 190 operation.result(timeout=timeout)
192 operation_pb = operation.operation
194 # Get Operation metadata.
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:256, in PollingFuture.result(self, timeout, retry, polling)
144 def result(self, timeout=_DEFAULT_VALUE, retry=None, polling=None):
145 \"\"\"Get the result of the operation.
146
147 This method will poll for operation status periodically, blocking if
(...)
253 the timeout is reached before the operation completes.
254 \"\"\"
--> 256 self._blocking_poll(timeout=timeout, retry=retry, polling=polling)
258 if self._exception is not None:
259 # pylint: disable=raising-bad-type
260 # Pylint doesn't recognize that this is valid in this case.
261 raise self._exception
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:137, in PollingFuture._blocking_poll(self, timeout, retry, polling)
134 polling = polling.with_timeout(timeout)
136 try:
--> 137 polling(self._done_or_raise)(retry=retry)
138 except exceptions.RetryError:
139 raise concurrent.futures.TimeoutError(
140 f\"Operation did not complete within the designated timeout of \"
141 f\"{polling.timeout} seconds.\"
142 )
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:293, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
289 target = functools.partial(func, *args, **kwargs)
290 sleep_generator = exponential_sleep_generator(
291 self._initial, self._maximum, multiplier=self._multiplier
292 )
--> 293 return retry_target(
294 target,
295 self._predicate,
296 sleep_generator,
297 timeout=self._timeout,
298 on_error=on_error,
299 )
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:153, in retry_target(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)
149 # pylint: disable=broad-except
150 # This function explicitly must deal with broad exceptions.
151 except Exception as exc:
152 # defer to shared logic for handling errors
--> 153 _retry_error_helper(
154 exc,
155 deadline,
156 sleep,
157 error_list,
158 predicate,
159 on_error,
160 exception_factory,
161 timeout,
162 )
163 # if exception not raised, sleep before next attempt
164 time.sleep(sleep)
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_base.py:212, in _retry_error_helper(exc, deadline, next_sleep, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)
206 if not predicate_fn(exc):
207 final_exc, source_exc = exc_factory_fn(
208 error_list,
209 RetryFailureReason.NON_RETRYABLE_ERROR,
210 original_timeout,
211 )
--> 212 raise final_exc from source_exc
213 if on_error_fn is not None:
214 on_error_fn(exc)
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:144, in retry_target(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)
142 for sleep in sleep_generator:
143 try:
--> 144 result = target()
145 if inspect.isawaitable(result):
146 warnings.warn(_ASYNC_RETRY_WARNING)
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:119, in PollingFuture._done_or_raise(self, retry)
117 def _done_or_raise(self, retry=None):
118 \"\"\"Check if the future is done and raise if it's not.\"\"\"
--> 119 if not self.done(retry=retry):
120 raise _OperationNotComplete()
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/operation.py:174, in Operation.done(self, retry)
165 def done(self, retry=None):
166 \"\"\"Checks to see if the operation is complete.
167
168 Args:
(...)
172 bool: True if the operation is complete, False otherwise.
173 \"\"\"
--> 174 self._refresh_and_update(retry)
175 return self._operation.done
File /opt/homebrew/lib/python3.11/site-packages/google/api_core/operation.py:162, in Operation._refresh_and_update(self, retry)
159 # If the currently cached operation is done, no need to make another
160 # RPC as it will not change once done.
161 if not self._operation.done:
--> 162 self._operation = self._refresh(retry=retry) if retry else self._refresh()
163 self._set_result_from_operation()
File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_v1/services/document_processor_service/client.py:3572, in DocumentProcessorServiceClient.get_operation(self, request, retry, timeout, metadata)
3564 rpc = gapic_v1.method.wrap_method(
3565 self._transport.get_operation,
3566 default_timeout=None,
3567 client_info=DEFAULT_CLIENT_INFO,
3568 )
3570 # Certain fields should be provided within the metadata header;
3571 # add these here.
-> 3572 metadata = tuple(metadata) + (
3573 gapic_v1.routing_header.to_grpc_metadata(((\"name\", request.name),)),
3574 )
3576 # Validate the universe domain.
3577 self._validate_universe_domain()
TypeError: 'NoneType' object is not iterable"
}
Hi, wondering if there is any update or fix here?
Workaround for now: use `Document.from_batch_process_metadata()` instead of `Document.from_batch_process_operation()`:
from google.cloud import documentai
from google.cloud.documentai_toolbox import document
operation = client.batch_process_documents(request)
operation.result(timeout=timeout)
metadata = documentai.BatchProcessMetadata(operation.metadata)
wrapped_documents = document.Document.from_batch_process_metadata(metadata)