neptune-client
neptune-client copied to clipboard
BUG: cannot recover from NeptuneFieldCountLimitExceedException
Describe the bug
I created a run with a large number of fields, and now I cannot do anything with it because NeptuneFieldCountLimitExceedException
prevents the run from being loaded. In particular, I cannot delete any fields to recover from this state.
Reproduction
Create a run with more than 9000 fields. Then try to load this run:
run = neptune.init_run(
with_id=run_id,
project=project_name,
mode='sync',
)
Expected behavior
I expected the run to be loaded. If there is a limit on the number of fields, I expected to be able to delete some fields and then save the run to the server. In fact, I cannot delete any fields, because I cannot load the run. Thus, I do not see any way to deal with that run at all.
Traceback
---------------------------------------------------------------------------
HTTPUnprocessableEntity Traceback (most recent call last)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/backends/swagger_client_wrapper.py:111, in ApiMethodWrapper.__call__(self, *args, **kwargs)
110 future = self._api_method(*args, **kwargs)
--> 111 return FinishedApiResponseFuture(future.response()) # wait synchronously
112 except HTTPError as e:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/bravado/http_future.py:200, in HttpFuture.response(self, timeout, fallback_result, exceptions_to_catch)
198 request_end_time = monotonic.monotonic()
--> 200 swagger_result = self._get_swagger_result(incoming_response)
202 if self.operation is None and incoming_response.status_code >= 300:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/bravado/http_future.py:124, in reraise_errors.<locals>.wrapper(self, *args, **kwargs)
123 try:
--> 124 return func(self, *args, **kwargs)
125 except connection_errors as exception:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/bravado/http_future.py:300, in HttpFuture._get_swagger_result(self, incoming_response)
299 if self.operation is not None:
--> 300 unmarshal_response(
301 incoming_response,
302 self.operation,
303 self.request_config.response_callbacks,
304 )
305 swagger_result = typing.cast(T, incoming_response.swagger_result)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/bravado/http_future.py:353, in unmarshal_response(incoming_response, operation, response_callbacks)
351 response_callback(incoming_response, operation)
--> 353 raise_on_expected(incoming_response)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/bravado/http_future.py:420, in raise_on_expected(http_response)
419 if not 200 <= http_response.status_code < 300:
--> 420 raise make_http_exception(
421 response=http_response,
422 swagger_result=http_response.swagger_result)
HTTPUnprocessableEntity: 422 unknown
The above exception was the direct cause of the following exception:
NeptuneFieldCountLimitExceedException Traceback (most recent call last)
Cell In[4], line 1
----> 1 strange_run = get_run_by_id(project, 'TRAN-30', mode='sync')
File ~/prj/organization/project/neptune_helpers.py:22, in get_run_by_id(project, run_id, mode)
20 else:
21 project_name = project
---> 22 return neptune.init_run(
23 with_id=run_id,
24 project=project_name,
25 api_token=neptune_api_token,
26 mode=mode,
27 )
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/run.py:410, in Run.__init__(self, with_id, project, api_token, custom_run_id, mode, name, description, tags, source_files, capture_stdout, capture_stderr, capture_hardware_metrics, fail_on_exception, monitoring_namespace, flush_period, proxies, capture_traceback, git_ref, dependencies, async_lag_callback, async_lag_threshold, async_no_progress_callback, async_no_progress_threshold, **kwargs)
407 if mode == Mode.OFFLINE or mode == Mode.DEBUG:
408 project = OFFLINE_PROJECT_QUALIFIED_NAME
--> 410 super().__init__(
411 project=project,
412 api_token=api_token,
413 mode=mode,
414 flush_period=flush_period,
415 proxies=proxies,
416 async_lag_callback=async_lag_callback,
417 async_lag_threshold=async_lag_threshold,
418 async_no_progress_callback=async_no_progress_callback,
419 async_no_progress_threshold=async_no_progress_threshold,
420 )
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/metadata_container.py:202, in MetadataContainer.__init__(self, project, api_token, mode, flush_period, proxies, async_lag_callback, async_lag_threshold, async_no_progress_callback, async_no_progress_threshold)
199 self.sync(wait=False)
201 if self._mode != Mode.READ_ONLY:
--> 202 self._write_initial_attributes()
204 self._startup(debug_mode=mode == Mode.DEBUG)
206 try:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/run.py:498, in Run._write_initial_attributes(self)
495 self[SYSTEM_DESCRIPTION_ATTRIBUTE_PATH] = self._description
497 if any((self._capture_stderr, self._capture_stdout, self._capture_traceback, self._capture_hardware_metrics)):
--> 498 self._write_initial_monitoring_attributes()
500 if self._tags is not None:
501 self[SYSTEM_TAGS_ATTRIBUTE_PATH].add(self._tags)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/run.py:477, in Run._write_initial_monitoring_attributes(self)
475 def _write_initial_monitoring_attributes(self) -> None:
476 if self._hostname is not None:
--> 477 self[f"{self._monitoring_namespace}/hostname"] = self._hostname
478 if self._with_id is None:
479 self[SYSTEM_HOSTNAME_ATTRIBUTE_PATH] = self._hostname
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/metadata_container.py:118, in ensure_not_stopped.<locals>.inner_fun(self, *args, **kwargs)
115 @wraps(fun)
116 def inner_fun(self: "MetadataContainer", *args, **kwargs):
117 self._raise_if_stopped()
--> 118 return fun(self, *args, **kwargs)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/metadata_container.py:344, in MetadataContainer.__setitem__(self, key, value)
342 @ensure_not_stopped
343 def __setitem__(self, key: str, value) -> None:
--> 344 self.__getitem__(key).assign(value)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/handler.py:88, in check_protected_paths.<locals>.inner_fun(self, *args, **kwargs)
85 @wraps(fun)
86 def inner_fun(self: "Handler", *args, **kwargs):
87 validate_path_not_protected(self._path, self)
---> 88 return fun(self, *args, **kwargs)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/handler.py:213, in Handler.assign(self, value, wait)
211 attr = self._container.get_attribute(self._path)
212 if attr is None:
--> 213 self._container.define(self._path, value)
214 else:
215 if isinstance(value, Handler):
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/metadata_containers/metadata_container.py:546, in MetadataContainer.define(self, path, value, wait)
544 attr = ValueToAttributeVisitor(self, parse_path(path)).visit(neptune_value)
545 self.set_attribute(path, attr)
--> 546 attr.process_assignment(neptune_value, wait=wait)
547 return attr
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/attributes/attribute.py:66, in Attribute.process_assignment(self, value, wait)
64 return self.copy(value, wait=wait)
65 else:
---> 66 return self.assign(value, wait=wait)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/attributes/atoms/string.py:75, in String.assign(self, value, wait)
66 logger.warning(
67 "Warning: string '%s' value was"
68 " longer than %s characters and was truncated."
(...)
71 String.MAX_VALUE_LENGTH,
72 )
74 with self._container.lock():
---> 75 self._enqueue_operation(self.create_assignment_operation(self._path, value.value), wait=wait)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/attributes/attribute.py:45, in Attribute._enqueue_operation(self, operation, wait)
44 def _enqueue_operation(self, operation: Operation, *, wait: bool):
---> 45 self._container._op_processor.enqueue_operation(operation, wait=wait)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/utils/disk_utilization.py:171, in ensure_disk_not_overutilize.<locals>.wrapper(*args, **kwargs)
169 @wraps(func)
170 def wrapper(*args: Any, **kwargs: Any) -> None:
--> 171 error_handler(max_disk_utilization, func, *args, **kwargs).run()
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/utils/disk_utilization.py:102, in DiskUtilizationErrorHandlerTemplate.run(self)
100 def run(self) -> None:
101 if not self.max_disk_utilization:
--> 102 return self.handle_limit_not_set()
104 current_utilization = get_disk_utilization_percent()
106 if current_utilization is None:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/utils/disk_utilization.py:146, in RaisingErrorHandler.handle_limit_not_set(self)
145 def handle_limit_not_set(self) -> None:
--> 146 return self.func(*self.args, **self.kwargs)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/operation_processors/sync_operation_processor.py:75, in SyncOperationProcessor.enqueue_operation(self, op, wait)
73 @ensure_disk_not_overutilize
74 def enqueue_operation(self, op: "Operation", *, wait: bool) -> None:
---> 75 _, errors = self._backend.execute_operations(
76 container_id=self._container_id,
77 container_type=self._container_type,
78 operations=[op],
79 operation_storage=self._operation_storage,
80 )
81 if errors:
82 raise errors[0]
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/backends/hosted_neptune_backend.py:518, in HostedNeptuneBackend.execute_operations(self, container_id, container_type, operations, operation_storage)
506 (
507 artifact_operations_errors,
508 assign_artifact_operations,
(...)
512 artifact_operations=preprocessed_operations.artifact_operations,
513 )
515 errors.extend(artifact_operations_errors)
517 errors.extend(
--> 518 self._execute_operations(
519 container_id,
520 container_type,
521 operations=assign_artifact_operations + preprocessed_operations.other_operations,
522 )
523 )
525 for op in itertools.chain(
526 preprocessed_operations.upload_operations,
527 assign_artifact_operations,
528 preprocessed_operations.other_operations,
529 ):
530 op.clean(operation_storage=operation_storage)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/common/backends/utils.py:79, in with_api_exceptions_handler.<locals>.wrapper(*args, **kwargs)
76 break
78 try:
---> 79 return func(*args, **kwargs)
80 except requests.exceptions.InvalidHeader as e:
81 if "X-Neptune-Api-Token" in e.args[0]:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/backends/hosted_neptune_backend.py:682, in HostedNeptuneBackend._execute_operations(self, container_id, container_type, operations)
669 kwargs = {
670 "experimentId": container_id,
671 "operations": [
(...)
678 **DEFAULT_REQUEST_KWARGS,
679 }
681 try:
--> 682 result = self.leaderboard_client.api.executeOperations(**kwargs).response().result
683 return [MetadataInconsistency(err.errorDescription) for err in result]
684 except HTTPNotFound as e:
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/backends/swagger_client_wrapper.py:113, in ApiMethodWrapper.__call__(self, *args, **kwargs)
111 return FinishedApiResponseFuture(future.response()) # wait synchronously
112 except HTTPError as e:
--> 113 self.handle_neptune_http_errors(e.response, exception=e)
File ~/prj/organization/project/.conda/lib/python3.11/site-packages/neptune/internal/backends/swagger_client_wrapper.py:102, in ApiMethodWrapper.handle_neptune_http_errors(response, exception)
100 if error_processor:
101 if exception:
--> 102 raise error_processor(body) from exception
103 raise error_processor(body)
105 if exception:
NeptuneFieldCountLimitExceedException:
----NeptuneFieldCountLimitExceedException---------------------------------------------------------------------------------------
There are too many fields (more than 9000) in the organization/transformer-evaluation/TRAN-30 run.
We have stopped the synchronization to the Neptune server and stored the data locally.
To continue uploading the metadata:
1. Delete some excess fields from organization/transformer-evaluation/TRAN-30.
You can delete fields or namespaces with the "del" command.
For example, to delete the "training/checkpoints" namespace:
del run["training/checkpoints"]
2. Once you're done, synchronize the data manually with the following command:
neptune sync -p project_name
For more details, see https://docs.neptune.ai/usage/best_practices
Environment
The output of pip list:
Package Version
------------------------- --------------
appnope 0.1.4
arrow 1.3.0
asttokens 2.4.1
attrs 23.2.0
backcall 0.2.0
beautifulsoup4 4.12.3
bleach 6.1.0
boto3 1.34.136
botocore 1.34.136
bravado 11.0.3
bravado-core 6.1.1
Brotli 1.0.9
certifi 2024.7.4
charset-normalizer 2.0.4
click 8.1.7
cloudpickle 2.2.1
comm 0.2.2
contourpy 1.2.0
cycler 0.11.0
debugpy 1.6.7
decorator 5.1.1
defusedxml 0.7.1
dill 0.3.8
docker 7.1.0
docopt 0.6.2
exceptiongroup 1.2.0
executing 2.0.1
fastjsonschema 2.20.0
filelock 3.13.1
fonttools 4.51.0
fqdn 1.5.1
future 1.0.0
gitdb 4.0.11
GitPython 3.1.43
gmpy2 2.1.2
google-pasta 0.2.0
idna 3.7
importlib-metadata 6.11.0
importlib_resources 6.4.0
ipykernel 6.29.4
ipympl 0.9.4
ipython 8.12.3
ipython_genutils 0.2.0
ipywidgets 8.1.3
isoduration 20.11.0
itermplot 0.331
jedi 0.19.1
Jinja2 3.1.4
jmespath 1.0.1
jsonpointer 3.0.0
jsonref 1.1.0
jsonschema 4.22.0
jsonschema-specifications 2023.12.1
jupyter_client 8.6.2
jupyter_core 5.7.2
jupyterlab_pygments 0.3.0
jupyterlab_widgets 3.0.11
kiwisolver 1.4.4
loguru 0.7.2
MarkupSafe 2.1.3
matplotlib 3.8.4
matplotlib-inline 0.1.7
mistune 3.0.2
mkl-fft 1.3.8
mkl-random 1.2.4
mkl-service 2.4.0
monotonic 1.6
mpmath 1.3.0
msgpack 1.0.8
multiprocess 0.70.16
nbclient 0.10.0
nbconvert 7.16.4
nbformat 5.10.4
neptune 1.10.4
nest_asyncio 1.6.0
networkx 3.2.1
numpy 1.26.4
oauthlib 3.2.2
packaging 24.1
pandas 1.5.3
pandocfilters 1.5.1
parso 0.8.4
pathos 0.3.2
patsy 0.5.6
pexpect 4.9.0
pickleshare 0.7.5
pillow 10.3.0
pip 24.0
pipreqs 0.5.0
platformdirs 4.2.2
plotly 5.22.0
pooch 1.8.2
pox 0.3.4
ppft 1.7.6.8
prompt_toolkit 3.0.47
protobuf 4.25.3
psutil 5.9.8
ptyprocess 0.7.0
pure-eval 0.2.2
Pygments 2.18.0
PyJWT 2.8.0
pyparsing 3.0.9
PySocks 1.7.1
python-dateutil 2.9.0
pytz 2024.1
PyYAML 6.0.1
pyzmq 24.0.1
referencing 0.35.1
requests 2.32.2
requests-oauthlib 2.0.0
rfc3339-validator 0.1.4
rfc3986-validator 0.1.1
rpds-py 0.18.1
s3transfer 0.10.2
sagemaker 2.224.2
schema 0.7.7
scipy 1.10.1
seaborn 0.13.2
setuptools 69.5.1
simplejson 3.19.2
six 1.16.0
smdebug-rulesconfig 1.0.1
smmap 5.0.1
soupsieve 2.5
stack-data 0.6.2
statsmodels 0.14.2
swagger-spec-validator 3.0.4
sympy 1.12
tblib 3.0.0
tenacity 8.5.0
tinycss2 1.3.0
torch 2.2.2
torchaudio 2.2.2
torchvision 0.17.2
tornado 6.4.1
tqdm 4.66.4
traitlets 5.14.3
types-python-dateutil 2.9.0.20240316
typing_extensions 4.11.0
unicodedata2 15.1.0
uri-template 1.3.0
urllib3 2.2.2
wcwidth 0.2.13
webcolors 24.6.0
webencodings 0.5.1
websocket-client 1.8.0
wheel 0.43.0
widgetsnbextension 4.0.11
yarg 0.1.9
zipp 3.19.2
The operating system you're using: MacOS Sonoma 14.4.1 (23E224)
The output of python --version: Python 3.11.9