metaflow-service icon indicating copy to clipboard operation
metaflow-service copied to clipboard

Metaflow UI log ERROR

Open LennieGuy opened this issue 11 months ago • 4 comments

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/root/services/ui_backend_service/data/cache/client/cache_server.py", line 307, in <module>
    cli(auto_envvar_prefix='MFCACHE')
  File "/usr/local/lib/python3.11/site-packages/click/core.py", line 1128, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.11/site-packages/click/core.py", line 1053, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.11/site-packages/click/core.py", line 1395, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.11/site-packages/click/core.py", line 754, in invoke
    return __callback(*args, **kwargs)
  File "/root/services/ui_backend_service/data/cache/client/cache_server.py", line 301, in cli
    Scheduler(store, max_actions).loop()
  File "/root/services/ui_backend_service/data/cache/client/cache_server.py", line 196, in __init__
    self.pool = multiprocessing.Pool(
  File "/usr/local/lib/python3.11/multiprocessing/context.py", line 119, in Pool
    return Pool(processes, initializer, initargs, maxtasksperchild,
  File "/usr/local/lib/python3.11/multiprocessing/pool.py", line 215, in __init__
    self._repopulate_pool()
  File "/usr/local/lib/python3.11/multiprocessing/pool.py", line 306, in _repopulate_pool
    return self._repopulate_pool_static(self._ctx, self.Process,
  File "/usr/local/lib/python3.11/multiprocessing/pool.py", line 329, in _repopulate_pool_static
    w.start()
  File "/usr/local/lib/python3.11/multiprocessing/process.py", line 121, in start
    self._popen = self._Popen(self)
  File "/usr/local/lib/python3.11/multiprocessing/context.py", line 281, in _Popen
    return Popen(process_obj)
  File "/usr/local/lib/python3.11/multiprocessing/popen_fork.py", line 19, in __init__
    self._launch(process_obj)
  File "/usr/local/lib/python3.11/multiprocessing/popen_fork.py", line 71, in _launch
    code = process_obj._bootstrap(parent_sentinel=child_r)
  File "/usr/local/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.11/multiprocessing/pool.py", line 125, in worker
    result = (True, func(*args, **kwds))
  File "/root/services/ui_backend_service/data/cache/client/cache_worker.py", line 29, in execute_action
    execute(tempdir, action_cls, request)
  File "/root/services/ui_backend_service/data/cache/client/cache_worker.py", line 51, in execute
    res = action_cls.execute(
  File "/root/services/ui_backend_service/data/cache/get_log_file_action.py", line 133, in execute
    with streamed_errors(stream_output):
  File "/usr/local/lib/python3.11/contextlib.py", line 155, in __exit__
    self.gen.throw(typ, value, traceback)
  File "/root/services/ui_backend_service/data/cache/utils.py", line 130, in streamed_errors
    get_traceback_str()
  File "/root/services/ui_backend_service/data/cache/utils.py", line 124, in streamed_errors
    yield
  File "/root/services/ui_backend_service/data/cache/get_log_file_action.py", line 136, in execute
    current_hash = log_provider.get_log_hash(task, logtype)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/services/ui_backend_service/data/cache/get_log_file_action.py", line 270, in get_log_hash
    return get_log_size(task, logtype)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/services/ui_backend_service/data/cache/get_log_file_action.py", line 177, in get_log_size
    return task.stderr_size if logtype == STDERR else task.stdout_size
                                                      ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/client/core.py", line 1317, in stdout_size
    return self._get_logsize("stdout")
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/client/core.py", line 1433, in _get_logsize
    meta_dict = self.metadata_dict
                ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/client/core.py", line 1135, in metadata_dict
    m.name: m.value for m in sorted(self.metadata, key=lambda m: m.created_at)
                                    ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/client/core.py", line 1059, in metadata
    all_metadata = self._metaflow.metadata.get_object(
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/metadata/metadata.py", line 425, in get_object
    pre_filter = cls._get_object_internal(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/plugins/metadata/service.py", line 280, in _get_object_internal
    v, _ = cls._request(None, url, "GET")
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/metaflow/plugins/metadata/service.py", line 468, in _request
    raise ServiceException(

metaflow.plugins.metadata.service.ServiceException: Metadata request (/flows/ParquetCheck/runs/argo-parquetcheck.user.zhangxinyu19.parquetcheck-g8vjm/steps/start/tasks/t-2aa87376/metadata) failed (code 500): "{"err_msg": {"type": "timeout error"}}"

LennieGuy avatar Jul 24 '23 15:07 LennieGuy

@jfernandez, I need your help. [image]

LennieGuy avatar Jul 24 '23 15:07 LennieGuy

@LennieGuy, what's the output of the following?

from metaflow import namespace, Task

namespace(None)
Task('ParquetCheck/argo-parquetcheck.user.zhangxinyu19.parquetcheck-g8vjm/start/t-2aa87376').metadata_dict

savingoyal avatar Jul 24 '23 19:07 savingoyal

image

LennieGuy avatar Jul 25 '23 01:07 LennieGuy

@savingoyal Running it this way reports the same error. Is this a database problem? I checked and found another two million records in the database.

LennieGuy avatar Jul 25 '23 01:07 LennieGuy