metaseq
metaseq copied to clipboard
Remove Namespace hack
See: https://github.com/facebookresearch/metaseq/blob/9afea52f5988fcbfe6133591fc27dd56044bd4ea/metaseq/checkpoint_utils.py#L424-L425
A previous attempt at removing this hack broke generation, evals, and resuming training from a checkpoint (Namespace objects were slipping through despite the conversion to OmegaConf; we still need to track down where that happens).
Hi @suchenzang ,
The hack does not seem to work well on my machine.
After the setup, running the following command raises `AttributeError: module 'omegaconf._utils' has no attribute 'is_primitive_type'`:
opt-baselines -n 1 -g 8 \
--python /data/envs/conda/envs/py37/bin/python \
--data /data/repos/opt/tmp-data \
--checkpoints-dir /data/repos/opt/tmp-ckpts \
--model-size 125m -p debug --azure --local
/data/repos/opt/metaseq/slurm_snapshot_code_oss/2022-05-24T10_31_19.647829/metaseq/dataclass/utils.py:357: UserWarning:
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
with initialize(config_path=config_path):
Traceback (most recent call last):
File "./slurm_snapshot_code_oss/2022-05-24T10_31_19.647829/metaseq_cli/train.py", line 591, in <module>
cli_main()
File "./slurm_snapshot_code_oss/2022-05-24T10_31_19.647829/metaseq_cli/train.py", line 574, in cli_main
cfg = convert_namespace_to_omegaconf(args)
File "/data/repos/opt/metaseq/slurm_snapshot_code_oss/2022-05-24T10_31_19.647829/metaseq/dataclass/utils.py", line 375, in convert_namespace_to_omegaconf
old_primitive = _utils.is_primitive_type
AttributeError: module 'omegaconf._utils' has no attribute 'is_primitive_type'
But if I downgrade to `omegaconf==2.0.*`, I get a `DistributionNotFound` error instead, saying that `'wheel>=0.26'` was not found:
Traceback (most recent call last):
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 573, in _build_master
ws.require(__requires__)
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 891, in require
needed = self.resolve(parse_requirements(requirements))
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 782, in resolve
raise VersionConflict(dist, req).with_context(dependent_req)
pkg_resources.ContextualVersionConflict: (omegaconf 2.0.0 (/data/envs/conda/envs/py37/lib/python3.7/site-packages), Requirement.parse('omegaconf~=2.2'), {'hydra-core'})
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/data/envs/conda/envs/py37/bin/opt-baselines", line 33, in <module>
sys.exit(load_entry_point('metaseq', 'console_scripts', 'opt-baselines')())
File "/data/envs/conda/envs/py37/bin/opt-baselines", line 25, in importlib_load_entry_point
return next(matches).load()
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/importlib_metadata/__init__.py", line 203, in load
module = import_module(match.group('module'))
File "/data/envs/conda/envs/py37/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 953, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 953, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1006, in _gcd_import
File "<frozen importlib._bootstrap>", line 983, in _find_and_load
File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/data/repos/opt/metaseq/metaseq/__init__.py", line 20, in <module>
from metaseq.distributed import utils as distributed_utils
File "/data/repos/opt/metaseq/metaseq/distributed/__init__.py", line 6, in <module>
from .distributed_timeout_wrapper import DistributedTimeoutWrapper
File "/data/repos/opt/metaseq/metaseq/distributed/distributed_timeout_wrapper.py", line 11, in <module>
from torch import nn
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/torch/__init__.py", line 29, in <module>
from .torch_version import __version__ as __version__
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/torch/torch_version.py", line 3, in <module>
from pkg_resources import packaging # type: ignore[attr-defined]
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 3266, in <module>
@_call_aside
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 3241, in _call_aside
f(*args, **kwargs)
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 3279, in _initialize_master_working_set
working_set = WorkingSet._build_master()
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 575, in _build_master
return cls._build_from_requirements(__requires__)
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 588, in _build_from_requirements
dists = ws.resolve(reqs, Environment())
File "/data/envs/conda/envs/py37/lib/python3.7/site-packages/pkg_resources/__init__.py", line 777, in resolve
raise DistributionNotFound(req, requirers)
pkg_resources.DistributionNotFound: The 'wheel>=0.26' distribution was not found and is required by tensorboard
I am not sure what the correct way to fix it is :(
This seems unrelated to this task - please open a separate issue to track it. For reference, we are using omegaconf version 2.1.1 (we should pin this version here too).
Thanks for the information. Specifying `omegaconf==2.1.1` in `setup.py` solves my problem.