ColossalAI
ColossalAI copied to clipboard
[BUG]: colossalai check error
🐛 Describe the bug
PyTorch 1.9 + CUDA 11.1 + Nvidia A30 GPU
After installing successfully, running `colossalai check -i`
shows the error below.
Traceback (most recent call last):
File "/home/liuzixi01/.conda/envs/torch-cuda11/bin/colossalai", line 5, in <module>
from colossalai.cli import cli
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/__init__.py", line 1, in <module>
from .initialize import (initialize, launch, launch_from_openmpi,
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/initialize.py", line 23, in <module>
from colossalai.engine.schedule import NonPipelineSchedule, PipelineSchedule, InterleavedPipelineSchedule, get_tensor_shape
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/engine/__init__.py", line 1, in <module>
from ._base_engine import Engine
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/engine/_base_engine.py", line 11, in <module>
from colossalai.engine.schedule import BaseSchedule, NonPipelineSchedule, PipelineSchedule, InterleavedPipelineSchedule
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/engine/schedule/__init__.py", line 2, in <module>
from ._pipeline_schedule import PipelineSchedule, InterleavedPipelineSchedule, get_tensor_shape
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/engine/schedule/_pipeline_schedule.py", line 9, in <module>
from colossalai.amp.naive_amp import NaiveAMPModel
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/amp/__init__.py", line 9, in <module>
from .torch_amp import convert_to_torch_amp
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/amp/torch_amp/__init__.py", line 5, in <module>
from .torch_amp import TorchAMPOptimizer, TorchAMPModel, TorchAMPLoss
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/amp/torch_amp/torch_amp.py", line 12, in <module>
from colossalai.nn.optimizer import ColossalaiOptimizer
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/nn/__init__.py", line 1, in <module>
from .layer import *
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/nn/layer/__init__.py", line 7, in <module>
from .moe import *
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/nn/layer/moe/__init__.py", line 1, in <module>
from .experts import Experts, FFNExperts, TPExperts
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/nn/layer/moe/experts.py", line 8, in <module>
from colossalai.zero.init_ctx import no_shard_zero_decrator
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/__init__.py", line 6, in <module>
from colossalai.zero.sharded_model.sharded_model_v2 import ShardedModelV2
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/sharded_model/__init__.py", line 1, in <module>
from .sharded_model_v2 import ShardedModelV2
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/sharded_model/sharded_model_v2.py", line 12, in <module>
from colossalai.zero.utils import ZeroHook
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/utils/__init__.py", line 1, in <module>
from .zero_hook import ZeroHook
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/utils/zero_hook.py", line 10, in <module>
from colossalai.zero.shard_utils import BaseShardStrategy
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/shard_utils/__init__.py", line 1, in <module>
from .base_shard_strategy import BaseShardStrategy
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/shard_utils/base_shard_strategy.py", line 5, in <module>
from colossalai.zero.sharded_param.sharded_tensor import ShardedTensor
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/sharded_param/__init__.py", line 1, in <module>
from colossalai.zero.sharded_param.sharded_tensor import ShardedTensor
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/zero/sharded_param/sharded_tensor.py", line 2, in <module>
from colossalai.gemini.stateful_tensor import StatefulTensor, TensorState
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/gemini/__init__.py", line 1, in <module>
from .stateful_tensor_mgr import StatefulTensorMgr
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/gemini/stateful_tensor_mgr.py", line 7, in <module>
from colossalai.gemini.tensor_placement_policy import TensorPlacementPolicy
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/gemini/tensor_placement_policy.py", line 10, in <module>
from colossalai.gemini.memory_tracer import MemStatsCollector
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/gemini/memory_tracer/__init__.py", line 3, in <module>
from .memstats_collector import MemStatsCollector
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/gemini/memory_tracer/memstats_collector.py", line 5, in <module>
from colossalai.tensor import ChunkManager
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/tensor/__init__.py", line 3, in <module>
from .colo_tensor import ColoTensor
File "/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/colossalai/tensor/colo_tensor.py", line 9, in <module>
from torch.overrides import get_default_nowrap_functions
ImportError: cannot import name 'get_default_nowrap_functions' from 'torch.overrides' (/home/liuzixi01/.conda/envs/torch-cuda11/lib/python3.8/site-packages/torch/overrides.py)
Environment
pytorch 1.9 + cuda 11.1
I think this is a version mismatch issue. Can you try it with torch 1.10 or 1.11?
With PyTorch 1.8.0 + CUDA 11.1 + Nvidia RTX 3090, I have encountered the same error.
@wohaocaiji @zixiliuUSC I think the import error has been fixed in the latest main branch.