D:\GPT4All_GPU\venv\Scripts\python.exe D:/GPT4All_GPU/main.py
CUDA version: 11.1
NVIDIA GeForce RTX 3060
Loading checkpoint shards: 100%|██████████| 33/33 [00:12<00:00, 2.68it/s]
┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ D:/GPT4All_GPU/main.py:15 in <module>                                       │
│ │
│ 12 print(torch.cuda.get_device_name(0)) │
│ 13 │
│ 14 #m = GPT4AllGPU('D:/GPT4All_GPU/llama-7b-hf') │
│ > 15 m = GPT4AllGPU('D:/GPT4All_GPU/llama-7b-hf/') │
│ 16 config = {'num_beams': 2, │
│ 17 │ │ 'min_new_tokens': 10, │
│ 18 │ │ 'max_length': 100, │
│ │
│ D:\GPT4All_GPU\venv\lib\site-packages\nomic\gpt4all\gpt4all.py:38 in │
│ __init__                                                                    │
│ │
│ 35 │ │ self.model = PeftModelForCausalLM.from_pretrained(self.model, │
│ 36 │ │ │ │ │ │ │ │ │ │ │ │ │ │ self.lora_p │
│ 37 │ │ │ │ │ │ │ │ │ │ │ │ │ │ device_map= │
│ > 38 │ │ │ │ │ │ │ │ │ │ │ │ │ │ torch_dtype │
│ 39 │ │ self.model.to(dtype=torch.float16) │
│ 40 │ │ print(f"Mem needed: {self.model.get_memory_footprint() / 1024 │
│ 41 │
│ │
│ D:\GPT4All_GPU\gpt4all\peft\src\peft\peft_model.py:177 in from_pretrained │
│ │
│ 174 │ │ │ │ device_map = infer_auto_device_map( │
│ 175 │ │ │ │ │ model, max_memory=max_memory, no_split_module_cla │
│ 176 │ │ │ │ ) │
│ > 177 │ │ │ model = dispatch_model(model, device_map=device_map) │
│ 178 │ │ │ hook = AlignDevicesHook(io_same_device=True) │
│ 179 │ │ │ if model.peft_config.peft_type == PeftType.LORA: │
│ 180 │ │ │ │ add_hook_to_module(model.base_model.model, hook) │
│ │
│ D:\GPT4All_GPU\venv\lib\site-packages\accelerate\big_modeling.py:376 in │
│ dispatch_model │
│ │
│ 373 │ │ offload=offload, │
│ 374 │ │ offload_buffers=offload_buffers, │
│ 375 │ │ weights_map=weights_map, │
│ > 376 │ │ preload_module_classes=preload_module_classes, │
│ 377 │ ) │
│ 378 │ # Attaching the hook may break tied weights, so we retie them │
│ 379 │ retie_parameters(model, tied_params) │
│ │
│ D:\GPT4All_GPU\venv\lib\site-packages\accelerate\hooks.py:478 in │
│ attach_align_device_hook_on_blocks │
│ │
│ 475 │ │ │ io_same_device=(module_name == ""), │
│ 476 │ │ │ place_submodules=True, │
│ 477 │ │ ) │
│ > 478 │ │ add_hook_to_module(module, hook) │
│ 479 │ │ attach_execution_device_hook(module, execution_device[module_ │
│ 480 │ elif module_name in execution_device and module_name in offload: │
│ 481 │ │ attach_align_device_hook( │
│ │
│ D:\GPT4All_GPU\venv\lib\site-packages\accelerate\hooks.py:155 in │
│ add_hook_to_module │
│ │
│ 152 │ │ old_forward = module.forward │
│ 153 │ │ module._old_forward = old_forward │
│ 154 │ │
│ > 155 │ module = hook.init_hook(module) │
│ 156 │ module._hf_hook = hook │
│ 157 │ │
│ 158 │ @functools.wraps(old_forward) │
│ │
│ D:\GPT4All_GPU\venv\lib\site-packages\accelerate\hooks.py:251 in init_hook │
│ │
│ 248 │ def init_hook(self, module): │
│ 249 │ │ if not self.offload and self.execution_device is not None: │
│ 250 │ │ │ for name, _ in named_module_tensors(module, recurse=self. │
│ > 251 │ │ │ │ set_module_tensor_to_device(module, name, self.execut │
│ 252 │ │ elif self.offload: │
│ 253 │ │ │ self.original_devices = { │
│ 254 │ │ │ │ name: param.device for name, param in named_module_te │
│ │
│ D:\GPT4All_GPU\venv\lib\site-packages\accelerate\utils\modeling.py:136 in │
│ set_module_tensor_to_device │
│ │
│ 133 │ old_value = getattr(module, tensor_name) │
│ 134 │ │
│ 135 │ if old_value.device == torch.device("meta") and device not in ["m │
│ > 136 │ │ raise ValueError(f"{tensor_name} is on the meta device, we ne │
│ 137 │ │
│ 138 │ if value is not None: │
│ 139 │ │ if dtype is None: │
└─────────────────────────────────────────────────────────────────────────────┘
ValueError: weight is on the meta device, we need a `value` to put in on cpu.
Process finished with exit code 1
What is the error, and how do I fix it?
Hi, I'm facing the exact same issue on Linux.
I think you don't have sufficient GPU memory to place all the tensors.
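In case it helps anyone landing here: below is a minimal sketch of one possible workaround, assuming the root cause really is insufficient GPU memory on the 12 GB RTX 3060. It bypasses the GPT4AllGPU wrapper and loads the base checkpoint directly, capping GPU usage so accelerate offloads the remaining layers to CPU RAM (and disk if needed). The memory limits and the offload folder name are illustrative values, not settings from this repo.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "D:/GPT4All_GPU/llama-7b-hf"  # base checkpoint path from the traceback

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,                # fp16 halves the footprint vs. fp32
    device_map="auto",                        # let accelerate split layers across devices
    max_memory={0: "10GiB", "cpu": "24GiB"},  # cap the 12 GB card; spill the rest to RAM
    offload_folder="offload",                 # disk fallback if CPU RAM fills up too
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
print(f"Mem needed: {model.get_memory_footprint() / 1024**3:.2f} GB")

If the model loads cleanly with these caps, the "weight is on the meta device" error above was most likely an out-of-memory condition, as suggested in the previous comment.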
Stale, please open a new issue if this is still relevant.