AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
While running `model, tokenizer = load_model(model_name, bnb_config)`, I am getting the following error.
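For context, here is roughly the code being run, a minimal sketch reconstructed from the traceback frames below (the exact `BitsAndBytesConfig` fields are my assumption; the failing `cquantize_blockwise_fp16_nf4` call does suggest 4-bit NF4 quantization):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

def create_bnb_config():
    # Assumed configuration: 4-bit NF4, matching the nf4 kernel named
    # in the error message.
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,  # assumption
    )

def load_model(model_name, bnb_config):
    n_gpus = torch.cuda.device_count()
    max_memory = f"{40960}MB"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",  # dispatch the model efficiently on the available resources
        max_memory={i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
    return model, tokenizer
```

The full traceback: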
```
AttributeError                            Traceback (most recent call last)
Cell In[33], line 4
      2 model_name = "meta-llama/Llama-2-7b-hf"
      3 bnb_config = create_bnb_config()
----> 4 model, tokenizer = load_model(model_name, bnb_config)

Cell In[4], line 5, in load_model(model_name, bnb_config)
      2 n_gpus = torch.cuda.device_count()
      3 max_memory = f'{40960}MB'
----> 5 model = AutoModelForCausalLM.from_pretrained(
      6     model_name,
      7     quantization_config=bnb_config,
      8     device_map="auto",  # dispatch efficiently the model on the available ressources
      9     max_memory = {i: max_memory for i in range(n_gpus)},
     10 )
     11 tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
     13 # Needed for LLaMA tokenizer

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:566, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    564 elif type(config) in cls._model_mapping.keys():
    565     model_class = _get_model_class(config, cls._model_mapping)
--> 566     return model_class.from_pretrained(
    567         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    568     )
    569 raise ValueError(
    570     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    571     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    572 )

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:3480, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
   3471 if dtype_orig is not None:
   3472     torch.set_default_dtype(dtype_orig)
   3473 (
   3474     model,
   3475     missing_keys,
   3476     unexpected_keys,
   3477     mismatched_keys,
   3478     offload_index,
   3479     error_msgs,
-> 3480 ) = cls._load_pretrained_model(
   3481     model,
   3482     state_dict,
   3483     loaded_state_dict_keys,  # XXX: rename?
   3484     resolved_archive_file,
   3485     pretrained_model_name_or_path,
   3486     ignore_mismatched_sizes=ignore_mismatched_sizes,
   3487     sharded_metadata=sharded_metadata,
   3488     _fast_init=_fast_init,
   3489     low_cpu_mem_usage=low_cpu_mem_usage,
   3490     device_map=device_map,
   3491     offload_folder=offload_folder,
   3492     offload_state_dict=offload_state_dict,
   3493     dtype=torch_dtype,
   3494     is_quantized=(getattr(model, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES),
   3495     keep_in_fp32_modules=keep_in_fp32_modules,
   3496 )
   3498 model.is_loaded_in_4bit = load_in_4bit
   3499 model.is_loaded_in_8bit = load_in_8bit

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:3870, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
   3868 if low_cpu_mem_usage:
   3869     if not is_fsdp_enabled() or is_fsdp_enabled_and_dist_rank_0():
-> 3870         new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
   3871             model_to_load,
   3872             state_dict,
   3873             loaded_keys,
   3874             start_prefix,
   3875             expected_keys,
   3876             device_map=device_map,
   3877             offload_folder=offload_folder,
   3878             offload_index=offload_index,
   3879             state_dict_folder=state_dict_folder,
   3880             state_dict_index=state_dict_index,
   3881             dtype=dtype,
   3882             is_quantized=is_quantized,
   3883             is_safetensors=is_safetensors,
   3884             keep_in_fp32_modules=keep_in_fp32_modules,
   3885         )
   3886         error_msgs += new_error_msgs
   3887     else:

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:751, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
    748     fp16_statistics = None
    750 if "SCB" not in param_name:
--> 751     set_module_quantized_tensor_to_device(
    752         model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
    753     )
    755 return error_msgs, offload_index, state_dict_index

File /opt/conda/lib/python3.10/site-packages/transformers/integrations/bitsandbytes.py:98, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
     96     new_value = bnb.nn.Int8Params(new_value, requires_grad=False, **kwargs).to(device)
     97 elif is_4bit:
---> 98     new_value = bnb.nn.Params4bit(new_value, requires_grad=False, **kwargs).to(device)
    100 module._parameters[tensor_name] = new_value
    101 if fp16_statistics is not None:

File /opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:179, in Params4bit.to(self, *args, **kwargs)
    176 device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
    178 if (device is not None and device.type == "cuda" and self.data.device.type == "cpu"):
--> 179     return self.cuda(device)
    180 else:
    181     s = self.quant_state

File /opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:157, in Params4bit.cuda(self, device)
    155 def cuda(self, device):
    156     w = self.data.contiguous().half().cuda(device)
--> 157     w_4bit, quant_state = bnb.functional.quantize_4bit(w, blocksize=self.blocksize, compress_statistics=self.compress_statistics, quant_type=self.quant_type)
    158     self.data = w_4bit
    159     self.quant_state = quant_state

File /opt/conda/lib/python3.10/site-packages/bitsandbytes/functional.py:832, in quantize_4bit(A, absmax, out, blocksize, compress_statistics, quant_type)
    830         lib.cquantize_blockwise_fp16_fp4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
    831     else:
--> 832         lib.cquantize_blockwise_fp16_nf4(get_ptr(None), get_ptr(A), get_ptr(absmax), get_ptr(out), ct.c_int32(blocksize), ct.c_int(n))
    833 elif A.dtype == torch.bfloat16:
    834     if quant_type == 'fp4':

AttributeError: 'NoneType' object has no attribute 'cquantize_blockwise_fp16_nf4'
```
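If I read the last frame correctly, the crash is not in transformers but in bitsandbytes: the module-level `lib` handle in `bitsandbytes/functional.py` is `None`, i.e. the compiled CUDA library of bitsandbytes was never loaded, so the first 4-bit quantization call dies with this `AttributeError`. A minimal check, using only names visible in the traceback:

```python
import torch
import bitsandbytes as bnb

# 4-bit quantization needs a visible CUDA device.
print(torch.version.cuda)         # CUDA version PyTorch was built against
print(torch.cuda.is_available())  # should print True

# `lib` is the handle the failing frame dereferences (functional.py:832).
# If bitsandbytes could not load its compiled CUDA library, this prints
# None, which reproduces the AttributeError above.
print(bnb.functional.lib)
```

If it prints `None`, the usual causes seem to be a bitsandbytes build that does not match the installed CUDA runtime, or no GPU visible to the process; recent bitsandbytes releases also ship a self-diagnostic (`python -m bitsandbytes`) that reports which binary it tried to load.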
I'm running into this problem too. How do I solve it?