FastChat
FastChat copied to clipboard
RuntimeError: Tensor on device cpu is not on the expected device meta!
Hi, I am trying to run FastChat in CPU-only mode and I get the following error. Any idea what I need to do?
``debian@srv-azrod:~$ python3 -m fastchat.serve.cli --model-pat vicunaWeight --device cpu
Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:05<00:00, 1.89s/it]Some weights of LlamaForCausalLM were not initialized from the model checkpoint at vicunaWeight and are newly initialized: ['model.layers.37.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.38.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.35.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.36.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.27.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.32.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.30.self_attn.rotary_emb.inv_freq', 'model.layers.33.self_attn.rotary_emb.inv_freq', 'model.layers.28.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.34.self_attn.rotary_emb.inv_freq', 'model.layers.39.self_attn.rotary_emb.inv_freq']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
USER: jouons a un jeu de role
ASSISTANT: ╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /usr/lib/python3.9/runpy.py:197 in _run_module_as_main │
│ │
│ 194 │ main_globals = sys.modules["__main__"].__dict__ │
│ 195 │ if alter_argv: │
│ 196 │ │ sys.argv[0] = mod_spec.origin │
│ ❱ 197 │ return _run_code(code, main_globals, None, │
│ 198 │ │ │ │ │ "__main__", mod_spec) │
│ 199 │
│ 200 def run_module(mod_name, init_globals=None, │
│ │
│ /usr/lib/python3.9/runpy.py:87 in _run_code │
│ │
│ 84 │ │ │ │ │ loader = loader, │
│ 85 │ │ │ │ │ package = pkg_name, │
│ 86 │ │ │ │ │ spec = mod_spec) │
│ ❱ 87 │ exec(code, run_globals) │
│ 88 │ return run_globals │
│ 89 │
│ 90 def _run_module_code(code, init_globals=None, │
│ │
│ /home/debian/FastChat/fastchat/serve/cli.py:133 in None to a generator fires it up │
│ 34 │ │ │ with ctx_factory(): │
│ ❱ 35 │ │ │ │ response = gen.send(None) │
│ 36 │ │ │ │
│ 37 │ │ │ while True: │
│ 38 │ │ │ │ try: │
│ │
│ /home/debian/FastChat/fastchat/serve/inference.py:153 in generate_stream │
│ │
│ 150 │ │ │ │ logits = out.logits │
│ 151 │ │ │ │ past_key_values = out.past_key_values │
│ 152 │ │ │ else: │
│ ❱ 153 │ │ │ │ out = model( │
│ 154 │ │ │ │ │ torch.as_tensor([input_ids], device=device), use_cache=True) │
│ 155 │ │ │ │ logits = out.logits │
│ 156 │ │ │ │ past_key_values = out.past_key_values │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py:687 │
│ in forward │
│ │
│ 684 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 685 │ │ │
│ 686 │ │ # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) │
│ ❱ 687 │ │ outputs = self.model( │
│ 688 │ │ │ input_ids=input_ids, │
│ 689 │ │ │ attention_mask=attention_mask, │
│ 690 │ │ │ position_ids=position_ids, │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py:577 │
│ in forward │
│ │
│ 574 │ │ │ │ │ None, │
│ 575 │ │ │ │ ) │
│ 576 │ │ │ else: │
│ ❱ 577 │ │ │ │ layer_outputs = decoder_layer( │
│ 578 │ │ │ │ │ hidden_states, │
│ 579 │ │ │ │ │ attention_mask=attention_mask, │
│ 580 │ │ │ │ │ position_ids=position_ids, │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py:289 │
│ in forward │
│ │
│ 286 │ │ │
│ 287 │ │ residual = hidden_states │
│ 288 │ │ │
│ ❱ 289 │ │ hidden_states = self.input_layernorm(hidden_states) │
│ 290 │ │ │
│ 291 │ │ # Self Attention │
│ 292 │ │ hidden_states, self_attn_weights, present_key_value = self.self_attn( │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/nn/modules/module.py:1501 in _call_impl │
│ │
│ 1498 │ │ if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks │
│ 1499 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hooks │
│ 1500 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1501 │ │ │ return forward_call(*args, **kwargs) │
│ 1502 │ │ # Do not call functions when jit is used │
│ 1503 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1504 │ │ backward_pre_hooks = [] │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py:91 │
│ in forward │
│ │
│ 88 │ │ if self.weight.dtype in [torch.float16, torch.bfloat16]: │
│ 89 │ │ │ hidden_states = hidden_states.to(self.weight.dtype) │
│ 90 │ │ │
│ ❱ 91 │ │ return self.weight * hidden_states │
│ 92 │
│ 93 │
│ 94 class LlamaRotaryEmbedding(torch.nn.Module): │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/_prims_common/wrappers.py:220 in _fn │
│ │
│ 217 │ │ │ │ │ if k not in kwargs: │
│ 218 │ │ │ │ │ │ kwargs[k] = out_attr │
│ 219 │ │ │ │
│ ❱ 220 │ │ │ result = fn(*args, **kwargs) │
│ 221 │ │ │ assert ( │
│ 222 │ │ │ │ isinstance(result, TensorLike) │
│ 223 │ │ │ │ and is_tensor │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/_prims_common/wrappers.py:130 in _fn │
│ │
│ 127 │ │ │ } │
│ 128 │ │ │ bound.arguments.update(promoted_args) │
│ 129 │ │ │ │
│ ❱ 130 │ │ │ result = fn(**bound.arguments) │
│ 131 │ │ │ │
│ 132 │ │ │ if isinstance(result, TensorLike): │
│ 133 │ │ │ │ return _maybe_convert_to_dtype(result, result_dtype) │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/_refs/init.py:926 in _ref │
│ │
│ 923 │ │ │ │ ValueError, │
│ 924 │ │ │ ) │
│ 925 │ │ │ a, b = _maybe_broadcast(a, b) │
│ ❱ 926 │ │ │ return prim(a, b) │
│ 927 │ │ │
│ 928 │ │ if has_out: │
│ 929 │ │ │ _ref = out_wrapper()(_ref) │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/_refs/init.py:1532 in mul │
│ │
│ 1529 │ supports_two_python_scalars=True, │
│ 1530 ) │
│ 1531 def mul(a: TensorLikeType, b: TensorLikeType) -> TensorLikeType: │
│ ❱ 1532 │ return prims.mul(a, b) │
│ 1533 │
│ 1534 │
│ 1535 # TODO: add docstring │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/ops.py:287 in call │
│ │
│ 284 │ │ ) │
│ 285 │ │
│ 286 │ def call(self, *args, **kwargs): │
│ ❱ 287 │ │ return self.op(*args, **kwargs or {}) │
│ 288 │ │
│ 289 │ def hash(self): │
│ 290 │ │ return hash(self.op) │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/prims/init.py:346 in │
│ elementwise_meta │
│ │
│ 343 │ if args_with_fixed_dtypes is not None: │
│ 344 │ │ args = list(args_with_fixed_dtypes) + args │
│ 345 │ │
│ ❱ 346 │ utils.check_same_device(*args, allow_cpu_scalar_tensors=True) │
│ 347 │ utils.check_same_shape(*args, allow_cpu_scalar_tensors=True) │
│ 348 │ │
│ 349 │ strides = utils.compute_elementwise_output_strides(*args) │
│ │
│ /home/debian/.local/lib/python3.9/site-packages/torch/_prims_common/init.py:596 in │
│ check_same_device │
│ │
│ 593 │ │ │ │ │ + str(device) │
│ 594 │ │ │ │ │ + "!" │
│ 595 │ │ │ │ ) │
│ ❱ 596 │ │ │ │ raise RuntimeError(msg) │
│ 597 │ │ else: │
│ 598 │ │ │ msg = ( │
│ 599 │ │ │ │ "Unexpected type when checking for same device, " + str(type(arg)) + "!" │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Tensor on device cpu is not on the expected device meta!"