Prevent the program crash caused by Chinese characters in special cases, and the duplicate responses to blank input
I pulled the latest code, but the program still errors out after starting. The error is as follows:
Welcome to the MOSS AI assistant! Type anything to start a conversation. Type clear to clear the conversation history, or type stop to end the conversation.
<|Human|>: 你好
after query
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/MOSS/moss_cli_demo.py:100 in <module>                                                       │
│ │
│ 97 │ │ │ print(response.lstrip('\n')) │
│ 98 │
│ 99 if __name__ == "__main__":                                                                     │
│ ❱ 100 │ main() │
│ 101 │
│ │
│ /data/MOSS/moss_cli_demo.py:82 in main │
│ │
│ 79 │ │ prompt += '<|Human|>: ' + query + '<eoh>'                                                  │
│ 80 │ │ inputs = tokenizer(prompt, return_tensors="pt") │
│ 81 │ │ with torch.no_grad(): │
│ ❱ 82 │ │ │ outputs = model.generate( │
│ 83 │ │ │ │ inputs.input_ids.cuda(), │
│ 84 │ │ │ │ attention_mask=inputs.attention_mask.cuda(), │
│ 85 │ │ │ │ max_length=2048, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27 in decorate_context │
│ │
│ 24 │ │ @functools.wraps(func) │
│ 25 │ │ def decorate_context(*args, **kwargs): │
│ 26 │ │ │ with self.clone(): │
│ ❱ 27 │ │ │ │ return func(*args, **kwargs) │
│ 28 │ │ return cast(F, decorate_context) │
│ 29 │ │
│ 30 │ def _wrap_generator(self, func): │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/transformers/generation/utils.py:1571 in generate │
│ │
│ 1568 │ │ │ ) │
│ 1569 │ │ │ │
│ 1570 │ │ │ # 12. run sample │
│ ❱ 1571 │ │ │ return self.sample( │
│ 1572 │ │ │ │ input_ids, │
│ 1573 │ │ │ │ logits_processor=logits_processor, │
│ 1574 │ │ │ │ logits_warper=logits_warper, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/transformers/generation/utils.py:2534 in sample │
│ │
│ 2531 │ │ │ model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs) │
│ 2532 │ │ │ │
│ 2533 │ │ │ # forward pass to get next token │
│ ❱ 2534 │ │ │ outputs = self( │
│ 2535 │ │ │ │ **model_inputs, │
│ 2536 │ │ │ │ return_dict=True, │
│ 2537 │ │ │ │ output_attentions=output_attentions, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:678 in forward │
│ │
│ 675 │ │ """ │
│ 676 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 677 │ │ │
│ ❱ 678 │ │ transformer_outputs = self.transformer( │
│ 679 │ │ │ input_ids, │
│ 680 │ │ │ past_key_values=past_key_values, │
│ 681 │ │ │ attention_mask=attention_mask, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:545 in forward │
│ │
│ 542 │ │ │ │ │ head_mask[i], │
│ 543 │ │ │ │ ) │
│ 544 │ │ │ else: │
│ ❱ 545 │ │ │ │ outputs = block( │
│ 546 │ │ │ │ │ hidden_states=hidden_states, │
│ 547 │ │ │ │ │ layer_past=layer_past, │
│ 548 │ │ │ │ │ attention_mask=attention_mask, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:270 in forward │
│ │
│ 267 │ ) -> Union[Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor │
│ 268 │ │ residual = hidden_states │
│ 269 │ │ hidden_states = self.ln_1(hidden_states) │
│ ❱ 270 │ │ attn_outputs = self.attn( │
│ 271 │ │ │ hidden_states=hidden_states, │
│ 272 │ │ │ layer_past=layer_past, │
│ 273 │ │ │ attention_mask=attention_mask, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:164 in forward │
│ │
│ 161 │ │ Tuple[torch.Tensor, Tuple[torch.Tensor]], │
│ 162 │ │ Optional[Tuple[torch.Tensor, Tuple[torch.Tensor], Tuple[torch.Tensor, ...]]], │
│ 163 │ ]: │
│ ❱ 164 │ │ qkv = self.qkv_proj(hidden_states) │
│ 165 │ │ # TODO(enijkamp): factor out number of logical TPU-v4 cores or make forward pass │
│ 166 │ │ mp_num = 4 │
│ 167 │ │ qkv_split = qkv.reshape(qkv.shape[:-1] + (mp_num, -1)) │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs)                                               │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/quantization.py:367 in forward │
│ │
│ 364 │ │
│ 365 │ def forward(self, x): │
│ 366 │ │ out_shape = x.shape[:-1] + (self.outfeatures,) │
│ ❱ 367 │ │ out = QuantLinearFunction.apply(x.reshape(-1, x.shape[-1]), self.qweight, self.s │
│ 368 │ │ │ │ │ │ │ │ │ │ self.qzeros, self.g_idx, self.bits, self.maxq) │
│ 369 │ │ out = out + self.bias if self.bias is not None else out │
│ 370 │ │ return out.reshape(out_shape) │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/cuda/amp/autocast_mode.py:105 in decorate_fwd │
│ │
│ 102 │ │ │ │ with autocast(enabled=False): │
│ 103 │ │ │ │ │ return fwd(*_cast(args, cast_inputs), **_cast(kwargs, cast_inputs))                │
│ 104 │ │ │ else: │
│ ❱ 105 │ │ │ │ return fwd(*args, **kwargs) │
│ 106 │ return decorate_fwd │
│ 107 │
│ 108 │
│ │
│ /data/MOSS/models/quantization.py:279 in forward │
│ │
│ 276 │ @staticmethod │
│ 277 │ @custom_fwd(cast_inputs=torch.float16) │
│ 278 │ def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq): │
│ ❱ 279 │ │ output = matmul248(input, qweight, scales, qzeros, g_idx, bits, maxq) │
│ 280 │ │ ctx.save_for_backward(qweight, scales, qzeros, g_idx) │
│ 281 │ │ ctx.bits, ctx.maxq = bits, maxq │
│ 282 │ │ return output │
│ │
│ /data/MOSS/models/quantization.py:250 in matmul248 │
│ │
│ 247 │ output = torch.empty((input.shape[0], qweight.shape[1]), device='cuda', dtype=torch. │
│ 248 │ grid = lambda META: ( │
│ 249 │ triton.cdiv(input.shape[0], META['BLOCK_SIZE_M']) * triton.cdiv(qweight.shape[1], ME │
│ ❱ 250 │ matmul_248_kernel[grid](input, qweight, output, │
│ 251 │ │ │ │ │ │ │ scales, qzeros, g_idx, │
│ 252 │ │ │ │ │ │ │ input.shape[0], qweight.shape[1], input.shape[1], bits, maxq │
│ 253 │ │ │ │ │ │ │ input.stride(0), input.stride(1), │
│ │
│ /data/MOSS/models/custom_autotune.py:93 in run │
│ │
│ 90 │ │ │ │ │ │ │ for config in pruned_configs} │
│ 91 │ │ │ │ bench_end = time.time() │
│ 92 │ │ │ │ self.bench_time = bench_end - bench_start │
│ ❱ 93 │ │ │ │ self.cache[key] = builtins.min(timings, key=timings.get) │
│ 94 │ │ │ │ self.hook(args) │
│ 95 │ │ │ │ self.configs_timings = timings │
│ 96 │ │ │ config = self.cache[key] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: '<' not supported between instances of 'tuple' and 'float'
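For reference, the exception is raised from `custom_autotune.py:93`, where `builtins.min(timings, key=timings.get)` picks the fastest kernel config. Below is a minimal sketch (with made-up config names and timing values, not the repo's actual data) of how that call produces exactly this `TypeError` when the per-config timing values mix tuples and plain floats:

```python
# Minimal sketch, not the actual repo code: illustrates how
# min(timings, key=timings.get) can raise
# "TypeError: '<' not supported between instances of 'tuple' and 'float'"
# when the dict values are of mixed types.
timings = {
    "config_a": float("inf"),        # hypothetical fallback value (a float)
    "config_b": (0.12, 0.10, 0.15),  # hypothetical benchmark result (a tuple)
}

best_config = min(timings, key=timings.get)  # raises the TypeError above
```

If the timing values really are mixed like this (e.g. one code path returning a tuple from the benchmark while the fallback path returns a bare `float('inf')`), normalizing them to a single type before the `min` call would avoid the comparison error.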