Prevent the program crash caused by Chinese characters in special cases, and the duplicate responses to blank input
I pulled the latest code, but the program still errors out after starting. The error is as follows:
Welcome to the MOSS AI assistant! Type anything to start a conversation. Type clear to clear the conversation history, or type stop to end the conversation.
<|Human|>: 你好
after query
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /data/MOSS/moss_cli_demo.py:100 in <module>                                                       │
│ │
│ 97 │ │ │ print(response.lstrip('\n')) │
│ 98 │
│ 99 if __name__ == "__main__":                                                                     │
│ ❱ 100 │ main() │
│ 101 │
│ │
│ /data/MOSS/moss_cli_demo.py:82 in main │
│ │
│ 79 │ │ prompt += '<|Human|>: ' + query + '<eoh>'                                                  │
│ 80 │ │ inputs = tokenizer(prompt, return_tensors="pt") │
│ 81 │ │ with torch.no_grad(): │
│ ❱ 82 │ │ │ outputs = model.generate( │
│ 83 │ │ │ │ inputs.input_ids.cuda(), │
│ 84 │ │ │ │ attention_mask=inputs.attention_mask.cuda(), │
│ 85 │ │ │ │ max_length=2048, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27 in decorate_context │
│ │
│ 24 │ │ @functools.wraps(func) │
│ 25 │ │ def decorate_context(*args, **kwargs): │
│ 26 │ │ │ with self.clone(): │
│ ❱ 27 │ │ │ │ return func(*args, **kwargs) │
│ 28 │ │ return cast(F, decorate_context) │
│ 29 │ │
│ 30 │ def _wrap_generator(self, func): │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/transformers/generation/utils.py:1571 in generate │
│ │
│ 1568 │ │ │ ) │
│ 1569 │ │ │ │
│ 1570 │ │ │ # 12. run sample │
│ ❱ 1571 │ │ │ return self.sample( │
│ 1572 │ │ │ │ input_ids, │
│ 1573 │ │ │ │ logits_processor=logits_processor, │
│ 1574 │ │ │ │ logits_warper=logits_warper, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/transformers/generation/utils.py:2534 in sample │
│ │
│ 2531 │ │ │ model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs) │
│ 2532 │ │ │ │
│ 2533 │ │ │ # forward pass to get next token │
│ ❱ 2534 │ │ │ outputs = self( │
│ 2535 │ │ │ │ **model_inputs, │
│ 2536 │ │ │ │ return_dict=True, │
│ 2537 │ │ │ │ output_attentions=output_attentions, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:678 in forward │
│ │
│ 675 │ │ """ │
│ 676 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 677 │ │ │
│ ❱ 678 │ │ transformer_outputs = self.transformer( │
│ 679 │ │ │ input_ids, │
│ 680 │ │ │ past_key_values=past_key_values, │
│ 681 │ │ │ attention_mask=attention_mask, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:545 in forward │
│ │
│ 542 │ │ │ │ │ head_mask[i], │
│ 543 │ │ │ │ ) │
│ 544 │ │ │ else: │
│ ❱ 545 │ │ │ │ outputs = block( │
│ 546 │ │ │ │ │ hidden_states=hidden_states, │
│ 547 │ │ │ │ │ layer_past=layer_past, │
│ 548 │ │ │ │ │ attention_mask=attention_mask, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:270 in forward │
│ │
│ 267 │ ) -> Union[Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor │
│ 268 │ │ residual = hidden_states │
│ 269 │ │ hidden_states = self.ln_1(hidden_states) │
│ ❱ 270 │ │ attn_outputs = self.attn( │
│ 271 │ │ │ hidden_states=hidden_states, │
│ 272 │ │ │ layer_past=layer_past, │
│ 273 │ │ │ attention_mask=attention_mask, │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs) │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/modeling_moss.py:164 in forward │
│ │
│ 161 │ │ Tuple[torch.Tensor, Tuple[torch.Tensor]], │
│ 162 │ │ Optional[Tuple[torch.Tensor, Tuple[torch.Tensor], Tuple[torch.Tensor, ...]]], │
│ 163 │ ]: │
│ ❱ 164 │ │ qkv = self.qkv_proj(hidden_states) │
│ 165 │ │ # TODO(enijkamp): factor out number of logical TPU-v4 cores or make forward pass │
│ 166 │ │ mp_num = 4 │
│ 167 │ │ qkv_split = qkv.reshape(qkv.shape[:-1] + (mp_num, -1)) │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1194 in _call_impl │
│ │
│ 1191 │ │ # this function, and just call forward. │
│ 1192 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1193 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1194 │ │ │ return forward_call(*input, **kwargs)                                               │
│ 1195 │ │ # Do not call functions when jit is used │
│ 1196 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1197 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /data/MOSS/models/quantization.py:367 in forward │
│ │
│ 364 │ │
│ 365 │ def forward(self, x): │
│ 366 │ │ out_shape = x.shape[:-1] + (self.outfeatures,) │
│ ❱ 367 │ │ out = QuantLinearFunction.apply(x.reshape(-1, x.shape[-1]), self.qweight, self.s │
│ 368 │ │ │ │ │ │ │ │ │ │ self.qzeros, self.g_idx, self.bits, self.maxq) │
│ 369 │ │ out = out + self.bias if self.bias is not None else out │
│ 370 │ │ return out.reshape(out_shape) │
│ │
│ /opt/miniconda3/lib/python3.10/site-packages/torch/cuda/amp/autocast_mode.py:105 in decorate_fwd │
│ │
│ 102 │ │ │ │ with autocast(enabled=False): │
│ 103 │ │ │ │ │ return fwd(*_cast(args, cast_inputs), **_cast(kwargs, cast_inputs))                │
│ 104 │ │ │ else: │
│ ❱ 105 │ │ │ │ return fwd(*args, **kwargs) │
│ 106 │ return decorate_fwd │
│ 107 │
│ 108 │
│ │
│ /data/MOSS/models/quantization.py:279 in forward │
│ │
│ 276 │ @staticmethod │
│ 277 │ @custom_fwd(cast_inputs=torch.float16) │
│ 278 │ def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq): │
│ ❱ 279 │ │ output = matmul248(input, qweight, scales, qzeros, g_idx, bits, maxq) │
│ 280 │ │ ctx.save_for_backward(qweight, scales, qzeros, g_idx) │
│ 281 │ │ ctx.bits, ctx.maxq = bits, maxq │
│ 282 │ │ return output │
│ │
│ /data/MOSS/models/quantization.py:250 in matmul248 │
│ │
│ 247 │ output = torch.empty((input.shape[0], qweight.shape[1]), device='cuda', dtype=torch. │
│ 248 │ grid = lambda META: ( │
│ 249 │ triton.cdiv(input.shape[0], META['BLOCK_SIZE_M']) * triton.cdiv(qweight.shape[1], ME │
│ ❱ 250 │ matmul_248_kernel[grid](input, qweight, output, │
│ 251 │ │ │ │ │ │ │ scales, qzeros, g_idx, │
│ 252 │ │ │ │ │ │ │ input.shape[0], qweight.shape[1], input.shape[1], bits, maxq │
│ 253 │ │ │ │ │ │ │ input.stride(0), input.stride(1), │
│ │
│ /data/MOSS/models/custom_autotune.py:93 in run │
│ │
│ 90 │ │ │ │ │ │ │ for config in pruned_configs} │
│ 91 │ │ │ │ bench_end = time.time() │
│ 92 │ │ │ │ self.bench_time = bench_end - bench_start │
│ ❱ 93 │ │ │ │ self.cache[key] = builtins.min(timings, key=timings.get) │
│ 94 │ │ │ │ self.hook(args) │
│ 95 │ │ │ │ self.configs_timings = timings │
│ 96 │ │ │ config = self.cache[key] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: '<' not supported between instances of 'tuple' and 'float'
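For reference, the exception is raised from `custom_autotune.py:93`, where `builtins.min(timings, key=timings.get)` picks the fastest kernel config. Below is a minimal sketch (with made-up config names and timing values, not the repo's actual data) of how that call produces exactly this `TypeError` when the per-config timing values mix tuples and plain floats:

```python
# Minimal sketch, not the actual repo code: illustrates how
# min(timings, key=timings.get) can raise
# "TypeError: '<' not supported between instances of 'tuple' and 'float'"
# when the dict values are of mixed types.
timings = {
    "config_a": float("inf"),        # hypothetical fallback value (a float)
    "config_b": (0.12, 0.10, 0.15),  # hypothetical benchmark result (a tuple)
}

best_config = min(timings, key=timings.get)  # raises the TypeError above
```

If the timing values really are mixed like this (e.g. one code path returning a tuple from the benchmark while the fallback path returns a bare `float('inf')`), normalizing them to a single type before the `min` call would avoid the comparison error.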