PaddleX
PaddleX copied to clipboard
paddlex BML训练报错:(InvalidArgument) yolo_box(): argument 'X' (position 0) must be Tensor, but got Tensor (at /paddle/paddle/fluid/pybind/op_function_common.cc:818)
Checklist:
- 查找历史相关issue寻求解答
- 翻阅FAQ常见问题汇总和答疑
- 确认bug是否在新版本里还未修复
- 翻阅PaddleX API文档说明
描述问题
训练过程正常,第一个epoch结束后,刚开始验证时就报错:
`ValueError Traceback (most recent call last)
/tmp/ipykernel_1258/573955976.py in <module>
68 save_interval_epochs=5,
69 lr_decay_epochs=[216, 243],
---> 70 save_dir='output/yolov3_darknet53')
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/detector.py in train(self, num_epochs, train_dataset, train_batch_size, eval_dataset, optimizer, save_interval_epochs, log_interval_steps, save_dir, pretrain_weights, learning_rate, warmup_steps, warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop, early_stop_patience, use_vdl, resume_checkpoint)
332 early_stop=early_stop,
333 early_stop_patience=early_stop_patience,
--> 334 use_vdl=use_vdl)
335
336 def quant_aware_train(self,
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/base.py in train_loop(self, num_epochs, train_dataset, train_batch_size, eval_dataset, save_interval_epochs, log_interval_steps, save_dir, ema, early_stop, early_stop_patience, use_vdl)
395 eval_dataset,
396 batch_size=eval_batch_size,
--> 397 return_details=True)
398 # 保存最优模型
399 if local_rank == 0:
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/detector.py in evaluate(self, eval_dataset, batch_size, metric, return_details)
497 with paddle.no_grad():
498 for step, data in enumerate(self.eval_data_loader):
--> 499 outputs = self.run(self.net, data, 'eval')
500 eval_metric.update(data, outputs)
501 eval_metric.accumulate()
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/detector.py in run(self, net, inputs, mode)
103
104 def run(self, net, inputs, mode):
--> 105 net_out = net(inputs)
106 if mode in ['train', 'eval']:
107 outputs = net_out
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py in __call__(self, *inputs, **kwargs)
946 and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()):
947 self._build_once(*inputs, **kwargs)
--> 948 return self.forward(*inputs, **kwargs)
949 else:
950 return self._dygraph_call_func(*inputs, **kwargs)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/ppdet/modeling/architectures/meta_arch.py in forward(self, inputs)
69 for inp in inputs_list:
70 self.inputs = inp
---> 71 outs.append(self.get_pred())
72
73 # multi-scale test
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/ppdet/modeling/architectures/yolo.py in get_pred(self)
122
123 def get_pred(self):
--> 124 return self._forward()
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/ppdet/modeling/architectures/yolo.py in _forward(self)
113 bbox, bbox_num = self.post_process(
114 yolo_head_outs, self.yolo_head.mask_anchors,
--> 115 self.inputs['im_shape'], self.inputs['scale_factor'])
116 output = {'bbox': bbox, 'bbox_num': bbox_num}
117
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/fluid/dygraph/layers.py in __call__(self, *inputs, **kwargs)
946 and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()):
947 self._build_once(*inputs, **kwargs)
--> 948 return self.forward(*inputs, **kwargs)
949 else:
950 return self._dygraph_call_func(*inputs, **kwargs)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/ppdet/modeling/post_process.py in forward(self, head_out, rois, im_shape, scale_factor)
65 """
66 if self.nms is not None:
---> 67 bboxes, score = self.decode(head_out, rois, im_shape, scale_factor)
68 bbox_pred, bbox_num, _ = self.nms(bboxes, score, self.num_classes)
69 else:
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/ppdet/modeling/layers.py in __call__(self, yolo_head_out, anchors, im_shape, scale_factor, var_weight)
546 self.num_classes, self.conf_thresh,
547 self.downsample_ratio // 2**i,
--> 548 self.clip_bbox, self.scale_x_y)
549 boxes_list.append(boxes)
550 scores_list.append(paddle.transpose(scores, perm=[0, 2, 1]))
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/ppdet/modeling/ops.py in yolo_box(x, origin_shape, anchors, class_num, conf_thresh, downsample_ratio, clip_bbox, scale_x_y, name)
701 conf_thresh, 'downsample_ratio', downsample_ratio,
702 'clip_bbox', clip_bbox, 'scale_x_y', scale_x_y)
--> 703 boxes, scores = core.ops.yolo_box(x, origin_shape, *attrs)
704 return boxes, scores
705 else:
ValueError: (InvalidArgument) yolo_box(): argument 'X' (position 0) must be Tensor, but got Tensor (at /paddle/paddle/fluid/pybind/op_function_common.cc:818)`
复现
1. 找到了类似的问题,其中的反馈是把paddle版本降低到2.3.2即可;但是降到这个版本后,import paddlex时又会报以下错误:
`
AttributeError Traceback (most recent call last)
/tmp/ipykernel_263/2175501212.py in <module>
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/__init__.py in <module>
18 init_parallel_env()
19
---> 20 from . import cv
21 from . import seg
22 from . import cls
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/__init__.py in <module>
13 # limitations under the License.
---> 15 from . import models
16 from . import transforms
17 from . import datasets
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/__init__.py in <module>
13 # limitations under the License.
14
---> 15 from .segmenter import *
16 from .classifier import *
17 from .detector import *
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/segmenter.py in <module>
26 from paddlex.utils import get_single_card_bs, DisablePrint
27 import paddlex.utils.logging as logging
---> 28 from .base import BaseModel
29 from .utils import seg_metrics as metrics
30 from paddlex.utils.checkpoint import seg_pretrain_weights_dict
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlex/cv/models/base.py in <module>
23 import paddle
24 from paddle.io import DataLoader, DistributedBatchSampler
---> 25 from paddleslim import QAT
26 from paddleslim.analysis import flops
27 from paddleslim import L1NormFilterPruner, FPGMFilterPruner
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/__init__.py in <module>
15 from __future__ import absolute_import
16 from paddleslim import models
---> 17 from paddleslim import prune
18 from paddleslim import nas
19 from paddleslim import analysis
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/prune/__init__.py in <module>
14
15 from __future__ import absolute_import
---> 16 from .pruner import *
17 from ..prune import pruner
18 from .auto_pruner import *
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/prune/pruner.py in <module>
18 import numpy as np
19 from functools import reduce
---> 20 from ..core import VarWrapper, OpWrapper, GraphWrapper
21 from .collections import StaticPruningCollections
22 from .criterion import CRITERION
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/core/__init__.py in <module>
17 from ..core import registry
18 from .registry import *
---> 19 from ..core import dygraph
20 from .dygraph import *
21
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/core/dygraph.py in <module>
7 from paddle.fluid.dygraph.layers import Layer
8 from paddle.fluid.framework import Block, ParamBase, Program, Variable
----> 9 from ..common import get_logger
10
11 __all__ = ["dygraph2program"]
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/common/__init__.py in <module>
19 from .lock import lock, unlock
20 from .cached_reader import cached_reader
---> 21 from .server import Server
22 from .client import Client
23 from .meter import AvgrageMeter
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/common/server.py in <module>
26 import threading
27 from .log_helper import get_logger
---> 28 from .rl_controller.utils import add_grad, ConnectMessage
30 _logger = get_logger(__name__, level=logging.INFO)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddleslim/common/rl_controller/__init__.py in <module>
17 _logger = get_logger(__name__, level=logging.INFO)
18 try:
---> 19 import parl
20 from .ddpg import *
21 except ImportError as e:
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/parl/__init__.py in <module>
45 from parl.core.torch import *
46
---> 47 from parl.remote import remote_class, connect
48 from parl import algorithms
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/parl/remote/__init__.py in <module>
17 from parl.remote.client import *
18 from parl.remote.exceptions import *
---> 19 from parl.remote.remote_decorator import *
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/parl/remote/remote_decorator.py in <module>
17
18 from parl.utils import logger
---> 19 from parl.remote.remote_wrapper import RemoteWrapper
20 from parl.remote.proxy_wrapper import proxy_wrapper_func
21 from parl.remote.future_mode import proxy_wrapper_nowait_func
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/parl/remote/remote_wrapper.py in <module>
19
20 from parl.utils import logger, to_str, to_byte
---> 21 from parl.remote.communication import loads_argument, loads_return,\
22 dumps_argument, dumps_return
23 from parl.remote.client import get_global_client
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/parl/remote/communication.py in <module>
36 return val
---> 38 context = pyarrow.default_serialization_context()
40 # support deserialize in another environment
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/pyarrow/__init__.py in __getattr__(name)
315
316 raise AttributeError(
--> 317 "module 'pyarrow' has no attribute '{0}'".format(name)
318 )
319
AttributeError: module 'pyarrow' has no attribute 'default_serialization_context'`
-
再次升级paddle到2.5.0以上,在pip install paddlex过程中,又卡着动不了:
-
实在让人崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!崩溃!!!
环境
-
PaddlePaddle 2.4.0, PaddleX 2.1.0
-
BML云训练
-
Python: 3.7.4
-
cuDNN Version: 8.2.
欢迎尝试使用新版本:https://aistudio.baidu.com/intro/paddlex