Models
Models copied to clipboard
求助:训练 ATSS 检测模型后,按照官方流程 dump 成 .mge 时报错,并且导出的 .tm 模型无法可视化
环境
1. 系统环境:Ubuntu 22.04.2
2. MegEngine 版本:1.13.0
3. Python 版本:3.10.12
4. 模型名称:atss_res18_coco_3x_800size
复现步骤
1.训练
python official/vision/detection/tools/train.py -f official/vision/detection/configs/atss_res18_coco_3x_800size.py -n 1 -d data/coco/
2.convert
python convert.py -f official/vision/detection/configs/atss_res18_coco_3x_800size.py -w log-of-atss_res18_coco_3x_800size/epoch_9.pkl -i official/assets/cat.jpg
请提供关键的代码片段便于追查问题
## 转换代码
import numpy as np
import megengine.functional as F
import megengine.hub
from megengine import jit, tensor
import megengine as mge
import megengine.distributed as dist
from megengine.autodiff import GradManager
from megengine.data import DataLoader, Infinite, RandomSampler
from megengine.data import transform as T
from megengine.optimizer import SGD
from official.vision.detection.tools.data_mapper import data_mapper
from official.vision.detection.tools.utils import DetEvaluator, import_from_file
import megengine.traced_module as tm
import argparse
import bisect
import copy
import os
import time
import cv2
def make_parser():
    """Build the command-line parser for the conversion script.

    Returns:
        argparse.ArgumentParser: parser exposing three options:
            ``-f/--file``        network description file (default ``"net.py"``)
            ``-w/--weight_file`` weights file (default ``None``)
            ``-i/--image``       image path (default ``None``)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--file", default="net.py", type=str, help="net description file"
    )
    parser.add_argument(
        "-w", "--weight_file", default=None, type=str, help="weights file",
    )
    parser.add_argument("-i", "--image", type=str, help="input image file")
    return parser
if __name__ == "__main__":
    # Conversion script: build the network described by the config file, load
    # trained weights, save a traced module ("test.tm"), then dump a traced
    # inference function to an .mge file.
    parser = make_parser()
    args = parser.parse_args()

    # Both options default to None; fail early with a usage message instead
    # of an opaque error from the load/read calls further down.
    if args.weight_file is None:
        parser.error("-w/--weight_file is required")
    if args.image is None:
        parser.error("-i/--image is required")

    current_network = import_from_file(args.file)
    cfg = current_network.Cfg()
    # NOTE(review): presumably avoids fetching pretrained backbone weights,
    # since a full checkpoint is loaded below -- confirm against the config.
    cfg.backbone_pretrained = False
    model = current_network.Net(cfg)

    ori_img = cv2.imread(args.image)
    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded; without this check the failure shows up later as an
    # AttributeError on ori_img.copy().
    if ori_img is None:
        raise RuntimeError(f"failed to read image: {args.image}")
    image, im_info = DetEvaluator.process_inputs(
        ori_img.copy(),
        model.cfg.test_image_short_size,
        model.cfg.test_image_max_size,
    )

    state_dict = mge.load(args.weight_file)
    # The checkpoint may wrap the weights under a "state_dict" key.
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]
    model.load_state_dict(state_dict)
    model.eval()

    traced_resnet = tm.trace_module(
        model, mge.tensor(image), im_info=mge.tensor(im_info)
    )
    # Graph surgery based on trace_module, as well as model conversion, can
    # be done here.
    traced_resnet.eval()
    mge.save(traced_resnet, "test.tm")

    @jit.trace(symbolic=True, capture_as_const=True)
    def infer_func(data, im_info, model):
        """Run ``model`` on ``data`` and ``im_info`` and return its prediction."""
        pred = model(data, im_info)
        return pred

    # The traced function is called once before dump(); the return value is
    # not used afterwards.
    infer_func(
        mge.tensor(image), im_info=mge.tensor(im_info), model=traced_resnet
    )
    infer_func.dump(
        "log-of-atss_res18_coco_3x_800size/test.mge", arg_names=["data"]
    )
请提供完整的日志及报错信息
25 17:37:50[mgb] WRN [dnn]
Cudnn8 will jit ptx code with cache. You can set
CUDA_CACHE_MAXSIZE and CUDA_CACHE_PATH environment var to avoid repeat jit(very slow).
For example `export CUDA_CACHE_MAXSIZE=2147483647` and `export CUDA_CACHE_PATH=/data/.cuda_cache`
25 17:37:53[mgb] ERR error while applying optimization pass PassConvertToCompatible: bad input shape for polyadic operator: {256}, {1,256,136,100}
backtrace:
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb13MegBrainErrorC1ERKSs+0x4a) [0x7fdfdad5590a]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x2db7867) [0x7fdfdadb7867]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN6megdnn12ErrorHandler15on_megdnn_errorERKSs+0x14) [0x7fdfde9d86a4]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x6a078d8) [0x7fdfdea078d8]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x6a07a91) [0x7fdfdea07a91]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb3opr8Elemwise20get_output_var_shapeEN6megdnn5param8Elemwise4ModeERKNS2_11SmallVectorINS2_11TensorShapeELj4EEE+0x37) [0x7fdfdaf150e7]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZNK3mgb3opr8Elemwise20get_output_var_shapeERKN6megdnn11SmallVectorINS2_11TensorShapeELj4EEERS5_+0x29) [0x7fdfdaf1e0c9]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb2cg5mixin24OutshapePureByInshapeOpr10infer_descEmRN6megdnn11TensorShapeERKNS0_12static_infer6InpValE+0x19d) [0x7fdfdade9e0d]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb2cg12static_infer22StaticInferManagerImpl13TagShapeTrait8do_inferERKNS1_6InpValE+0x57) [0x7fdfdae0b437]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x2e0b292) [0x7fdfdae0b292]
Traceback (most recent call last):
File "/home/csy/megvii/Models/convert.py", line 63, in <module>
infer_func.dump("log-of-atss_res18_coco_3x_800size/test.mge", arg_names=["data"])
File "/home/csy/.local/lib/python3.10/site-packages/megengine/jit/tracing.py", line 1183, in dump
dump_content, dump_info = G.dump_graph(
File "/home/csy/.local/lib/python3.10/site-packages/megengine/core/tensor/megbrain_graph.py", line 456, in dump_graph
dump_content = _imperative_rt.dump_graph(
RuntimeError: bad input shape for polyadic operator: {256}, {1,256,136,100}
backtrace:
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb13MegBrainErrorC1ERKSs+0x4a) [0x7fdfdad5590a]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x2db7867) [0x7fdfdadb7867]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN6megdnn12ErrorHandler15on_megdnn_errorERKSs+0x14) [0x7fdfde9d86a4]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x6a078d8) [0x7fdfdea078d8]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x6a07a91) [0x7fdfdea07a91]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb3opr8Elemwise20get_output_var_shapeEN6megdnn5param8Elemwise4ModeERKNS2_11SmallVectorINS2_11TensorShapeELj4EEE+0x37) [0x7fdfdaf150e7]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZNK3mgb3opr8Elemwise20get_output_var_shapeERKN6megdnn11SmallVectorINS2_11TensorShapeELj4EEERS5_+0x29) [0x7fdfdaf1e0c9]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb2cg5mixin24OutshapePureByInshapeOpr10infer_descEmRN6megdnn11TensorShapeERKNS0_12static_infer6InpValE+0x19d) [0x7fdfdade9e0d]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(_ZN3mgb2cg12static_infer22StaticInferManagerImpl13TagShapeTrait8do_inferERKNS1_6InpValE+0x57) [0x7fdfdae0b437]
/home/csy/.local/lib/python3.10/site-packages/megengine/core/lib/libmegengine_shared.so(+0x2e0b292) [0x7fdfdae0b292]
@FateScript 大佬帮忙看看