YOLOX
YOLOX copied to clipboard
yolo-nano训练的模型大小和官方预训练模型大小不一致,介于nano和tiny之间,parameters和gflops也差太多
这是我训练的yolox-nano的parameters和gflops,模型权重文件大小为18M,而官方的预训练模型文件大小为7.7M。
这是官方预训练nano和tiny模型的parameters和gflops,谁知道为什么差这么多吗?输入的图片尺寸不一样,gflops不同,但至少parameters应该和官方的一致吧。
这是我的nano的训练配置
class Exp(MyExp):
    """YOLOX-Nano experiment config for single-class VOC training.

    NOTE(review): the original paste had ``def init`` / ``super(...).init()``
    — markdown stripped the double underscores; restored to ``__init__``.
    The nano variant differs from tiny mainly by ``depthwise=True`` in
    ``get_model`` (see below), not only by depth/width.
    """

    def __init__(self):
        super(Exp, self).__init__()
        # ---------------- model config ---------------- #
        self.num_classes = 1
        # BUGFIX: the base Exp reads `self.input_size`, not `self.input_scale`.
        # The original set the nonexistent `input_scale`, so training silently
        # kept the base-class default input size — presumably the cause of the
        # GFLOPs mismatch reported in this thread. TODO confirm against
        # yolox/exp/yolox_base.py.
        self.input_size = (320, 320)
        # NOTE(review): newer YOLOX uses `multiscale_range` (set below);
        # `random_size` is kept here for compatibility with older code paths.
        self.random_size = (10, 13)
        self.depth = 0.33
        self.width = 0.25
        self.act = 'silu'
        # ---------- transform config ------------ #
        self.mosaic_prob = 0.5
        self.mixup_prob = 1.0
        self.hsv_prob = 1.0
        self.flip_prob = 0.5
        self.degrees = 10.0
        self.translate = 0.1
        self.mosaic_scale = (0.5, 1.5)
        self.mixup_scale = (0.5, 1.5)
        self.shear = 2.0
        self.enable_mixup = True  # base-class default is False; enabled on purpose
        # ---------------- dataloader config ---------------- #
        self.data_num_workers = 2
        self.multiscale_range = 2
        self.data_dir = None
        self.train_ann = "instances_train2017.json"
        # NOTE(review): the VOC loaders below select splits via `image_sets`;
        # these *_ann entries look unused for VOC — verify before relying on them.
        self.val_ann = "test.txt"
        self.test_ann = "test.txt"
        # -------------- training config --------------------- #
        self.warmup_epochs = 1
        self.max_epoch = 20
        self.warmup_lr = 0
        self.basic_lr_per_img = 0.01 / 64.0
        self.scheduler = "yoloxwarmcos"
        self.no_aug_epochs = 3
        self.min_lr_ratio = 0.05
        self.ema = True
        self.weight_decay = 5e-4
        self.momentum = 0.9
        self.print_interval = 1
        self.eval_interval = 1
        # Experiment name = this file's basename without extension.
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.seed = None
        self.output_dir = "./workspace/YOLOX_outputs"
        # ----------------- testing config ------------------ #
        self.test_size = (320, 320)
        self.test_conf = 0.01
        self.nmsthre = 0.65

    def get_model(self, sublinear=False):
        """Build (once) and return the YOLOX-Nano model.

        The model is cached on ``self.model``; subsequent calls return the
        same instance.
        """

        def init_yolo(M):
            # Match YOLOX's BatchNorm hyper-parameters.
            for m in M.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eps = 1e-3
                    m.momentum = 0.03

        if "model" not in self.__dict__:
            from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead
            in_channels = [256, 512, 1024]
            # NANO uses depthwise=True — the main difference from tiny.
            # If a "nano" config omits depthwise=True it builds a bigger
            # (tiny-like) network, which would explain a ~18M checkpoint.
            backbone = YOLOPAFPN(
                self.depth, self.width, in_channels=in_channels,
                act=self.act, depthwise=True,
            )
            head = YOLOXHead(
                self.num_classes, self.width, in_channels=in_channels,
                act=self.act, depthwise=True,
            )
            self.model = YOLOX(backbone, head)

        self.model.apply(init_yolo)
        self.model.head.initialize_biases(1e-2)
        return self.model

    def get_data_loader(self, batch_size, is_distributed, no_aug=False, cache_img=False):
        """Return the VOC2007-trainval training DataLoader.

        Wraps the base dataset in MosaicDetection (disabled when ``no_aug``)
        and uses an infinite sampler so training iterates indefinitely.
        """
        from yolox.data import (
            VOCDetection,
            TrainTransform,
            YoloBatchSampler,
            DataLoader,
            InfiniteSampler,
            MosaicDetection,
            worker_init_reset_seed,
        )
        from yolox.utils import (
            wait_for_the_master,
            get_local_rank,
        )

        local_rank = get_local_rank()
        # Only one process per node builds/caches the dataset; others wait.
        with wait_for_the_master(local_rank):
            dataset = VOCDetection(
                data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"),
                image_sets=[('2007', 'trainval')],
                img_size=self.input_size,
                preproc=TrainTransform(
                    max_labels=50,
                    flip_prob=self.flip_prob,
                    hsv_prob=self.hsv_prob),
                cache=cache_img,
            )

        dataset = MosaicDetection(
            dataset,
            mosaic=not no_aug,
            img_size=self.input_size,
            preproc=TrainTransform(
                max_labels=120,
                flip_prob=self.flip_prob,
                hsv_prob=self.hsv_prob),
            degrees=self.degrees,
            translate=self.translate,
            mosaic_scale=self.mosaic_scale,
            mixup_scale=self.mixup_scale,
            shear=self.shear,
            enable_mixup=self.enable_mixup,
            mosaic_prob=self.mosaic_prob,
            mixup_prob=self.mixup_prob,
        )

        self.dataset = dataset

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()

        sampler = InfiniteSampler(
            len(self.dataset), seed=self.seed if self.seed else 0
        )
        batch_sampler = YoloBatchSampler(
            sampler=sampler,
            batch_size=batch_size,
            drop_last=False,
            mosaic=not no_aug,
        )

        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
        dataloader_kwargs["batch_sampler"] = batch_sampler
        # Make sure each process has different random seed, especially for 'fork' method
        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed

        train_loader = DataLoader(self.dataset, **dataloader_kwargs)
        return train_loader

    def get_eval_loader(self, batch_size, is_distributed, testdev=False, legacy=False):
        """Return the VOC2007-test evaluation DataLoader (no augmentation)."""
        from yolox.data import VOCDetection, ValTransform

        valdataset = VOCDetection(
            data_dir=os.path.join(get_yolox_datadir(), "VOCdevkit"),
            image_sets=[('2007', 'test')],
            img_size=self.test_size,
            preproc=ValTransform(legacy=legacy),
        )

        if is_distributed:
            batch_size = batch_size // dist.get_world_size()
            sampler = torch.utils.data.distributed.DistributedSampler(
                valdataset, shuffle=False
            )
        else:
            sampler = torch.utils.data.SequentialSampler(valdataset)

        dataloader_kwargs = {
            "num_workers": self.data_num_workers,
            "pin_memory": True,
            "sampler": sampler,
        }
        dataloader_kwargs["batch_size"] = batch_size
        val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs)
        return val_loader

    def get_evaluator(self, batch_size, is_distributed, testdev=False, legacy=False):
        """Return a VOCEvaluator bound to the eval loader above."""
        from yolox.evaluators import VOCEvaluator

        val_loader = self.get_eval_loader(batch_size, is_distributed, testdev, legacy)
        evaluator = VOCEvaluator(
            dataloader=val_loader,
            img_size=self.test_size,
            confthre=self.test_conf,
            nmsthre=self.nmsthre,
            num_classes=self.num_classes,
        )
        return evaluator
这是我的yolox_voc_nano.py文件,而且用nano训练出来的精度也有问题,用tiny训练就没问题,我的nano和tiny配置文件的区别只有width和depth改了,我的这个nano的配置文件有什么问题吗?
你好,你解决了么,nano我也训练出来是18M,不知道问题出在哪里
What is annots.pkl?