QAT issue with YOLOX
Hello, I am trying to get YOLOX QAT working. Following the tutorial, I first ran PTQ on the YOLOX model, including the CLE and BC steps. After that, I used aimet_torch.quantsim.QuantizationSimModel.model to obtain a quantized model with the quant ops inserted, and replaced the original yolox.model in the YOLOX trainer with it. However, the forward pass now returns a plain tensor instead of the structured outputs the original yolox.model() produces during training (the trainer reads outputs["total_loss"], as the traceback below shows). Could you please tell me how I should modify my code?
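To make the mismatch concrete, here is a minimal sketch of the two call patterns (a sketch only; the shape and the total_loss key are taken from the traceback below):

# Original FP32 YOLOX model in training mode: returns a dict of losses
outputs = self.model(inps, targets)
loss = outputs["total_loss"]          # works: outputs is a dict

# After swapping in quantsim.model: a raw prediction tensor comes back
outputs = self.model(inps, targets)   # tensor, e.g. shape (N, num_preds, 85)
loss = outputs["total_loss"]          # IndexError: too many indices for tensor of dimension 3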
Error:
2024-01-29 05:45:16 | INFO | yolox.core.trainer:344 - Training of experiment is done and the best AP is 0.00
2024-01-29 05:45:16 | ERROR | yolox.core.launch:98 - An error has been caught in function 'launch', process 'MainProcess' (2285414), thread 'MainThread' (139938635503424):
Traceback (most recent call last):
File "/workspace/workdir_trt/YOLOX/tools/train.py", line 146, in <module>
launch(
└ <function launch at 0x7f4406e45dc0>
> File "/workspace/workdir_trt/YOLOX/yolox/core/launch.py", line 98, in launch
main_func(*args)
│ └ (╒═══════════════════╤═══════════════════════════════════════════════════════════════════════════════════════════════════════...
└ <function main at 0x7f4407be6820>
File "/workspace/workdir_trt/YOLOX/tools/train.py", line 123, in main
trainer.train()
│ └ <function Trainer.train at 0x7f4407be6940>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 193, in train
self.train_in_epoch()
│ └ <function Trainer.train_in_epoch at 0x7f4407be69d0>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 202, in train_in_epoch
self.train_in_iter()
│ └ <function Trainer.train_in_iter at 0x7f4407be6a60>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 208, in train_in_iter
self.train_one_iter()
│ └ <function Trainer.train_one_iter at 0x7f4407be6af0>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 229, in train_one_iter
loss = outputs["total_loss"]
└ tensor([[[8.0200e+00, 9.7085e+00, 1.6745e+01, ..., 7.9569e-03,
2.4659e-03, 5.6030e-03],
[1.2010e+01, 1.41...
IndexError: too many indices for tensor of dimension 3
Here is the code I modified.
In yolox/core/trainer.py I added the following helpers (new imports shown for completeness; get_data_loader is a local helper of mine that builds the calibration loader):
# New imports for the helpers below; trainer.py's existing imports are unchanged.
from functools import partial
from typing import Tuple

import torch
from tqdm import tqdm

import aimet_common.defs
from aimet_torch import bias_correction
from aimet_torch.cross_layer_equalization import equalize_model
from aimet_torch.quantsim import QuantizationSimModel, QuantParams


def apply_cross_layer_equalization(model: torch.nn.Module, input_shape: tuple):
    """
    Applies CLE on the model in place, which consists of:
      - batch norm folding
      - cross-layer scaling
      - high-bias folding
    Also converts any ReLU6 into ReLU.
    :param model: the loaded model
    :param input_shape: the shape of the input to the model
    """
    equalize_model(model, input_shape)
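A minimal usage sketch for the helper above, assuming model is the FP32 YOLOX torch.nn.Module (the shape matches the 640x640 dummy input used later):

input_shape = (1, 3, 640, 640)                      # NCHW
apply_cross_layer_equalization(model, input_shape)  # equalizes the model in place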
def apply_bias_correction(model: torch.nn.Module, data_loader):
    """
    Applies bias correction on the model in place.
    :param model: the model to quantize
    :param data_loader: DataLoader used during quantization
    """
    # Rounding mode can be 'nearest' or 'stochastic'
    rounding_mode = 'nearest'
    # Number of samples used during quantization
    num_quant_samples = 16
    # Number of samples used for bias correction
    num_bias_correct_samples = 16
    params = QuantParams(weight_bw=8, act_bw=8, round_mode=rounding_mode,
                         quant_scheme='tf_enhanced')
    # Perform bias correction
    bias_correction.correct_bias(model.to(device="cuda"), params,
                                 num_quant_samples=num_quant_samples,
                                 data_loader=data_loader,
                                 num_bias_correct_samples=num_bias_correct_samples)
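Similarly, a hedged sketch of chaining the two PTQ steps before building the sim (CLE first, then bias correction; train_loader is assumed to be a calibration DataLoader in the format correct_bias expects):

apply_cross_layer_equalization(model, (1, 3, 640, 640))  # CLE first, in place
apply_bias_correction(model, train_loader)               # then empirical BC on CUDA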
def forward_pass(decoder, model, data_loader):
    """Forward pass over the data loader, used for compute_encodings."""
    # pylint: disable=no-member
    tensor_type = torch.cuda.FloatTensor
    model = model.eval()
    for imgs, _, info_imgs, ids in tqdm(data_loader):
        with torch.no_grad():
            imgs = imgs.type(tensor_type)
            outputs = model(imgs)
            if decoder is not None:
                outputs = decoder(outputs, dtype=outputs.type())
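For context on the partial used later: AIMET's compute_encodings invokes the callback as forward_pass_callback(sim.model, forward_pass_callback_args), so the binding resolves like this (a sketch):

forward_func = partial(forward_pass, None)   # binds decoder=None
# Inside compute_encodings, AIMET effectively runs
#     forward_func(quantsim.model, net_dataloader)
# which is forward_pass(decoder=None, model=quantsim.model, data_loader=net_dataloader)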
def calculate_quantsim_accuracy(model: torch.nn.Module, evaluator: aimet_common.defs.EvalFunction, dataloader,
                                use_cuda: bool = False, logdir: str = '') -> Tuple[QuantizationSimModel, float]:
    """
    Builds a QuantizationSimModel for the model and returns it with its accuracy.
    :param model: the loaded model
    :param evaluator: the eval function to use for evaluation (currently unused, see below)
    :param dataloader: DataLoader passed in by the trainer (currently unused; a fresh loader is built below)
    :param use_cuda: whether to run on the CUDA device
    :param logdir: log directory used for storing log files
    :return: a tuple of the quantsim and the accuracy of the model on this quantsim
    """
    input_shape = (1, 3, 640, 640)
    if use_cuda:
        model.to(torch.device('cuda'))
        dummy_input = torch.rand(input_shape).cuda()
    else:
        dummy_input = torch.rand(input_shape)

    # apply_cross_layer_equalization(model, input_shape)
    # apply_bias_correction(model, dataloader)

    # Number of batches to use for computing encodings. Only 5 batches are used
    # here to speed up the process; the images in these 5 batches should be
    # sufficient for computing encodings.
    iterations = 5
    net_dataloader = get_data_loader(
        dataset_path='/workspace/datasets',
        img_size=(640, 640),
        batch_size=64,
        num_workers=4,
    )
    quantsim = QuantizationSimModel(model=model, quant_scheme='tf_enhanced',
                                    dummy_input=dummy_input, rounding_mode='nearest',
                                    default_output_bw=8, default_param_bw=8, in_place=False)
    # compute_encodings calls forward_func(quantsim.model, net_dataloader),
    # i.e. forward_pass(None, quantsim.model, net_dataloader) via the partial.
    forward_func = partial(forward_pass, None)
    quantsim.compute_encodings(forward_func, forward_pass_callback_args=net_dataloader)

    # accuracy = evaluator.evaluate(quantsim.model)
    accuracy = None  # evaluation is currently disabled, so no accuracy is computed
    return quantsim, accuracy
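Not part of the original snippet, but for completeness: once the quantsim model has been fine-tuned (QAT), AIMET sims are typically exported with QuantizationSimModel.export. A hedged sketch, where the path and prefix are placeholders:

# Sketch only: './exported' and 'yolox_qat' are placeholder path/prefix.
quantsim.export(path='./exported', filename_prefix='yolox_qat',
                dummy_input=torch.rand(1, 3, 640, 640))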
And I modified before_train as follows:
def before_train(self):
    logger.info("args: {}".format(self.args))
    logger.info("exp value:\n{}".format(self.exp))

    # model related init
    torch.cuda.set_device(self.local_rank)
    model = self.exp.get_model()
    logger.info(
        "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
    )
    model.to(self.device)

    # solver related init
    self.optimizer = self.exp.get_optimizer(self.args.batch_size)

    # value of epoch will be set in `resume_train`
    model = self.resume_train(model)

    self.evaluator = self.exp.get_evaluator(
        batch_size=self.args.batch_size, is_distributed=self.is_distributed
    )

    # data related init
    self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs
    self.train_loader = self.exp.get_data_loader(
        batch_size=self.args.batch_size,
        is_distributed=self.is_distributed,
        no_aug=self.no_aug,
        cache_img=self.args.cache,
    )
    logger.info("init prefetcher, this might take one minute or less...")
    self.prefetcher = DataPrefetcher(self.train_loader)
    # max_iter means iters per epoch
    self.max_iter = len(self.train_loader)

    # logger.info("PTQ complete, map50 accuracy:{}".format(map50))
    quantsim, map50 = calculate_quantsim_accuracy(model=model,
                                                  evaluator=self.evaluator,
                                                  dataloader=self.train_loader,
                                                  use_cuda=True,
                                                  logdir='')
    # Replace the FP32 model with the quantization-simulation model.
    # Note: self.optimizer was built above from the original model's parameters,
    # and the sim was created with in_place=False, so the optimizer may still be
    # pointing at the FP32 weights rather than at quantsim.model's parameters.
    model = quantsim.model

    self.lr_scheduler = self.exp.get_lr_scheduler(
        self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
    )
    if self.args.occupy:
        occupy_mem(self.local_rank)

    if self.is_distributed:
        model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)

    if self.use_model_ema:
        self.ema_model = ModelEMA(model, 0.9998)
        self.ema_model.updates = self.max_iter * self.start_epoch

    self.model = model

    # Tensorboard and Wandb loggers
    if self.rank == 0:
        if self.args.logger == "tensorboard":
            self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
        elif self.args.logger == "wandb":
            self.wandb_logger = WandbLogger.initialize_wandb_logger(
                self.args,
                self.exp,
                self.evaluator.dataloader.dataset
            )
        else:
            raise ValueError("logger must be either 'tensorboard' or 'wandb'")

    logger.info("Training start...")
    logger.info("\n{}".format(model))