super-gradients
super-gradients copied to clipboard
Using DetectionMetricsDistanceBased as a metric throws error - KeyError: '[email protected]'
🐛 Describe the bug
I wanted to add DetectionMetricsDistanceBased as one of the validation metrics during training, but training then fails with KeyError: '[email protected]'.
Here's the relevant part of my code; I wonder if I messed something up. I couldn't find any other examples of this, so I went with my gut. Let me know if more parts of my code are needed.
I think it's worth mentioning that the end goal here is to improve small-object detection.
Training Parameters
from super_gradients.training.losses import PPYoloELoss
from super_gradients.training.metrics import (
DetectionMetrics_050,
DetectionMetrics_050_095,
)
from super_gradients.training.metrics.detection_metrics import (
DetectionMetricsDistanceBased
)
from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback
# Early-stopping callback registered for the validation-epoch-end phase.
# It watches the metric named by `monitor`, treating larger values as better.
# NOTE(review): the metric name below looks mangled by e-mail obfuscation in
# the scraped page; confirm the real "<Metric>@<threshold>" name before reuse.
early_stop = EarlyStop(
    Phase.VALIDATION_EPOCH_END,
    monitor="[email protected]",
    mode="max",
    min_delta=0.001,
    patience=20,
    verbose=True,
)
# Training configuration handed to `trainer.train(training_params=...)`.
# Key order is kept exactly as written in the original snippet.
train_params = {
    "silent_mode": False,
    "average_best_models": True,
    # Learning-rate warm-up and schedule.
    "warmup_mode": "linear_epoch_step",
    "warmup_initial_lr": 1e-6,
    "lr_warmup_epochs": 4,
    "initial_lr": 2e-4,
    "lr_mode": "cosine",
    "cosine_final_lr_ratio": 0.1,
    # Optimizer settings.
    "optimizer": "AdamW",
    "optimizer_params": {"weight_decay": 0.0001},
    "zero_weight_decay_on_bias_and_bn": True,
    # EMA and gradient accumulation.
    "ema": True,
    "batch_accumulate": 1,
    "ema_params": {"decay": 0.9, "decay_type": "threshold"},
    "max_epochs": EPOCHS,
    "mixed_precision": True,
    # Loss sized to the number of dataset classes.
    "loss": PPYoloELoss(
        use_static_assigner=False,
        num_classes=len(dataset_params["classes"]),
    ),
    # Validation metrics. Each metric gets its own post-prediction callback
    # instance, all built with the same thresholds.
    "valid_metrics_list": [
        DetectionMetrics_050(
            score_thres=0.1,
            top_k_predictions=30,
            num_cls=len(dataset_params["classes"]),
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=1000,
                max_predictions=30,
                nms_threshold=0.7,
            ),
        ),
        DetectionMetrics_050_095(
            score_thres=0.1,
            top_k_predictions=30,
            num_cls=len(dataset_params["classes"]),
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=1000,
                max_predictions=30,
                nms_threshold=0.7,
            ),
        ),
        # Unlike the two metrics above, this one is constructed without
        # score_thres / top_k_predictions / normalize_targets arguments.
        DetectionMetricsDistanceBased(
            num_cls=len(dataset_params["classes"]),
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=1000,
                max_predictions=30,
                nms_threshold=0.7,
            ),
        ),
    ],
    # NOTE(review): the metric name below looks mangled by e-mail obfuscation
    # in the scraped page; confirm the real "<Metric>@<threshold>" name.
    "metric_to_watch": "[email protected]",
    "phase_callbacks": [early_stop],
    "resume": False,
}
# ...
# Start training with the configured parameters and the train/validation loaders.
trainer.train(
    model=model,
    training_params=train_params,
    train_loader=train_data,
    valid_loader=val_data,
)
Error
Train epoch 0: 100%|██████████| 4939/4939 [34:51<00:00, 2.36it/s, PPYoloELoss/loss=1.43, PPYoloELoss/loss_cls=0.818, PPYoloELoss/loss_dfl=0.318, PPYoloELoss/loss_iou=0.295, gpu_mem=41.2]
Validating: 100%|██████████| 431/431 [01:23<00:00, 5.14it/s]
[2024-03-11 19:43:24] INFO - early_stopping.py - Metric [email protected] improved. New best score: 0.248
[2024-03-11 19:43:24] INFO - base_sg_logger.py - [CLEANUP] - Successfully stopped system monitoring process
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[59], line 16
2 trainer = Trainer(
3 experiment_name=model_to_train+'_ball_aug_'+str(EPOCHS)+'e',
4 ckpt_root_dir=CHECKPOINT_DIR
5 )
7 model = models.get(
8 model_to_train,
9 num_classes=len(dataset_params['classes']),
(...)
12 # arch_params=arch_params
13 )
---> 16 trainer.train(
17 model=model,
18 training_params=train_params,
19 train_loader=train_data,
20 valid_loader=val_data
21 )
File ~/.local/lib/python3.9/site-packages/super_gradients/training/sg_trainer/sg_trainer.py:1604, in Trainer.train(self, model, training_params, train_loader, valid_loader, test_loaders, additional_configs_to_log)
1602 # SAVING AND LOGGING OCCURS ONLY IN THE MAIN PROCESS (IN CASES THERE ARE SEVERAL PROCESSES - DDP)
1603 if should_run_validation and self.training_params.save_model:
-> 1604 self._save_checkpoint(
1605 optimizer=self.optimizer,
1606 epoch=1 + epoch,
1607 train_metrics_dict=train_metrics_dict,
1608 validation_results_dict=valid_metrics_dict,
1609 context=context,
1610 )
1611 self.sg_logger.upload()
1613 if not silent_mode:
File ~/.local/lib/python3.9/site-packages/super_gradients/training/sg_trainer/sg_trainer.py:670, in Trainer._save_checkpoint(self, optimizer, epoch, train_metrics_dict, validation_results_dict, context)
665 # create metrics dict to save
666 valid_metrics_titles = get_metrics_titles(self.valid_metrics)
668 all_metrics = {
669 "tracked_metric_name": self.metric_to_watch,
--> 670 "valid": {metric_name: float(validation_results_dict[metric_name]) for metric_name in valid_metrics_titles},
671 }
673 if train_metrics_dict is not None:
674 train_metrics_titles = get_metrics_titles(self.train_metrics)
File ~/.local/lib/python3.9/site-packages/super_gradients/training/sg_trainer/sg_trainer.py:670, in <dictcomp>(.0)
665 # create metrics dict to save
666 valid_metrics_titles = get_metrics_titles(self.valid_metrics)
668 all_metrics = {
669 "tracked_metric_name": self.metric_to_watch,
--> 670 "valid": {metric_name: float(validation_results_dict[metric_name]) for metric_name in valid_metrics_titles},
671 }
673 if train_metrics_dict is not None:
674 train_metrics_titles = get_metrics_titles(self.train_metrics)
KeyError: '[email protected]'
Versions
Super_gradients 3.6.0