pytorch-image-models
[FEATURE] Creating Scheduler V2 (same as create_optimizer_v2 concept)
When building optimizers, we can now use the function create_optimizer_v2, which takes keyword arguments instead of parsed argparse arguments. This makes it easier to set up configuration files (we can just pass in kwargs).
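For reference, a minimal sketch of what that looks like today with the optimizer factory (the model name and hyperparameter values here are arbitrary examples):

```python
import timm
from timm.optim import create_optimizer_v2

model = timm.create_model("resnet50")
# plain keyword args -- easy to drive from a config file dict via **cfg
optimizer = create_optimizer_v2(model, opt="adamw", lr=1e-3, weight_decay=0.05)
```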
I believe the same can be done for the scheduler. Please let me know what is stopping us from doing this; I do not mind spending time to contribute the feature, but I am concerned I may be missing something fundamental about the schedulers that hinders it.
Amendments can be made here: https://github.com/rwightman/pytorch-image-models/blob/7c67d6aca992f039eece0af5f7c29a43d48c00e4/timm/scheduler/scheduler_factory.py
So far I have written some rough code that should do the trick. Please let me know if this is something you are looking for in a PR, and I will submit it after testing with various training scripts:
```python
from .cosine_lr import CosineLRScheduler
from .multistep_lr import MultiStepLRScheduler
from .plateau_lr import PlateauLRScheduler
from .poly_lr import PolyLRScheduler
from .step_lr import StepLRScheduler
from .tanh_lr import TanhLRScheduler

def scheduler_kwargs(cfg):
    """cfg/argparse to kwargs helper.

    Convert scheduler args in an argparse namespace or cfg-like object to
    keyword args for the updated create fn.
    """
    # Pass through all arguments so we stay compatible with v1 of the
    # create_scheduler function and train.py; unrelated keys are absorbed
    # by **kwargs in create_scheduler_v2 below.
    kwargs = vars(cfg)
    return kwargs


def create_scheduler(args, optimizer):
    # v1 compat shim: unpack the argparse/cfg args into the v2 factory
    return create_scheduler_v2(optimizer, **scheduler_kwargs(cfg=args))
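
# Hypothetical usage of the v1 compat shim above (a sketch, not part of the
# proposal itself): existing argparse-driven scripts like train.py keep
# working unchanged:
#
#   args = parser.parse_args()
#   lr_scheduler, num_epochs = create_scheduler(args, optimizer)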

def create_scheduler_v2(
        optimizer,
        sched="cosine",
        epochs=300,
        min_lr=1e-6,
        warmup_lr=1e-5,
        warmup_epochs=10,
        lr_k_decay=1.0,
        decay_epochs=100,
        decay_rate=0.1,
        patience_epochs=10,
        cooldown_epochs=10,
        lr_noise=None,
        lr_noise_pct=0.67,
        lr_noise_std=1.0,
        seed=42,
        lr_cycle_mul=1.0,
        lr_cycle_decay=0.1,
        lr_cycle_limit=1,
        **kwargs,
):
    schedulers_available = [
        "cosine",
        "tanh",
        "step",
        "multistep",
        "plateau",
        "poly",
        "none",
    ]
    if sched not in schedulers_available:
        raise ValueError(
            f"Scheduler '{sched}' does not exist in this library. "
            f"Available schedulers: {schedulers_available}"
        )
    num_epochs = epochs
    if sched == "none":
        return None, num_epochs

    # Optional LR noise: lr_noise is a (start, end) pair or a single start
    # point, given as fractions of the total epochs.
    if lr_noise is not None:
        if isinstance(lr_noise, (list, tuple)):
            noise_range = [n * num_epochs for n in lr_noise]
            if len(noise_range) == 1:
                noise_range = noise_range[0]
        else:
            noise_range = lr_noise * num_epochs
    else:
        noise_range = None

    noise_args = dict(
        noise_range_t=noise_range,
        noise_pct=lr_noise_pct,
        noise_std=lr_noise_std,
        noise_seed=seed,
    )
    cycle_args = dict(
        cycle_mul=lr_cycle_mul,
        cycle_decay=lr_cycle_decay,
        cycle_limit=lr_cycle_limit,
    )
    if sched == "cosine":
        lr_scheduler = CosineLRScheduler(
            optimizer,
            t_initial=num_epochs,
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            k_decay=lr_k_decay,
            **cycle_args,
            **noise_args,
        )
        num_epochs = lr_scheduler.get_cycle_length() + cooldown_epochs
    elif sched == "tanh":
        lr_scheduler = TanhLRScheduler(
            optimizer,
            t_initial=num_epochs,
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            t_in_epochs=True,
            **cycle_args,
            **noise_args,
        )
        num_epochs = lr_scheduler.get_cycle_length() + cooldown_epochs
    elif sched == "step":
        lr_scheduler = StepLRScheduler(
            optimizer,
            decay_t=decay_epochs,
            decay_rate=decay_rate,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            **noise_args,
        )
    elif sched == "multistep":
        # NOTE: for multistep, decay_epochs should be a list of milestone epochs
        lr_scheduler = MultiStepLRScheduler(
            optimizer,
            decay_t=decay_epochs,
            decay_rate=decay_rate,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            **noise_args,
        )
    elif sched == "plateau":
        # 'eval_metric' comes through **kwargs from the train.py args; default
        # to 'top1' (a maximized metric) as in timm's training script
        mode = "min" if "loss" in kwargs.get("eval_metric", "top1") else "max"
        lr_scheduler = PlateauLRScheduler(
            optimizer,
            decay_rate=decay_rate,
            patience_t=patience_epochs,
            lr_min=min_lr,
            mode=mode,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            cooldown_t=0,
            **noise_args,
        )
    elif sched == "poly":
        lr_scheduler = PolyLRScheduler(
            optimizer,
            power=decay_rate,  # overloading 'decay_rate' as polynomial power
            t_initial=num_epochs,
            lr_min=min_lr,
            warmup_lr_init=warmup_lr,
            warmup_t=warmup_epochs,
            k_decay=lr_k_decay,
            **cycle_args,
            **noise_args,
        )
        num_epochs = lr_scheduler.get_cycle_length() + cooldown_epochs
    return lr_scheduler, num_epochs
```
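With the proposal above, usage could look like the following sketch (create_scheduler_v2 here is the proposed function, not an existing timm API; the optimizer choice and epoch counts are arbitrary):

```python
import timm
from timm.optim import create_optimizer_v2

model = timm.create_model("resnet50")
optimizer = create_optimizer_v2(model, opt="sgd", lr=0.1)

# proposed v2 factory: plain kwargs instead of an argparse namespace
lr_scheduler, num_epochs = create_scheduler_v2(
    optimizer,
    sched="cosine",
    epochs=100,
    warmup_epochs=5,
    cooldown_epochs=10,
)
for epoch in range(num_epochs):
    ...  # train and validate for one epoch
    lr_scheduler.step(epoch + 1)
```

Note that the returned num_epochs can exceed the epochs argument, since the cosine/tanh/poly branches extend the schedule by cooldown_epochs beyond the cycle length.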
@timothylimyl thanks for the thoughtful issue. I do actually have plans for a v2 of the scheduler factory, and the argument handling would match your idea here.
BUT I've held off because I was thinking of making more significant changes to the schedulers themselves (and the epochs/steps interface)... I'm hesitant to make a v2 just to improve the args issue and then also need a v3 for the other changes...
Let's leave this open and give me some more time to gather my thoughts...
@rwightman looking forward to the changes; please let me know if any help is needed.