
Learning rate finder permission error

Open · hannahyess opened this issue on Apr 6, 2022 · 0 comments

  • PyTorch-Forecasting version: 0.10.1
  • PyTorch version: 1.11.0
  • Python version: 3.9
  • Operating System: Windows

Issue

I tried tuning the learning rate with "auto_lr_find", but encountered a permission error for the .ckpt file.

Code to reproduce the problem (same code as in one of the tutorials):

import pandas as pd
import pytorch_lightning as pl

from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import NaNLabelEncoder
from pytorch_forecasting.data.examples import generate_ar_data
from pytorch_forecasting.metrics import NormalDistributionLoss

# generate synthetic autoregressive example data
data = generate_ar_data(seasonality=10.0, timesteps=400, n_series=100, seed=42)
data["static"] = 2
data["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(data.time_idx, "D")
data.head()

# create dataset and dataloaders
max_encoder_length = 60
max_prediction_length = 20

training_cutoff = data["time_idx"].max() - max_prediction_length

context_length = max_encoder_length
prediction_length = max_prediction_length

training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="value",
    categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
    group_ids=["series"],
    time_varying_unknown_reals=["value"],
    max_encoder_length=context_length,
    max_prediction_length=prediction_length,
)

validation = TimeSeriesDataSet.from_dataset(training, data, min_prediction_idx=training_cutoff + 1)
batch_size = 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

pl.seed_everything(42)
trainer = pl.Trainer(gpus=0, gradient_clip_val=0.1, auto_lr_find=True)
net = TemporalFusionTransformer.from_dataset(
    training, learning_rate=3e-2, hidden_size=30, loss=NormalDistributionLoss()
)
# run the learning-rate finder (auto_lr_find=True); this is where the error occurs
trainer.tune(net)

Error message obtained:

---------------------------------------------------------------------------
PermissionError                           Traceback (most recent call last)
~\AppData\Local\Temp\1/ipykernel_21960/200211129.py in <module>
      4     training, learning_rate=3e-2, hidden_size=30, loss=NormalDistributionLoss()
      5 )
----> 6 trainer.tune(net)

~\Anaconda3\lib\site-packages\pytorch_lightning\trainer\trainer.py in tune(self, model, train_dataloaders, val_dataloaders, datamodule, scale_batch_size_kwargs, lr_find_kwargs)
   1127 
   1128         with isolate_rng():
-> 1129             result = self.tuner._tune(
   1130                 model, scale_batch_size_kwargs=scale_batch_size_kwargs, lr_find_kwargs=lr_find_kwargs
   1131             )

~\Anaconda3\lib\site-packages\pytorch_lightning\tuner\tuning.py in _tune(self, model, scale_batch_size_kwargs, lr_find_kwargs)
     61         if self.trainer.auto_lr_find:
     62             lr_find_kwargs.setdefault("update_attr", True)
---> 63             result["lr_find"] = lr_find(self.trainer, model, **lr_find_kwargs)
     64 
     65         self.trainer.state.status = TrainerStatus.FINISHED

~\Anaconda3\lib\site-packages\pytorch_lightning\tuner\lr_finder.py in lr_find(trainer, model, min_lr, max_lr, num_training, mode, early_stop_threshold, update_attr)
    205     # Save initial model, that is loaded after learning rate is found
    206     ckpt_path = os.path.join(trainer.default_root_dir, f".lr_find_{uuid.uuid4()}.ckpt")
--> 207     trainer.save_checkpoint(ckpt_path)
    208     params = __lr_finder_dump_params(trainer)
    209 

~\Anaconda3\lib\site-packages\pytorch_lightning\trainer\trainer.py in save_checkpoint(self, filepath, weights_only, storage_options)
   2459 
   2460         """
-> 2461         self._checkpoint_connector.save_checkpoint(filepath, weights_only=weights_only, storage_options=storage_options)
   2462 
   2463     """

~\Anaconda3\lib\site-packages\pytorch_lightning\trainer\connectors\checkpoint_connector.py in save_checkpoint(self, filepath, weights_only, storage_options)
    443         """
    444         _checkpoint = self.dump_checkpoint(weights_only)
--> 445         self.trainer.strategy.save_checkpoint(_checkpoint, filepath, storage_options=storage_options)
    446 
    447     def _get_lightning_module_state_dict(self) -> Dict[str, torch.Tensor]:

~\Anaconda3\lib\site-packages\pytorch_lightning\strategies\strategy.py in save_checkpoint(self, checkpoint, filepath, storage_options)
    416         """
    417         if self.is_global_zero:
--> 418             self.checkpoint_io.save_checkpoint(checkpoint, filepath, storage_options=storage_options)
    419 
    420     def remove_checkpoint(self, filepath: _PATH) -> None:

~\Anaconda3\lib\site-packages\pytorch_lightning\plugins\io\torch_plugin.py in save_checkpoint(self, checkpoint, path, storage_options)
     52         try:
     53             # write the checkpoint dictionary on the file
---> 54             atomic_save(checkpoint, path)
     55         except AttributeError as err:
     56             # todo (sean): is this try catch necessary still?

~\Anaconda3\lib\site-packages\pytorch_lightning\utilities\cloud_io.py in atomic_save(checkpoint, filepath)
     66     bytesbuffer = io.BytesIO()
     67     torch.save(checkpoint, bytesbuffer)
---> 68     with fsspec.open(filepath, "wb") as f:
     69         f.write(bytesbuffer.getvalue())

~\Anaconda3\lib\site-packages\fsspec\core.py in __enter__(self)
    101         mode = self.mode.replace("t", "").replace("b", "") + "b"
    102 
--> 103         f = self.fs.open(self.path, mode=mode)
    104 
    105         self.fobjects = [f]

~\Anaconda3\lib\site-packages\fsspec\spec.py in open(self, path, mode, block_size, cache_options, **kwargs)
   1004         else:
   1005             ac = kwargs.pop("autocommit", not self._intrans)
-> 1006             f = self._open(
   1007                 path,
   1008                 mode=mode,

~\Anaconda3\lib\site-packages\fsspec\implementations\local.py in _open(self, path, mode, block_size, **kwargs)
    153         if self.auto_mkdir and "w" in mode:
    154             self.makedirs(self._parent(path), exist_ok=True)
--> 155         return LocalFileOpener(path, mode, fs=self, **kwargs)
    156 
    157     def touch(self, path, **kwargs):

~\Anaconda3\lib\site-packages\fsspec\implementations\local.py in __init__(self, path, mode, autocommit, fs, compression, **kwargs)
    248         self.compression = get_compression(path, compression)
    249         self.blocksize = io.DEFAULT_BUFFER_SIZE
--> 250         self._open()
    251 
    252     def _open(self):

~\Anaconda3\lib\site-packages\fsspec\implementations\local.py in _open(self)
    253         if self.f is None or self.f.closed:
    254             if self.autocommit or "w" not in self.mode:
--> 255                 self.f = open(self.path, mode=self.mode)
    256                 if self.compression:
    257                     compress = compr[self.compression]

PermissionError: [Errno 13] Permission denied: 'C:/.lr_find_777339cd-e0ec-4bb1-89db-8888b7ec69a6.ckpt'
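
From the last line of the traceback, the learning-rate finder tries to write its temporary checkpoint to 'C:/.lr_find_<uuid>.ckpt', i.e. trainer.default_root_dir appears to resolve to the root of the C: drive, which a non-admin user cannot write to. As a possible workaround (a minimal sketch; the folder path below is just an example I chose), passing a writable default_root_dir to the Trainer seems to let the temporary checkpoint be created:

import os
import pytorch_lightning as pl

# hypothetical writable folder for the lr_find checkpoint
workdir = os.path.join(os.path.expanduser("~"), "lr_find_runs")
os.makedirs(workdir, exist_ok=True)

trainer = pl.Trainer(
    gpus=0,
    gradient_clip_val=0.1,
    auto_lr_find=True,
    default_root_dir=workdir,  # lr_find saves and restores its temporary .ckpt here
)
trainer.tune(net)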
