Conditioned-Source-Separation-LaSAFT
CSVLogger does not work
Hi, I would like to log experiments locally only, so I changed the logger to CSVLogger in lasaft/trainer.py:
log = args['log']
if log == 'False':
    args['logger'] = False
elif log == 'wandb':
    args['logger'] = WandbLogger(project='lasaft_exp', tags=[model_name], offline=False, name=run_id)
    args['logger'].log_hyperparams(model.hparams)
    args['logger'].watch(model, log='all')
elif log == 'tensorboard':
    raise NotImplementedError
else:
    args['logger'] = True  # default
    default_log_path = os.path.join(ckpt_path, 'lightning_logs')
    args['logger'] = CSVLogger(default_log_path, version='0')
    mkdir_if_not_exists(default_log_path)
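For context, a self-contained version of that CSVLogger setup looks roughly like this (the paths and the commented Trainer call are illustrative placeholders, not the repo's actual code):

import os
from pytorch_lightning.loggers import CSVLogger

ckpt_path = './checkpoints'                                   # hypothetical checkpoint directory
default_log_path = os.path.join(ckpt_path, 'lightning_logs')
os.makedirs(default_log_path, exist_ok=True)                  # stands in for mkdir_if_not_exists

# CSVLogger writes a metrics.csv under <save_dir>/<name>/<version>/
logger = CSVLogger(save_dir=default_log_path, version='0')

# presumably the args dict is then unpacked into the Lightning Trainer, e.g.:
# trainer = Trainer(logger=logger, **other_args)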
I also set progress_bar_refresh_rate = 0. Then, when I run main.py, it shows this error:
Traceback (most recent call last):
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 644, in run_train
self.train_loop.run_training_epoch()
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 564, in run_training_epoch
self.trainer.run_evaluation(on_epoch=True)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 758, in run_evaluation
self.evaluation_loop.on_evaluation_end()
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/evaluation_loop.py", line 100, in on_evaluation_end
self.trainer.call_hook('on_validation_end', *args, **kwargs)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1101, in call_hook
trainer_hook(*args, **kwargs)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/callback_hook.py", line 183, in on_validation_end
callback.on_validation_end(self, self.lightning_module)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 212, in on_validation_end
self.save_checkpoint(trainer, pl_module)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 259, in save_checkpoint
self._save_top_k_checkpoints(trainer, pl_module, monitor_candidates)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 566, in _save_top_k_checkpoints
self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 608, in _update_best_and_save
self._save_model(filepath, trainer, pl_module)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 335, in _save_model
self.save_function(filepath, self.save_weights_only)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/properties.py", line 327, in save_checkpoint
self.checkpoint_connector.save_checkpoint(filepath, weights_only)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/checkpoint_connector.py", line 408, in save_checkpoint
atomic_save(checkpoint, filepath)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py", line 63, in atomic_save
torch.save(checkpoint, bytesbuffer)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/torch/serialization.py", line 372, in save
_save(obj, opened_zipfile, pickle_module, pickle_protocol)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/torch/serialization.py", line 476, in _save
pickler.dump(obj)
TypeError: cannot pickle '_csv.writer' object
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "main.py", line 60, in <module>
trainer.train(parser.parse_args(), hp)
File "/home/feitao/Projects/music_unmix/lasaft/lasaft/source_separation/conditioned/scripts/trainer.py", line 177, in train
trainer.fit(model, training_dataloader, validation_dataloader)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 513, in fit
self.dispatch()
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 553, in dispatch
self.accelerator.start_training(self)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in start_training
self.training_type_plugin.start_training(trainer)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 111, in start_training
self._results = trainer.run_train()
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 676, in run_train
self.train_loop.on_train_end()
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 134, in on_train_end
self.check_checkpoint_callback(should_update=True, is_last=True)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/training_loop.py", line 164, in check_checkpoint_callback
cb.on_validation_end(self.trainer, model)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 212, in on_validation_end
self.save_checkpoint(trainer, pl_module)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 259, in save_checkpoint
self._save_top_k_checkpoints(trainer, pl_module, monitor_candidates)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 566, in _save_top_k_checkpoints
self._update_best_and_save(current, epoch, step, trainer, pl_module, metrics)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 608, in _update_best_and_save
self._save_model(filepath, trainer, pl_module)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/callbacks/model_checkpoint.py", line 335, in _save_model
self.save_function(filepath, self.save_weights_only)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/properties.py", line 327, in save_checkpoint
self.checkpoint_connector.save_checkpoint(filepath, weights_only)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/checkpoint_connector.py", line 408, in save_checkpoint
atomic_save(checkpoint, filepath)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/pytorch_lightning/utilities/cloud_io.py", line 63, in atomic_save
torch.save(checkpoint, bytesbuffer)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/torch/serialization.py", line 372, in save
_save(obj, opened_zipfile, pickle_module, pickle_protocol)
File "/opt/conda/envs/lasaft/lib/python3.8/site-packages/torch/serialization.py", line 476, in _save
pickler.dump(obj)
TypeError: cannot pickle '_csv.writer' object
I found that if I don't use the CSV logger, i.e. I only use:
log = args['log']
if log == 'False':
    args['logger'] = False
elif log == 'wandb':
    args['logger'] = WandbLogger(project='lasaft_exp', tags=[model_name], offline=False, name=run_id)
    args['logger'].log_hyperparams(model.hparams)
    args['logger'].watch(model, log='all')
elif log == 'tensorboard':
    raise NotImplementedError
else:
    args['logger'] = True  # default
    # default_log_path = os.path.join(ckpt_path, 'lightning_logs')
    # args['logger'] = CSVLogger(default_log_path, version='0')
    # mkdir_if_not_exists(default_log_path)
Then the code works perfectly (except that I cannot track the loss). Can anyone help? Thank you.
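(Side note for anyone hitting the same thing: one possible stopgap for tracking the validation loss without attaching any Lightning logger is a small callback that writes the metrics itself and only opens the CSV file inside the hook, so no csv writer object is ever stored on an attribute. A rough, untested sketch, assuming pytorch-lightning 1.1.x:)

import csv
import os
from pytorch_lightning.callbacks import Callback

class CSVMetricsCallback(Callback):
    """Hypothetical stopgap: append trainer.callback_metrics to a CSV file
    after each validation run, without using a Lightning logger."""

    def __init__(self, out_path):
        self.out_path = out_path
        self._header_written = os.path.exists(out_path)

    def on_validation_end(self, trainer, pl_module):
        metrics = {k: float(v) for k, v in trainer.callback_metrics.items()}
        metrics['epoch'] = trainer.current_epoch
        # open the file only inside the hook, so the callback itself
        # never holds a _csv.writer object
        with open(self.out_path, 'a', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=sorted(metrics))
            if not self._header_written:
                writer.writeheader()
                self._header_written = True
            writer.writerow(metrics)

# usage (hypothetical): pass it to the Trainer via its callbacks argument, e.g.
# trainer = Trainer(callbacks=[CSVMetricsCallback('val_metrics.csv')], ...)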
Hi @sun-peach, I think it is a version issue.
I added lines for the CSV logger (see https://github.com/ws-choi/Conditioned-Source-Separation-LaSAFT/commit/556904c506124f89d074d14c4b336b3d62070152) and checked that it works fine for training.
Please check the versions of your installed libraries, especially the following:
pytorch-lightning==1.1.6
torch==1.7.1
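For example, a quick way to confirm what your environment actually has (run in the same Python environment you train in):

import pytorch_lightning
import torch

print(pytorch_lightning.__version__)  # should print 1.1.6
print(torch.__version__)              # should print 1.7.1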
