Train on COCO dataset
WARNING - root - Changed type of config entry "max_steps" from int to NoneType WARNING - ViLT - No observers have been added to this run INFO - ViLT - Running command 'main' INFO - ViLT - Started Global seed set to 0 INFO - lightning - Global seed set to 0 WARNING - timm.models.helpers - No pretrained weights exist or were found for this model. Using random initialization. ERROR - ViLT - Failed after 0:00:03! Traceback (most recent call last): File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/experiment.py", line 312, in run_commandline return self.run( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/experiment.py", line 276, in run run() File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/run.py", line 238, in call self.result = self.main_function(*args) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/config/captured_function.py", line 42, in captured_function result = wrapped(*args, **kwargs) File "run.py", line 48, in main trainer = pl.Trainer( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/env_vars_connector.py", line 41, in overwrite_by_env_vars return fn(self, **kwargs) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 347, in init self.accelerator_connector.on_trainer_init( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator_connector.py", line 101, in on_trainer_init self.trainer.data_parallel_device_ids = device_parser.parse_gpu_ids(self.trainer.gpus) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/utilities/device_parser.py", line 78, in parse_gpu_ids gpus = _sanitize_gpu_ids(gpus) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/utilities/device_parser.py", line 139, in _sanitize_gpu_ids raise MisconfigurationException(f""" 
pytorch_lightning.utilities.exceptions.MisconfigurationException: You requested GPUs: [0, 1, 2, 3, 4, 5, 6, 7] But your machine only has: [0]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "run.py", line 11, in <module>
You requested GPUs: [0, 1, 2, 3, 4, 5, 6, 7], but your machine only has: [0]. You should set the num_gpus hyperparameter in your script.
Hello, I corrected the error you mentioned, but there are still the following two errors:
WARNING - root - Changed type of config entry "max_steps" from int to NoneType WARNING - ViLT - No observers have been added to this run INFO - ViLT - Running command 'main' INFO - ViLT - Started Global seed set to 0 INFO - lightning - Global seed set to 0 GPU available: True, used: True INFO - lightning - GPU available: True, used: True TPU available: None, using: 0 TPU cores INFO - lightning - TPU available: None, using: 0 TPU cores Using environment variable NODE_RANK for node rank (). INFO - lightning - Using environment variable NODE_RANK for node rank (). ERROR - ViLT - Failed after 0:00:06! Traceback (most recent call last): File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/experiment.py", line 312, in run_commandline return self.run( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/experiment.py", line 276, in run run() File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/run.py", line 238, in call self.result = self.main_function(*args) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/config/captured_function.py", line 42, in captured_function result = wrapped(*args, **kwargs) File "run.py", line 48, in main trainer = pl.Trainer( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/env_vars_connector.py", line 41, in overwrite_by_env_vars return fn(self, **kwargs) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 347, in init self.accelerator_connector.on_trainer_init( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator_connector.py", line 127, in on_trainer_init self.trainer.node_rank = self.determine_ddp_node_rank() File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator_connector.py", line 415, in determine_ddp_node_rank return int(rank) ValueError: invalid literal for int() with base 
10: ''
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "run.py", line 11, in <module>
export NODE_RANK=0
Traceback (most recent call last): File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/experiment.py", line 312, in run_commandline return self.run( File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/experiment.py", line 276, in run run() File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/run.py", line 238, in call self.result = self.main_function(*args) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/sacred/config/captured_function.py", line 42, in captured_function result = wrapped(*args, **kwargs) File "run.py", line 71, in main trainer.fit(model, datamodule=dm) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 473, in fit results = self.accelerator_backend.train() File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/accelerators/ddp_accelerator.py", line 152, in train results = self.ddp_train(process_idx=self.task_idx, model=model) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/accelerators/ddp_accelerator.py", line 268, in ddp_train self.trainer.call_setup_hook(model) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 859, in call_setup_hook self.datamodule.setup(stage_name) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/core/datamodule.py", line 92, in wrapped_fn return fn(*args, **kwargs) File "/root/autodl-tmp/ViLT-master/vilt/datamodules/multitask_datamodule.py", line 34, in setup dm.setup(stage) File "/root/miniconda3/envs/test/lib/python3.8/site-packages/pytorch_lightning/core/datamodule.py", line 92, in wrapped_fn return fn(*args, **kwargs) File "/root/autodl-tmp/ViLT-master/vilt/datamodules/datamodule_base.py", line 137, in setup self.set_train_dataset() File "/root/autodl-tmp/ViLT-master/vilt/datamodules/datamodule_base.py", line 76, in set_train_dataset self.train_dataset = self.dataset_cls( 
File "/root/autodl-tmp/ViLT-master/vilt/datasets/coco_caption_karpathy_dataset.py", line 17, in __init__ super().__init__(*args, **kwargs, names=names, text_column_name="caption") File "/root/autodl-tmp/ViLT-master/vilt/datasets/base_dataset.py", line 53, in __init__ self.table_names += [name] * len(tables[i]) IndexError: list index out of range
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "run.py", line 11, in <module>
You should learn to debug yourself! Bro!
Sorry, I will try again.
Have you solved the problem?