Auto-UI
Unable to run the model
Hi, I am following the steps in the README to run the model. My goal is to run the model on my own inputs; I don't want to train it.
I did the following:
- Downloaded the blip dataset from https://huggingface.co/cooelf/Auto-UI/tree/main and placed it in the dataset folder.
On running the command:
python -m torch.distributed.launch --nproc_per_node=8 --use_env main.py \
--data_root blip \
--model declare-lab/flan-alpaca-base \
--epoch 10 --lr 1e-4 \
--user_msg seq_future_blip_axis_all0.1_hist8_future4 --img_type blip --img_dim 1408 \
--bs 4 --eval_bs 16 --input_len 512 --output_len 128 --eval_acc 40 \
--transform_axis --warmup_ratio 0.05 \
--all_data 0.1 \
--use_history 8 \
--use_future 4 \
--eval_subset dataset/blip/general_blip \
--output_dir experiments
I get the following error:
args Namespace(all_data=0.1, bs=4, data_ratio=None, data_root='blip', debug_num=None, epoch=10, eval_acc=40, eval_bs=16, eval_name=None, eval_subset='dataset/blip/general_blip', evaluate_dir=None, final_eval=False, img_dim=1408, img_type='blip', input_len=512, local_rank=-1, lr=0.0001, model='declare-lab/flan-alpaca-base', output_dir='experiments', output_len=128, seed=42, transform_axis=True, use_future=4, use_generate=True, use_history=8, use_img_history=False, use_layout=False, user_msg='seq_future_blip_axis_all0.1_hist8_future4', warmup_ratio=0.05)
====Input Arguments====
{
    "data_root": "blip",
    "output_dir": "experiments",
    "model": "declare-lab/flan-alpaca-base",
    "data_ratio": null,
    "eval_name": null,
    "local_rank": -1,
    "epoch": 10,
    "lr": 0.0001,
    "warmup_ratio": 0.05,
    "bs": 4,
    "debug_num": null,
    "input_len": 512,
    "output_len": 128,
    "img_dim": 1408,
    "eval_bs": 16,
    "eval_acc": 40,
    "all_data": 0.1,
    "eval_subset": "dataset/blip/general_blip",
    "use_history": 8,
    "use_img_history": false,
    "use_future": 4,
    "use_layout": false,
    "transform_axis": true,
    "use_generate": true,
    "final_eval": false,
    "user_msg": "seq_future_blip_axis_all0.1_hist8_future4",
    "img_type": "blip",
    "evaluate_dir": null,
    "seed": 42
}
[20:18:30] [Model]: Loading declare-lab/flan-alpaca-base... main.py:83
[Data]: Reading data... main.py:84
experiments/seq_future_blip_axis_all0.1_hist8_future4_declare-lab-flan-alpaca-base_blip_lr0.0001_bs0_ip512_op128_ep10
(the arguments above and these loading messages are printed once by each of the 8 launched processes)
model.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 990M/990M [00:17<00:00, 56.1MB/s]
Some weights of T5ForMultimodalGeneration were not initialized from the model checkpoint at declare-lab/flan-alpaca-base and are newly initialized: ['mha_layer.out_proj.bias', 'gate_dense.bias', 'mha_layer.in_proj_bias', 'image_dense.weight', 'mha_layer.out_proj.weight', 'mha_layer.in_proj_weight', 'gate_dense.weight', 'image_dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
generation_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 142/142 [00:00<00:00, 25.5kB/s]
loading general 0
loading google_apps 7580
[2024-01-07 20:20:07,853] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19300 closing signal SIGTERM
[2024-01-07 20:20:07,855] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19301 closing signal SIGTERM
[2024-01-07 20:20:07,855] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19302 closing signal SIGTERM
[2024-01-07 20:20:07,855] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19303 closing signal SIGTERM
[2024-01-07 20:20:07,855] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19304 closing signal SIGTERM
[2024-01-07 20:20:07,855] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19306 closing signal SIGTERM
[2024-01-07 20:20:07,855] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 19307 closing signal SIGTERM
[2024-01-07 20:20:08,928] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: -9) local_rank: 5 (pid: 19305) of binary: /home/skirti/.pyenv/versions/3.8.11/bin/python
Traceback (most recent call last):
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/site-packages/torch/distributed/launch.py", line 196, in <module>
main()
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/site-packages/torch/distributed/launch.py", line 192, in main
launch(args)
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/site-packages/torch/distributed/launch.py", line 177, in launch
run(args)
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/site-packages/torch/distributed/run.py", line 797, in run
elastic_launch(
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 134, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/skirti/.pyenv/versions/3.8.11/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 264, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
============================================================
main.py FAILED
------------------------------------------------------------
Failures:
<NO_OTHER_FAILURES>
------------------------------------------------------------
Root Cause (first observed failure):
[0]:
  time      : 2024-01-07_20:20:07
  host      : 211b70a3
  rank      : 5 (local_rank: 5)
  exitcode  : -9 (pid: 19305)
  error_file: <N/A>
  traceback : Signal 9 (SIGKILL) received by PID 19305
============================================================
Any pointers on what is causing this?
Check that your jaxlib version matches your CUDA version.
I installed it with this command:
pip install --upgrade "jax[cuda12_pip]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
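To confirm the install matches your CUDA setup, a quick sanity check is below. This is nothing Auto-UI specific, just standard jax/jaxlib/torch introspection; treat it as a sketch and adapt as needed.
# print the installed jax/jaxlib versions and the devices jax can see
python -c "import jax, jaxlib; print(jax.__version__, jaxlib.__version__); print(jax.devices())"
# print the CUDA version PyTorch was built against and how many GPUs it sees
python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.device_count())"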
If you get a CUDA OOM error, prepend this to the launch command and reduce the number of processes:
export XLA_PYTHON_CLIENT_PREALLOCATE=false && python -m torch.distributed.launch --nproc_per_node=4 --use_env main.py
This worked for me.
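For context, exitcode -9 means the worker was killed with SIGKILL, which on Linux usually comes from the kernel OOM killer (host RAM, not GPU memory), so reducing the memory footprint is the main lever. Here is a sketch of the launch I would try first, keeping the arguments from the original post but with fewer processes and a smaller batch size; the exact values are guesses to fit in memory, not settings from the Auto-UI README:
export XLA_PYTHON_CLIENT_PREALLOCATE=false
python -m torch.distributed.launch --nproc_per_node=2 --use_env main.py \
    --data_root blip \
    --model declare-lab/flan-alpaca-base \
    --epoch 10 --lr 1e-4 \
    --user_msg seq_future_blip_axis_all0.1_hist8_future4 --img_type blip --img_dim 1408 \
    --bs 2 --eval_bs 8 --input_len 512 --output_len 128 --eval_acc 40 \
    --transform_axis --warmup_ratio 0.05 \
    --all_data 0.1 --use_history 8 --use_future 4 \
    --eval_subset dataset/blip/general_blip \
    --output_dir experiments
If it still dies with -9, and if --all_data controls the fraction of data loaded (I have not checked), lowering it further may also help, since the crash happens right after the "loading google_apps" messages.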