sparseml
sparseml copied to clipboard
[SparseZoo v2 Bridge] Save transformers training artifacts to ModelDirectory directory
Allows creating a local ModelDirectory directly from the output of the transformers training procedure.
To test:
- Install sparsezoo locally from the `feature/ci-cd-refactor/validations` branch.
- Then run:
import os.path
from sparseml.transformers.utils import get_model_directory
from sparsezoo.v2.model_directory import ModelDirectory
# Manual smoke test: build a ModelDirectory-style folder from the outputs of a
# transformers training run, list what was written, then try to load it.

# where the training artifacts are
path_to_training_outputs = "/home/damian/sparseml/output"
# where the assembled model directory will be written
save_dir = "/home/damian/my_model_dir"

# create an empty model card if none exists
model_card_path = os.path.join(path_to_training_outputs, "model.md")
if not os.path.exists(model_card_path):
    # opening in "w" mode creates the file; the context manager closes it
    with open(model_card_path, "w"):
        pass

get_model_directory(
    output_dir=save_dir,
    training_outputs_dir=path_to_training_outputs,
    logs_path=os.path.join(path_to_training_outputs, "runs"),
)

# print every file in the resulting directory tree
for path, _dirs, files in os.walk(save_dir):
    for filename in files:
        print(os.path.join(path, filename))

# this will fail in a controlled manner (the model card is empty)
assert ModelDirectory.from_directory(save_dir)
Output:
/home/damian/my_model_dir/model.onnx
/home/damian/my_model_dir/model.md
/home/damian/my_model_dir/sample_inputs/inp-0002.npz
/home/damian/my_model_dir/sample_inputs/inp-0005.npz
...
/home/damian/my_model_dir/sample_outputs/out-0000.npz
/home/damian/my_model_dir/sample_outputs/out-0005.npz
...
/home/damian/my_model_dir/training/eval_results.json
/home/damian/my_model_dir/training/training_args.bin
/home/damian/my_model_dir/training/vocab.txt
/home/damian/my_model_dir/training/all_results.json
/home/damian/my_model_dir/training/trainer_state.json
/home/damian/my_model_dir/training/special_tokens_map.json
/home/damian/my_model_dir/training/train_results.json
/home/damian/my_model_dir/training/recipe.yaml
/home/damian/my_model_dir/training/pytorch_model.bin
/home/damian/my_model_dir/training/config.json
/home/damian/my_model_dir/training/tokenizer_config.json
/home/damian/my_model_dir/training/tokenizer.json
/home/damian/my_model_dir/deployment/model.onnx
/home/damian/my_model_dir/deployment/config.json
/home/damian/my_model_dir/deployment/tokenizer.json
/home/damian/my_model_dir/logs/Jul01_14-12-31_lambdaquad/events.out.tfevents.1656699325.lambdaquad.2354562.2
/home/damian/my_model_dir/logs/Jul01_14-12-31_lambdaquad/events.out.tfevents.1656699285.lambdaquad.2354562.0
/home/damian/my_model_dir/logs/Jul01_14-12-31_lambdaquad/1656699285.8326883/events.out.tfevents.1656699285.lambdaquad.2354562.1
/home/damian/my_model_dir/logs/Jul01_14-21-16_lambdaquad/events.out.tfevents.1656699725.lambdaquad.2356466.2
/home/damian/my_model_dir/logs/Jul01_14-21-16_lambdaquad/events.out.tfevents.1656699685.lambdaquad.2356466.0
/home/damian/my_model_dir/logs/Jul01_14-21-16_lambdaquad/1656699685.4507694/events.out.tfevents.1656699685.lambdaquad.2356466.1
Traceback (most recent call last):
File "/home/damian/sparseml/src/sparseml/transformers/utils/test.py", line 28, in <module>
assert ModelDirectory.from_directory(save_dir)
File "/home/damian/sparsezoo/src/sparsezoo/v2/model_directory.py", line 196, in from_directory
return ModelDirectory(files=files, name="model_directory", path=directory_path)
File "/home/damian/sparsezoo/src/sparsezoo/v2/model_directory.py", line 84, in __init__
] = self._sample_outputs_list_to_dict(
File "/home/damian/sparsezoo/src/sparsezoo/v2/model_directory.py", line 474, in _sample_outputs_list_to_dict
framework_name = self.model_card._validate_model_card()["framework"]
TypeError: 'NoneType' object is not subscriptable
@KSGulin assigned for review
Could you also add an example command to get training artifacts in the description? Maybe a one-shot run?