Unable to fine-tune a transformer-based NER model after initial tuning
Discussed in https://github.com/explosion/spaCy/discussions/13394
Originally posted by jlustgarten March 23, 2024
How to reproduce the behaviour
1. Create a transformer NER model. 2. Train it on data using the cfg and CLI, which auto-saves it. 3. Create a new cfg file that points to your existing model. 4. Try triggering training using the CLI. You will get a missing config.json error.
Your Environment
- spaCy version: 3.7.2
- Platform: Linux-5.15.146.1-microsoft-standard-WSL2-x86_64-with-glibc2.35
- Python version: 3.10.13
This is still occurring with the same text. Config: [paths] train = null dev = null vectors = null init_tok2vec = null
[system] gpu_allocator = "pytorch" seed = 0
[nlp] lang = "en" pipeline = ["transformer","ner"] batch_size = 128 disabled = [] before_creation = null after_creation = null after_pipeline_creation = null tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} vectors = {"@vectors":"spacy.Vectors.v1"}
[components]
[components.ner] factory = "ner" incorrect_spans_key = null moves = null scorer = {"@scorers":"spacy.ner_scorer.v1"} update_with_oracle_cut_size = 100
[components.ner.model] @architectures = "spacy.TransitionBasedParser.v2" state_type = "ner" extra_state_tokens = false hidden_width = 64 maxout_pieces = 2 use_upper = false nO = null
[components.ner.model.tok2vec] @architectures = "spacy-transformers.TransformerListener.v1" grad_factor = 1.0 pooling = {"@layers":"reduce_mean.v1"} upstream = "*"
[components.transformer] factory = "transformer" max_batch_items = 4096 set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
[components.transformer.model] @architectures = "spacy-transformers.TransformerModel.v3" name = "/home/user/Coding/PatientHistory/original_pt_hist_ner" mixed_precision = false
[components.transformer.model.get_spans] @span_getters = "spacy-transformers.strided_spans.v1" window = 128 stride = 96
[components.transformer.model.grad_scaler_config]
[components.transformer.model.tokenizer_config] use_fast = true
[components.transformer.model.transformer_config]
[corpora]
[corpora.dev] @readers = "spacy.Corpus.v1" path = ${paths.dev} max_length = 0 gold_preproc = false limit = 0 augmenter = null
[corpora.train] @readers = "spacy.Corpus.v1" path = ${paths.train} max_length = 0 gold_preproc = false limit = 0 augmenter = null
[training] accumulate_gradient = 4 dev_corpus = "corpora.dev" train_corpus = "corpora.train" seed = ${system.seed} gpu_allocator = ${system.gpu_allocator} dropout = 0.1 patience = 2000 max_epochs = 0 max_steps = 80000 eval_frequency = 200 frozen_components = [] annotating_components = [] before_to_disk = null before_update = null
[training.batcher] @batchers = "spacy.batch_by_padded.v1" discard_oversize = false size = 2000 buffer = 256 get_length = null
[training.logger] @loggers = "spacy.ConsoleLogger.v1" progress_bar = false
[training.optimizer] @optimizers = "Adam.v1" beta1 = 0.9 beta2 = 0.999 L2_is_weight_decay = true L2 = 0.01 grad_clip = 1.0 use_averages = false eps = 0.00000001
[training.optimizer.learn_rate] @schedules = "warmup_linear.v1" warmup_steps = 250 total_steps = 200000 initial_rate = 0.00005
[training.score_weights] ents_f = 1.0 ents_p = 0.0 ents_r = 0.0 ents_per_type = null
[pretraining]
[initialize] vectors = ${paths.vectors} init_tok2vec = ${paths.init_tok2vec} vocab_data = null lookups = null before_init = null after_init = null
[initialize.components]
[initialize.tokenizer]
Here's the CLI: python -m spacy train '/home/user/Coding/PatientHistory/refine_pt_hist_ner.cfg' --output '/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024' --paths.train '/home/user/Coding/PatientHistory/train.spacy' --paths.dev '/home/user/Coding/PatientHistory/test.spacy' --gpu-id 0 Here's the output: ℹ Saving to output directory: /home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024 ℹ Using GPU: 0
=========================== Initializing pipeline =========================== /home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. _torch_pytree._register_pytree_node( /home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. _torch_pytree._register_pytree_node( Traceback (most recent call last): File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 86, in _run_code exec(code, run_globals) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/main.py", line 4, in setup_cli() File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli command(prog_name=COMMAND) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1157, in call return self.main(*args, **kwargs) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 778, in main return _main( File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 216, in _main rv = self.invoke(ctx) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1688, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1434, in invoke return 
ctx.invoke(self.callback, **ctx.params) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 783, in invoke return __callback(*args, **kwargs) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper return callback(**use_params) # type: ignore File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 54, in train_cli train(config_path, output_path, use_gpu=use_gpu, overrides=overrides) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 81, in train nlp = init_nlp(config, use_gpu=use_gpu) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/training/initialize.py", line 95, in init_nlp nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/language.py", line 1349, in initialize proc.initialize(get_examples, nlp=self, **p_settings) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/pipeline_component.py", line 351, in initialize self.model.initialize(X=docs) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/thinc/model.py", line 318, in initialize self.init(self, X=X, Y=Y) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 131, in init hf_model = huggingface_from_pretrained(name, tok_cfg, trf_cfg) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 267, in huggingface_from_pretrained tokenizer = tokenizer_cls.from_pretrained(str_path, **tok_config) File 
"/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 752, in from_pretrained config = AutoConfig.from_pretrained( File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 1082, in from_pretrained config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 644, in get_config_dict config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs) File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 699, in _get_config_dict resolved_config_file = cached_file( File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/hub.py", line 360, in cached_file raise EnvironmentError( OSError: /home/user/Coding/PatientHistory/original_pt_hist_ner does not appear to have a file named config.json. Checkout 'https://huggingface.co//home/user/Coding/PatientHistory/original_pt_hist_ner/None' for available files.