CodeTF
CodeTF copied to clipboard
ValueError: Config name is missing.
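I tried to run the demo example for fine-tuning the CodeT5+ model from the README, but changed the CodeXGLUE dataset subset from `text-to-code` to `code-to-text`.
Loading then fails with the error below: `load_codexglue_code_to_text_dataset` calls `load_dataset` without a config name, but the `code_x_glue_ct_code_to_text` dataset has one config per programming language (`go`, `java`, `javascript`, `php`, `python`, `ruby`) and requires one to be selected.
It would be helpful to have an option to set this variable (e.g. the language) when loading the dataset.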
https://github.com/salesforce/CodeTF/blob/b6515706fd5934f2dc0d6045978b918a6dd3a63f/codetf/data_utility/codexglue_dataset.py#L41-L43
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[3], line 11
7 model_class = load_model_pipeline(model_name="codet5", task="pretrained",
8 model_type="plus-220M", is_eval=True)
10 dataset = CodeXGLUEDataset(tokenizer=model_class.get_tokenizer())
---> 11 train, test, validation = dataset.load(subset="code-to-text")
13 train_dataset= CustomDataset(train[0], train[1])
14 test_dataset= CustomDataset(test[0], test[1])
File [~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:19](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:19), in CodeXGLUEDataset.load(self, subset)
17 def load(self, subset):
18 if subset in self.load_funcs:
---> 19 return self.load_funcs[subset]()
20 else:
21 raise ValueError(f'Invalid subset {subset}. Available subsets are: {list(self.load_funcs.keys())}')
File [~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:43](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/projects/edu/master/CodeTF/codetf/data_utility/codexglue_dataset.py:43), in CodeXGLUEDataset.load_codexglue_code_to_text_dataset(self)
41 def load_codexglue_code_to_text_dataset(self):
42 dataset = self.dataset_config["codexglue_code_to_text"]
---> 43 dataset = load_dataset(dataset)
45 train = dataset["train"]
46 train_code_tensors, _ = self.process_data(train["code"])
File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1773](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1773), in load_dataset(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, storage_options, **config_kwargs)
1768 verification_mode = VerificationMode(
1769 (verification_mode or VerificationMode.BASIC_CHECKS) if not save_infos else VerificationMode.ALL_CHECKS
1770 )
1772 # Create a dataset builder
-> 1773 builder_instance = load_dataset_builder(
1774 path=path,
1775 name=name,
1776 data_dir=data_dir,
1777 data_files=data_files,
1778 cache_dir=cache_dir,
1779 features=features,
1780 download_config=download_config,
1781 download_mode=download_mode,
1782 revision=revision,
1783 use_auth_token=use_auth_token,
1784 storage_options=storage_options,
1785 **config_kwargs,
1786 )
1788 # Return iterable dataset in case of streaming
1789 if streaming:
File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1528](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/load.py:1528), in load_dataset_builder(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, storage_options, **config_kwargs)
1525 raise ValueError(error_msg)
1527 # Instantiate the dataset builder
-> 1528 builder_instance: DatasetBuilder = builder_cls(
1529 cache_dir=cache_dir,
1530 config_name=config_name,
1531 data_dir=data_dir,
1532 data_files=data_files,
1533 hash=hash,
1534 features=features,
1535 use_auth_token=use_auth_token,
1536 storage_options=storage_options,
1537 **builder_kwargs,
1538 **config_kwargs,
1539 )
1541 return builder_instance
File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:340](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:340), in DatasetBuilder.__init__(self, cache_dir, config_name, hash, base_path, info, features, use_auth_token, repo_id, data_files, data_dir, storage_options, writer_batch_size, name, **config_kwargs)
338 if data_dir is not None:
339 config_kwargs["data_dir"] = data_dir
--> 340 self.config, self.config_id = self._create_builder_config(
341 config_name=config_name,
342 custom_features=features,
343 **config_kwargs,
344 )
346 # prepare info: DatasetInfo are a standardized dataclass across all datasets
347 # Prefill datasetinfo
348 if info is None:
File [~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:469](https://vscode-remote+wsl-002bubuntu.vscode-resource.vscode-cdn.net/home/paul/projects/edu/master/mdl-ii/src/modeling/~/.conda/envs/codetf/lib/python3.8/site-packages/datasets/builder.py:469), in DatasetBuilder._create_builder_config(self, config_name, custom_features, **config_kwargs)
467 if len(self.BUILDER_CONFIGS) > 1:
468 example_of_usage = f"load_dataset('{self.name}', '{self.BUILDER_CONFIGS[0].name}')"
--> 469 raise ValueError(
470 "Config name is missing."
471 f"\nPlease pick one among the available configs: {list(self.builder_configs.keys())}"
472 + f"\nExample of usage:\n\t`{example_of_usage}`"
473 )
474 builder_config = self.BUILDER_CONFIGS[0]
475 logger.info(f"No config specified, defaulting to the single config: {self.name}/{builder_config.name}")
ValueError: Config name is missing.
Please pick one among the available configs: ['go', 'java', 'javascript', 'php', 'python', 'ruby']
Example of usage:
`load_dataset('code_x_glue_ct_code_to_text', 'go')`