autolabel
autolabel copied to clipboard
Added support for Hugging Face datasets
trafficstars
We can now load datasets from Hugging Face, as shown below:
# Load the LEDGAR configuration of the LexGLUE benchmark from Hugging Face
# and keep only its test split.
dataset = load_dataset("lex_glue", "ledgar")
test_dataset = dataset["test"]

# NOTE(review): map_label_to_string is not defined in this snippet; presumably
# it converts the integer "label" column to its string class names — confirm.
test_dataset = map_label_to_string(test_dataset, "label")
# Rename the input column from "text" to "example" (presumably the column name
# the labeling config refers to — confirm against config_ledgar.json).
test_dataset = test_dataset.rename_column("text", "example")

ledgar_path = Path("../autolabel/examples/ledgar")
# Read the example's labeling configuration; the encoding is explicit so the
# JSON is decoded the same way regardless of the platform default.
with open(ledgar_path / "config_ledgar.json", encoding="utf-8") as f:
    config = json.load(f)

# When the few-shot examples are given as a string, prefix it with the example
# directory so it resolves independently of the current working directory.
# Non-string values are left untouched.
few_shot_examples = config["prompt"].get("few_shot_examples")
if isinstance(few_shot_examples, str):
    config["prompt"]["few_shot_examples"] = str(ledgar_path / few_shot_examples)

agent = LabelingAgent(config)
# plan() and run() are both limited to the first 8 items via max_items.
agent.plan(test_dataset, max_items=8)
agent.run(test_dataset, max_items=8)