Unstarted training
Hi, I need help with training a sentence transformer. The whole configuration went well, but after calling trainer.train() no training progress is shown and no errors are raised either. One GPU is partially occupied, probably just from loading the model, but training does not appear to be happening: nothing is written to the output directory and the progress bar never shows up. Does anyone know what could be causing this?
Here are my package versions: Name: datasets Version: 3.6.0
Name: setfit Version: 1.1.2
Name: sentence-transformers Version: 4.1.0
Name: torch Version: 2.7.1
Name: accelerate Version: 1.8.0
Name: pandas Version: 2.2.2
Here is my training script:
1) IMPORTS
import pandas as pd from datasets import Dataset from setfit import SetFitModel, Trainer, TrainingArguments from sentence_transformers.losses import ContrastiveLoss, CosineSimilarityLoss from transformers import TrainerCallback from sklearn.metrics import accuracy_score, f1_score
2) Binary classification dataset
def load_excel(path):
    """Read an Excel sheet and return it as a binary-labelled ``Dataset``.

    The sheet must provide a ``text`` column and a ``labels`` (or already
    renamed ``label``) column.

    Args:
        path: Location of the Excel file, handed to ``pd.read_excel``.

    Returns:
        A ``Dataset`` with two columns: ``text`` (str) and ``label`` (int),
        where ``label`` is 1 for rows whose original label equals 4 and 0
        for every other row.
    """
    df = pd.read_excel(path)
    df = df.rename(columns={'labels': 'label'})
    # 'Unnamed: 0' is presumably a stray index column from an earlier export;
    # it is not guaranteed to exist, so do not fail when it is missing.
    df = df.drop(columns=['Unnamed: 0'], errors="ignore")
    # Collapse the original labels into a binary target: class 4 vs. the rest.
    df['label'] = (df['label'] == 4).astype(int)
    df["text"] = df["text"].astype(str)
    print(df.head(10))
    return Dataset.from_pandas(df[["text", "label"]])
# Build the training and validation splits from their Excel sheets.
train_ds = load_excel("xxx")
val_ds = load_excel("xx")

# Report the split sizes up front so an unexpectedly large or empty split
# is noticed before training is launched.
print(f"len of train_ds: {len(train_ds)}")
print(f"len of val_ds: {len(val_ds)}")
3) MODEL
# Load the SetFit model from a local checkpoint directory.
model = SetFitModel.from_pretrained(
    "xxx/simcse_small_e_czech",  # local path
)
print("Model loaded successfully")
4) METRICS
def compute_metrics(y_pred, y_true):
    """Score predictions against the reference labels.

    Args:
        y_pred: Predicted labels.
        y_true: Reference (gold) labels.

    Returns:
        A dict with ``"accuracy"`` and macro-averaged ``"f1"``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(y_true, y_pred, average="macro")
    return {"accuracy": accuracy, "f1": macro_f1}
5) CALLBACKS
class LogCallback(TrainerCallback):
    """Print evaluation metrics and append them to a plain-text log file."""

    def __init__(self, logfile="xxx/training_log.txt"):
        """Remember where log lines are appended.

        Args:
            logfile: Path of the text file that receives one line per
                evaluation.
        """
        # Must be spelled ``__init__``: a method merely called ``init`` is
        # never run on construction, which would leave ``self.logfile`` unset.
        self.logfile = logfile

    def on_evaluate(self, args, state, control, metrics=None, **kw):
        """Format the metrics of one evaluation, print and log them.

        ``accuracy`` and ``f1`` are reported as ``acc=`` / ``f1=`` when they
        are present. If neither is present, every metric that was passed in
        is reported instead, so an evaluation that yields other metrics
        does not raise ``KeyError``.

        Args:
            args: Training arguments supplied by the trainer (unused).
            state: Trainer state; only ``state.epoch`` is read.
            control: Trainer control object (unused).
            metrics: Mapping of metric name to value; nothing is logged when
                it is ``None`` or empty.
            **kw: Remaining callback keyword arguments (unused).
        """
        if not metrics:
            return

        fields = []
        # ``state.epoch`` is formatted only when set, so a None value
        # cannot break the log line.
        if state.epoch is not None:
            fields.append(f"epoch={state.epoch:.0f}")

        reported = [
            (label, metrics[key])
            for label, key in (("acc", "accuracy"), ("f1", "f1"))
            if key in metrics
        ]
        if not reported:
            reported = list(metrics.items())

        for name, value in reported:
            if isinstance(value, (int, float)):
                fields.append(f"{name}={value:.4f}")
            else:
                fields.append(f"{name}={value}")

        msg = " ".join(fields)
        print(msg)
        with open(self.logfile, "a", encoding="utf-8") as f:
            f.write(msg + "\n")
7) ARGUMENTS
# Hyper-parameters for SetFit training: log, evaluate and checkpoint once per
# epoch, and reload the best checkpoint when training ends.
#
# NOTE(review): `num_iterations` / `max_steps` are left at their defaults.
# With the default sampling strategy SetFit appears to build contrastive
# pairs from all combinations of training samples before the first step, so
# on a large training set `trainer.train()` can look stalled (no progress
# bar, no checkpoints) for a long time. If that is what is happening,
# set `num_iterations` or `max_steps`, or subsample the training data.
# Confirm against the dataset size.
training_args = TrainingArguments(
    output_dir="xxx/setfit_checkpoints",
    batch_size=16,
    num_epochs=3,
    body_learning_rate=1e-4,
    loss=ContrastiveLoss,
    logging_strategy="epoch",
    # NOTE(review): newer releases name this argument `eval_strategy`;
    # verify which spelling the installed setfit version expects.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Checkpoints are compared on the embedding-phase evaluation loss.
    # "accuracy" is presumably not among the metrics reported there, so it
    # cannot be used for best-model selection; the loss is lower-is-better.
    metric_for_best_model="embedding_loss",
    greater_is_better=False,
)
8) TRAINER
# Wire model, data, hyper-parameters, metric function and logging callback
# into a single SetFit trainer.
trainer = Trainer(
    model=model,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    args=training_args,
    metric=compute_metrics,
    callbacks=[LogCallback()],
    # Identity mapping: the dataset columns are already named "text"/"label".
    column_mapping={"text": "text", "label": "label"},
)
9) TRAINING
# Announce the start explicitly so the console shows that the script got
# past setup before the (potentially long) training call.
print("Training started")
trainer.train()
10) SAVE
# Evaluate the trained model with the configured metric function, then
# persist it to disk.
print("Final metrics:", trainer.evaluate())
trainer.model.save_pretrained("final_model_contrastive")