
Why do the embeddings from my trained BGE-M3 model match those from the original model?

shyzzz521 opened this issue 2 months ago • 0 comments

I fine-tuned bge-m3 with the script below, but the embeddings produced by the fine-tuned checkpoint are identical to those from the original model. Could there be an issue with the parameter settings in my training script?
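For context, each file under train_data is JSONL; a single line looks roughly like the made-up record below (the real data lives under /home/jovyan/dataws1/bgeft/train_table_data, and I leave out teacher scores because I train with --knowledge_distillation False):

```python
import json

# A made-up example of one training record in the FlagEmbedding fine-tuning format:
# "query" is a string, "pos" and "neg" are lists of passages. Teacher scores
# (pos_scores/neg_scores) are omitted since --knowledge_distillation is False.
record = {
    "query": "which column of the orders table stores the order date?",
    "pos": ["The orders table contains an order_date column of type DATE."],
    "neg": [
        "The users table stores the account creation timestamp.",
        "Invoices are archived to cold storage every month.",
    ],
}
print(json.dumps(record, ensure_ascii=False))
```

Here is the full training script: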

export WANDB_MODE=disabled

train_data="/home/jovyan/dataws1/bgeft/train_table_data"

# use a small number of epochs and a small batch size for testing
num_train_epochs=1
per_device_train_batch_size=1

# set num_gpus to 2 for testing

num_gpus=2

if [ -z "$HF_HUB_CACHE" ]; then
    export HF_HUB_CACHE="$HOME/.cache/huggingface/hub"
fi

model_args="
--model_name_or_path /home/jovyan/dataws1/bgeft/models/bge-m3
--cache_dir $HF_HUB_CACHE
"

data_args="
--train_data $train_data
--cache_path /home/jovyan/dataws1/bgeft/cachedata
--train_group_size 8
--query_max_len 8192
--passage_max_len 8192
--pad_to_multiple_of 8
--knowledge_distillation False
"

training_args="
--output_dir /home/jovyan/dataws1/bgeft/output/bge-m3_ft1
--overwrite_output_dir
--learning_rate 1e-5
--fp16
--num_train_epochs $num_train_epochs
--per_device_train_batch_size $per_device_train_batch_size
--dataloader_drop_last True
--warmup_ratio 0.1
--gradient_checkpointing
--deepspeed /home/jovyan/dataws1/bgeft/FlagEmbedding/examples/finetune/ds_stage0.json
--logging_steps 1
--save_steps 20000
--negatives_cross_device
--temperature 0.02
--sentence_pooling_method cls
--normalize_embeddings True
--kd_loss_type m3_kd_loss
--unified_finetuning True
--use_self_distill True
--fix_encoder False
--self_distill_start_step 0
--report_to tensorboard
"

cmd="torchrun --nproc_per_node $num_gpus
-m FlagEmbedding.finetune.embedder.encoder_only.m3
$model_args
$data_args
$training_args
"

echo $cmd
eval $cmd
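This is roughly how I compare the two checkpoints once training finishes (a minimal sketch with BGEM3FlagModel; the sentences are arbitrary test inputs):

```python
from FlagEmbedding import BGEM3FlagModel

base = BGEM3FlagModel("/home/jovyan/dataws1/bgeft/models/bge-m3", use_fp16=True)
tuned = BGEM3FlagModel("/home/jovyan/dataws1/bgeft/output/bge-m3_ft1", use_fp16=True)

# arbitrary test sentences
sentences = [
    "which column of the orders table stores the order date?",
    "the quick brown fox jumps over the lazy dog",
]

# dense_vecs are L2-normalized, so the row-wise dot product is the cosine similarity
v_base = base.encode(sentences)["dense_vecs"]
v_tuned = tuned.encode(sentences)["dense_vecs"]
print((v_base * v_tuned).sum(axis=1))  # in my runs every value is ~1.0, i.e. identical embeddings
```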

shyzzz521 · Nov 15 '25 09:11