bert4vec
bert4vec copied to clipboard
执行model.encode出错

代码:
# coding: utf-8
import traceback
import faulthandler

# Enabled before the heavy third-party imports below so that a hard crash
# (e.g. a segfault in native code) still dumps a Python traceback.
faulthandler.enable()

from tqdm import *
import pandas as pd
from bert4vec import Bert4Vec

# NOTE(review): `model` is assigned twice. The second assignment replaces the
# first, so only the roformer-sim-base model is used by the code below, while
# the simbert-base model is still loaded and then discarded. Presumably one of
# these lines was commented out in the original script -- confirm which model
# is intended and drop the other.
model = Bert4Vec(mode='simbert-base', model_name_or_path="/Users/ruiliu/Downloads/simbert-base-chinese")
model = Bert4Vec(mode='roformer-sim-base', model_name_or_path="/Users/ruiliu/Downloads/roformer-sim-base-chinese")
def load_excel(excel_path, sheet_name=0):
    """Load a CSV file and return its rows as a list of dicts.

    Despite the name, the file is parsed with ``pandas.read_csv``; it must be
    a CSV file, not an Excel workbook.

    Args:
        excel_path: Path of the CSV file to read.
        sheet_name: Unused. Kept only so existing callers that pass it keep
            working.

    Returns:
        One ``{column_name: value}`` dict per data row, in file order. An
        input with a header but no data rows yields an empty list.
    """
    # keep_default_na=False: empty cells are kept as empty strings rather
    # than being parsed as NaN.
    data = pd.read_csv(excel_path, keep_default_na=False)
    return [data.loc[i].to_dict() for i in data.index.values]
if __name__ == "__main__":
    # Script entry point: encode the "标准问题*" column of every CSV row, one
    # sentence per encode() call, and print the traceback if anything fails.
    try:
        data_list = load_excel("data/交互平台问答数据-ALL-CODE.csv")
        # `with` guarantees the output file is closed even when encode() raises.
        with open("data/simbert_no_normal_vector.txt", "w", encoding="utf-8") as o_file:
            for data in data_list:
                sentences = [data["标准问题*"]]
                vectors = model.encode(
                    sentences,
                    batch_size=64,
                    convert_to_numpy=True,
                    normalize_to_unit=False,
                )
                # NOTE: writing the vectors is currently disabled, so the
                # output file is created (truncated) but stays empty.
                # Re-enable the two lines below to emit one
                # "sentence--v0--v1--..." line per input row.
                # vector = [str(float(i)) for i in vectors[0]]
                # o_file.writelines(sentences[0] + "--" + "--".join(vector) + "\n")
    except Exception:
        # Top-level boundary of a debugging script: report and exit normally.
        print(traceback.format_exc())