To read a local copy of the GLUE dataset, I modified the GLUE class so that the attack evaluation can still be carried out, but the score in the output is always 0. Why does this happen?
class GLUE(Dataset):
    """
    GLUE class is a dataset class for the General Language Understanding Evaluation benchmark, supporting multiple natural language understanding tasks.

    Examples:
    [{'content': "it 's a charming and often affecting journey . ", 'label': 1}, {'content': 'unflinchingly bleak and desperate ', 'label': 0}, ...]

    Args:
        task: Name of the GLUE task; must be one of ``supported_tasks``.
        local_path: Optional directory with parquet data. The rows are read
            from ``{local_path}/{task}`` (for "mnli": from
            ``{local_path}/mnli_matched`` and ``{local_path}/mnli_mismatched``).
            When falsy, the validation split(s) are fetched with
            ``load_dataset("glue", ...)`` instead.
    """

    # The earlier ``__init__(self, task)``, which only loaded the validation
    # split(s) through ``load_dataset``, is superseded by the constructor
    # below; calling it without ``local_path`` follows that same loading path.

    def __init__(self, task, local_path=None):
        self.data = []
        self.supported_tasks = ["sst2", "cola", "qqp", "mnli",
                                "mnli_matched", "mnli_mismatched", "qnli", "wnli", "rte", "mrpc"]
        assert task in self.supported_tasks
        self.task = task

        if local_path:
            if self.task == "mnli":
                from datasets import concatenate_datasets
                matched_df = pd.read_parquet(f"{local_path}/mnli_matched")
                mismatched_df = pd.read_parquet(f"{local_path}/mnli_mismatched")
                # Convert the pandas DataFrames to datasets.Dataset.
                matched = datasets.Dataset.from_pandas(matched_df)
                mismatched = datasets.Dataset.from_pandas(mismatched_df)
                data = concatenate_datasets([matched, mismatched])
            else:
                # Read the data for every other task.
                df = pd.read_parquet(f"{local_path}/{task}")
                data = datasets.Dataset.from_pandas(df)
        else:
            # No local data: load from Hugging Face datasets.
            if self.task == "mnli":
                from datasets import concatenate_datasets
                # Fetch the dataset once and take both validation splits from it.
                mnli = load_dataset("glue", "mnli")
                data = concatenate_datasets(
                    [mnli["validation_matched"], mnli["validation_mismatched"]])
            else:
                data = load_dataset("glue", task)["validation"]

        for d in data:
            label = d['label']
            # Rows without a gold label (GLUE marks them with -1) cannot be
            # scored; keeping them is what drives the evaluation score to 0.
            # NOTE(review): presumably the local parquet data also contains the
            # unlabeled test split -- confirm against the files on disk.
            if label is None or label < 0:
                continue
            self.data.append(
                {"content": self._format_content(task, d), "label": label})

    @staticmethod
    def _format_content(task, d):
        """Build the text shown to the model for one row ``d`` of ``task``."""
        if task in ("sst2", "cola"):
            return d['sentence']
        if task == 'qqp':
            return ('Question 1: ' + d['question1']
                    + ' Question 2: ' + d['question2'])
        if task in ('mnli', 'mnli_matched', 'mnli_mismatched'):
            return ('Premise: ' + d['premise']
                    + ' Hypothesis: ' + d['hypothesis'])
        if task == 'qnli':
            return ('Question: ' + d['question']
                    + ' Context: ' + d['sentence'])
        if task in ('rte', 'mrpc', 'wnli'):
            return ('Sentence 1: ' + d['sentence1']
                    + ' Sentence 2: ' + d['sentence2'])
        raise NotImplementedError
解决了问题
# Per-row loop: builds the prompt text for each row of `data` according to
# `task` and stores it on `self.data` (`self`, `task` and `data` come from the
# enclosing scope). Multi-line concatenations are parenthesised so they do not
# depend on trailing backslashes.
for d in data:
    if task == "sst2" or task == "cola":
        content = d['sentence']
    elif task == 'qqp':
        content = ('Question 1: ' +
                   d['question1'] + ' Question 2: ' +
                   d['question2'])
    elif task == 'mnli' or task == 'mnli_matched' or task == 'mnli_mismatched':
        content = ('Premise: ' +
                   d['premise'] + ' Hypothesis: ' +
                   d['hypothesis'])
    elif task == 'qnli':
        content = ('Question: ' +
                   d['question'] + ' Context: ' +
                   d['sentence'])
    elif task == 'rte' or task == 'mrpc' or task == "wnli":
        content = ('Sentence 1: ' +
                   d['sentence1'] + ' Sentence 2: ' +
                   d['sentence2'])
    else:
        raise NotImplementedError
    # The label is stored exactly as it appears in the row, with no filtering.
    self.data.append({"content": content, "label": d['label']})
这段代码的表述很准确,但在实际操作中,最后一句 self.data.append({"content": content, "label": d['label']}) 添加的标签并不是正确的 0 或 1,而是 -1,因此需要加两个强制判断语句。
Thank you very much for the contribution! We will look into this.
class GLUE(Dataset):
    """Dataset of GLUE examples as ``{"content": str, "label": int}`` dicts.

    Args:
        task: Name of the GLUE task; must be one of ``supported_tasks``.
        local_path: Optional directory with parquet data. The rows are read
            from ``{local_path}/{task}`` (for "mnli": from
            ``{local_path}/mnli_matched`` and ``{local_path}/mnli_mismatched``).
            When falsy, the validation split(s) are fetched with
            ``load_dataset("glue", ...)`` instead.

    Rows whose label is missing or negative are skipped; every other label
    value is kept unchanged.
    """

    def __init__(self, task, local_path=None):
        self.data = []
        self.supported_tasks = ["sst2", "cola", "qqp", "mnli", "mnli_matched",
                                "mnli_mismatched", "qnli", "wnli", "rte", "mrpc"]
        assert task in self.supported_tasks
        self.task = task

        if local_path:
            if self.task == "mnli":
                from datasets import concatenate_datasets
                matched_df = pd.read_parquet(f"{local_path}/mnli_matched")
                mismatched_df = pd.read_parquet(f"{local_path}/mnli_mismatched")
                # Convert the pandas DataFrames to datasets.Dataset.
                matched = datasets.Dataset.from_pandas(matched_df)
                mismatched = datasets.Dataset.from_pandas(mismatched_df)
                data = concatenate_datasets([matched, mismatched])
            else:
                # Read the data for every other task.
                df = pd.read_parquet(f"{local_path}/{task}")
                data = datasets.Dataset.from_pandas(df)
        else:
            # No local data: load from Hugging Face datasets.
            if self.task == "mnli":
                from datasets import concatenate_datasets
                # Fetch the dataset once and take both validation splits from it.
                mnli = load_dataset("glue", "mnli")
                data = concatenate_datasets(
                    [mnli["validation_matched"], mnli["validation_mismatched"]])
            else:
                data = load_dataset("glue", task)["validation"]

        for d in data:
            label = d['label']
            # Skip only rows without a gold label (GLUE marks them with -1).
            # Testing for "label is 0 or 1" would also discard MNLI's third
            # class (label 2), so every non-negative label is kept.
            if label is None or label < 0:
                continue

            if task == "sst2" or task == "cola":
                content = d['sentence']
            elif task == 'qqp':
                content = ('Question 1: ' + d['question1']
                           + ' Question 2: ' + d['question2'])
            elif task == 'mnli' or task == 'mnli_matched' or task == 'mnli_mismatched':
                content = ('Premise: ' + d['premise']
                           + ' Hypothesis: ' + d['hypothesis'])
            elif task == 'qnli':
                content = ('Question: ' + d['question']
                           + ' Context: ' + d['sentence'])
            elif task == 'rte' or task == 'mrpc' or task == "wnli":
                content = ('Sentence 1: ' + d['sentence1']
                           + ' Sentence 2: ' + d['sentence2'])
            else:
                raise NotImplementedError

            self.data.append({"content": content, "label": label})
Stale issue message