glyce
glyce copied to clipboard
train.json cannot be found
Can you tell me where train.json is?
FileNotFoundError: [Errno 2] No such file or directory: '/data/bq/train.json'
The data files for bq all have the suffix .tsv (not .json).
I modified the functions in class BQProcessor as shown below, and it now
successfully loads the bq corpus.
You can view the changes in my fork:
# Updated
class BQProcessor(DataProcessor):
    """Processor for the BQ data set, read from tab-separated files.

    The splits are loaded from ``train.tsv``, ``valid.tsv`` and ``test.tsv``
    inside the given data directory. Each row is expected to carry the label
    in column 0 and the two sentences in columns 1 and 2.

    NOTE(review): ``_read_tsv`` is not defined here; presumably it is
    inherited from ``DataProcessor`` -- confirm against the base class.
    """

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "valid.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """See base class."""
        # The split tag only feeds the example guid. It used to be the
        # copy-pasted "dev_matched", which mislabeled test examples as dev.
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")),
            "test")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    @staticmethod
    def _print_sample(line):
        """Print one loaded row so the input format can be eyeballed."""
        print("-"*10)
        print("check loading example")
        print(line)
        print(type(line))

    # Original
    def _create_examples_old(self, lines, set_type):
        """Creates examples for the training and dev sets (JSON variant).

        Kept for reference: the first field of every row is decoded as JSON
        before the sentence fields are read from the decoded value.

        Args:
            lines: iterable of rows; ``row[0]`` must be a JSON string.
            set_type: split tag used as the guid prefix.

        Returns:
            A list of ``InputExample`` objects, one per row.
        """
        examples = []
        for (i, line) in enumerate(lines):
            line = json.loads(line[0])
            if i < 2:
                self._print_sample(line)
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]  # .replace(" ", "")
            text_b = line[2]  # .replace(" ", "")
            # NOTE(review): the whole decoded record is used as the label,
            # which looks unintended -- confirm the intended field before
            # reviving this loader.
            label = line
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples

    def _create_examples(self, lines, set_type):
        """Creates examples for the training and dev sets.

        Args:
            lines: iterable of rows, each indexable as
                ``[label, text_a, text_b]``.
            set_type: split tag used as the guid prefix (``"<set_type>-<i>"``).

        Returns:
            A list of ``InputExample`` objects, one per row.
        """
        examples = []
        for (i, line) in enumerate(lines):
            # Echo the first two rows as a sanity check of the file layout.
            if i < 2:
                self._print_sample(line)
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]  # .replace(" ", "")
            text_b = line[2]  # .replace(" ", "")
            label = line[0]
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples