glyce icon indicating copy to clipboard operation
glyce copied to clipboard

Can't find train.json

Open Lirsakura opened this issue 4 years ago • 1 comments

Can you tell me where train.json is?

FileNotFoundError: [Errno 2] No such file or directory: '/data/bq/train.json'

Lirsakura avatar Dec 20 '20 15:12 Lirsakura

The data files for BQ all have the suffix .tsv (not .json).
I modified the functions in class BQProcessor as shown below, and it now loads the BQ corpus successfully;
you can view the changes in my fork:


# Updated
class BQProcessor(DataProcessor):
    """Processor for the BQ corpus, read from tab-separated files.

    ``_create_examples`` indexes each row as: column 0 = label,
    column 1 = first sentence, column 2 = second sentence.
    Rows are obtained through ``self._read_tsv``, which is not defined in
    this class (presumably inherited from ``DataProcessor`` -- verify).
    """

    def get_train_examples(self, data_dir):
        """Build the training examples from ``train.tsv`` under *data_dir*."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """Build the dev examples from ``valid.tsv`` under *data_dir*."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "valid.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """Build the test examples from ``test.tsv`` under *data_dir*."""
        # Use a dedicated set_type: with "dev_matched" the test guids were
        # indistinguishable from the dev guids.
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")),
            "test")

    def get_labels(self):
        """Return the label vocabulary: the strings "0" and "1"."""
        return ["0", "1"]

    # Original (JSON-based) loader, kept for reference.
    def _create_examples_old(self, lines, set_type):
        """Create examples from rows whose first field is a JSON document.

        Args:
            lines: iterable of rows; ``row[0]`` is parsed with ``json.loads``.
            set_type: prefix used to build each example's guid.

        Returns:
            A list of ``InputExample`` objects, one per row.

        NOTE(review): the decoded record is indexed positionally
        (``[0]`` label, ``[1]``/``[2]`` texts), so it is assumed to be a
        JSON array -- confirm against the legacy data files.
        """
        examples = []
        for (i, line) in enumerate(lines):
            line = json.loads(line[0])
            if i < 2:
                # Show the first two parsed records as a loading sanity check.
                print("-"*10)
                print("check  loading example")
                print(line)
                print(type(line))
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]
            text_b = line[2]
            # The label is a single field, not the whole record; previously
            # the entire parsed record was passed as the label.
            label = line[0]
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples

    def _create_examples(self, lines, set_type):
        """Create examples from already-split TSV rows.

        Args:
            lines: iterable of rows, each indexable as
                ``[label, text_a, text_b]``.
            set_type: prefix used to build each example's guid
                (``"<set_type>-<row index>"``).

        Returns:
            A list of ``InputExample`` objects, one per row.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i < 2:
                # Show the first two rows as a loading sanity check.
                print("-"*10)
                print("check  loading example")
                print(line)
                print(type(line))
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]
            text_b = line[2]
            label = line[0]
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples

okcd00 avatar Apr 22 '21 09:04 okcd00