simdjson-go icon indicating copy to clipboard operation
simdjson-go copied to clipboard

object.ForEach() - object: unexpected name tag }

Open flowchartsman opened this issue 3 years ago • 0 comments

There appears to be a bug in objectForEach where the iterator over-reads the tape looking for a name or doesn't check for the object ending, maybe. Given the following example, which is an attempt to do array-transparent field iteration:

package main

import (
	"fmt"
	"log"

	"github.com/minio/simdjson-go"
)

func main() {
	// Parse JSON:
	pj, err := simdjson.Parse([]byte(`{
    "items": [
        {
            "name": "jim",
            "scores": [
                {
                    "game":"golf",
                    "scores": ["one","two"]
                },
                {
                    "game":"badminton",
                    "scores":["zero",1,"six"]
                },
                [
		            {
		              "game": "nested for some reason?",
                      "scores": ["five"]
                    }
                ]
            ]
        }
    ]
}`), nil)
	if err != nil {
		log.Fatal(err)
	}

	// Iterate each top level element.
	traverseKeys := map[string]struct{}{"items": {}, "poop": {}}
	var parse func(i simdjson.Iter) error
	parse = func(i simdjson.Iter) error {
		switch i.Type() {
		case simdjson.TypeArray:
			array, err := i.Array(nil)
			if err != nil {
				return err
			}
			array.ForEach(func(i simdjson.Iter) {
				parse(i)
			})
		case simdjson.TypeObject:
			obj, err := i.Object(nil)
			if err != nil {
				return err
			}
			scores := obj.FindKey("scores", nil)
			if scores == nil || scores.Type != simdjson.TypeArray {
				return obj.ForEach(func(_ []byte, i simdjson.Iter) {
					parse(i)
				}, traverseKeys)
			}
			array, err := scores.Iter.Array(nil)
			if err != nil {
				return err
			}
			array.ForEach(func(i simdjson.Iter) {
				switch i.Type() {
				case simdjson.TypeString, simdjson.TypeInt:
					s, _ := i.StringCvt()
					fmt.Println("Found score:", s)
				case simdjson.TypeObject, simdjson.TypeArray:
					parse(i)
				}
			})
		default:
			fmt.Println("Ignoring", i.Type())
		}

		return nil
	}
	if err := pj.ForEach(parse); err != nil {
		log.Fatal(err)
	}

	// Output:
	//Found score: one
	//Found score: two
	//Found score: zero
	//Found score: 1
	//Found score: six
	//Found score: five
	// 2022/03/18 00:44:08 object: unexpected name tag }
}

The problem appears to be here in parsed_object.go wherein the advance fails if not finding a string, but not making an allowance for the end of the object, while the actual check for the end of the object occurs later, here. If I'm reading this right, it would throw an error on the (valid) JSON of an empty object: {}.

Maybe something like this?

func (o *Object) ForEach(fn func(key []byte, i Iter), onlyKeys map[string]struct{}) error {
	tmp := o.tape.Iter()
	tmp.off = o.off
	n := 0
	for {
		typ := tmp.Advance()

		// Check for end of object
		if typ == TypeNone {
			if tmp.off == len(tmp.tape.Tape) {
				return nil
			}
			return fmt.Errorf("object: unexpected tag: %v", tmp.t)
		}

		// We want name and at least one value.
		if typ != TypeString {
			return fmt.Errorf("object: expecting object name string, found: %v", tmp.t)
		}
		// if the next item is the end of the tape or beyond it, then something
		// is wrong
		if tmp.off+1 >= len(tmp.tape.Tape) {
			return fmt.Errorf("unexpected end of object tape, expecting value")
		}
		// Get the name(object key)
		offset := tmp.cur
		length := tmp.tape.Tape[tmp.off]
		name, err := tmp.tape.stringByteAt(offset, length)
		if err != nil {
			return fmt.Errorf("getting object name: %w", err)
		}

		// if the user has specified that only certain keys be iterated, make
		// sure this one is among them
		if len(onlyKeys) > 0 {
			if _, ok := onlyKeys[string(name)]; !ok {
				// otherwise skip the value
				tmp.Advance()
				continue
			}
		}

		t := tmp.Advance()
		if t == TypeNone {
			return nil
		}
		fn(name, tmp)
		n++
		if n == len(onlyKeys) {
			return nil
		}
	}
}

flowchartsman avatar Mar 18 '22 05:03 flowchartsman