simdjson-go
simdjson-go copied to clipboard
object.ForEach() - object: unexpected name tag }
There appears to be a bug in objectForEach where the iterator over-reads the tape looking for a name or doesn't check for the object ending, maybe. Given the following example, which is an attempt to do array-transparent field iteration:
package main
import (
"fmt"
"log"
"github.com/minio/simdjson-go"
)
func main() {
// Parse JSON:
pj, err := simdjson.Parse([]byte(`{
"items": [
{
"name": "jim",
"scores": [
{
"game":"golf",
"scores": ["one","two"]
},
{
"game":"badminton",
"scores":["zero",1,"six"]
},
[
{
"game": "nested for some reason?",
"scores": ["five"]
}
]
]
}
]
}`), nil)
if err != nil {
log.Fatal(err)
}
// Iterate each top level element.
traverseKeys := map[string]struct{}{"items": {}, "poop": {}}
var parse func(i simdjson.Iter) error
parse = func(i simdjson.Iter) error {
switch i.Type() {
case simdjson.TypeArray:
array, err := i.Array(nil)
if err != nil {
return err
}
array.ForEach(func(i simdjson.Iter) {
parse(i)
})
case simdjson.TypeObject:
obj, err := i.Object(nil)
if err != nil {
return err
}
scores := obj.FindKey("scores", nil)
if scores == nil || scores.Type != simdjson.TypeArray {
return obj.ForEach(func(_ []byte, i simdjson.Iter) {
parse(i)
}, traverseKeys)
}
array, err := scores.Iter.Array(nil)
if err != nil {
return err
}
array.ForEach(func(i simdjson.Iter) {
switch i.Type() {
case simdjson.TypeString, simdjson.TypeInt:
s, _ := i.StringCvt()
fmt.Println("Found score:", s)
case simdjson.TypeObject, simdjson.TypeArray:
parse(i)
}
})
default:
fmt.Println("Ignoring", i.Type())
}
return nil
}
if err := pj.ForEach(parse); err != nil {
log.Fatal(err)
}
// Output:
//Found score: one
//Found score: two
//Found score: zero
//Found score: 1
//Found score: six
//Found score: five
// 2022/03/18 00:44:08 object: unexpected name tag }
}
The problem appears to be here in parsed_object.go wherein the advance fails if not finding a string, but not making an allowance for the end of the object, while the actual check for the end of the object occurs later, here. If I'm reading this right, it would throw an error on the (valid) JSON of an empty object: {}.
Maybe something like this?
func (o *Object) ForEach(fn func(key []byte, i Iter), onlyKeys map[string]struct{}) error {
tmp := o.tape.Iter()
tmp.off = o.off
n := 0
for {
typ := tmp.Advance()
// Check for end of object
if typ == TypeNone {
if tmp.off == len(tmp.tape.Tape) {
return nil
}
return fmt.Errorf("object: unexpected tag: %v", tmp.t)
}
// We want name and at least one value.
if typ != TypeString {
return fmt.Errorf("object: expecting object name string, found: %v", tmp.t)
}
// if the next item is the end of the tape or beyond it, then something
// is wrong
if tmp.off+1 >= len(tmp.tape.Tape) {
return fmt.Errorf("unexpected end of object tape, expecting value")
}
// Get the name(object key)
offset := tmp.cur
length := tmp.tape.Tape[tmp.off]
name, err := tmp.tape.stringByteAt(offset, length)
if err != nil {
return fmt.Errorf("getting object name: %w", err)
}
// if the user has specified that only certain keys be iterated, make
// sure this one is among them
if len(onlyKeys) > 0 {
if _, ok := onlyKeys[string(name)]; !ok {
// otherwise skip the value
tmp.Advance()
continue
}
}
t := tmp.Advance()
if t == TypeNone {
return nil
}
fn(name, tmp)
n++
if n == len(onlyKeys) {
return nil
}
}
}