gogen-avro icon indicating copy to clipboard operation
gogen-avro copied to clipboard

Union usability improvements

Open ahawtho opened this issue 3 years ago • 0 comments

Including a NewXxxUnionTttt and/or SetTttt for each union type that sets the appropriate type enum value would significantly improve usability. Currently, many of the straightforward approaches for setting a non-default value produce panic()'s.

See the end for a strawman for starting discussion.

Schema:

{
  "type": "record",
  "name": "rec",
  "fields": [
    {
      "name": "field1",
      "type": ["null", "string"],
      "default": null
    }
  ]
}

In v10, this produces the following code:


type Rec struct {
	Field1 *Field1Union `json:"field1"`
}
//...
func NewRec() Rec {
	r := Rec{}
	r.Field1 = NewField1Union()

	r.Field1 = nil
	return r
}
//...

type Field1Union struct {
	Null      *types.NullVal
	String    string
	UnionType Field1UnionTypeEnum
}

func NewField1Union() *Field1Union {
	return &Field1Union{}
}
//...
func (_ *Field1Union) SetString(v string)  { panic("Unsupported operation") }

Test code:

func TestCreateRec(t *testing.T) {
	tests := []struct {
		name   string
		create func() structs.Rec
		want   string
	}{
		{
			name: "default struct",
			create: func() structs.Rec {
				return structs.Rec{
					Field1: &structs.Field1Union{
						// Default value of Field1UnionType == 0, which is invalid
						String: "test1",
					},
				}
			},
			want: "test1",
		},
		{
			name: "NewRec()",
			create: func() structs.Rec {
				r := structs.NewRec()
				// the Field1 struct starts as nil, so any invocations on Field1 panic
				r.Field1.String = "test2"
				return r
			},
			want: "test2",
		},
		{
			name: "NewField1Union()",
			create: func() structs.Rec {
				r := structs.NewRec()
				r.Field1 = structs.NewField1Union()
				// a union returned by NewField1Union() is always invalid due to default enum type
				r.Field1.String = "test3"
				return r
			},
			want: "test3",
		},
		{
			name: "SetString()",
			create: func() structs.Rec {
				r := structs.NewRec()
				r.Field1 = structs.NewField1Union()
				// SetString() is unsupported, so this panics.
				r.Field1.SetString("test4")
				return r
			},
			want: "test4",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			defer func() {
				e := recover()
				if e != nil {
					t.Errorf("test panic'd: %v", e)
				}
			}()
			sr := tt.create()
			b := &bytes.Buffer{}
			err := sr.Serialize(b)
			if err != nil {
				t.Errorf("error serializing Rec: %v", err)
			}
			dr, err := structs.DeserializeRec(b)
			if err != nil {
				t.Errorf("error deserializing Rec: %v", err)
			}
			if got := dr.Field1.String; got != tt.want {
				t.Errorf("String() = %v, want %v", got, tt.want)
			}
		})
	}
}

E.g., for the example above, something like the following would improve usability:

func NewField1UnionNull() *Field1Union {
	return nil
}
func NewField1UnionString(v string) *Field1Union {
	return &Field1Union{
		String: v,
		UnionType: Field1UnionTypeEnumString,
	}
}

func (s *Field1Union) SetString(v string)  {
	s.String = v
	s.UnionType = Field1UnionTypeEnumString
}
// SetNull() can't be implemented with current generated code, but if serialization also
// supported an empty union struct, the implementation could be as follows. However, the
// current generated code provides better protection against unintentional data loss (e.g.
// a developer sets the field but not the UnionType)
func (s *Field1Union) SetNull()  {
	s.String = ""
	s.UnionType = 0
}

ahawtho avatar Nov 01 '21 14:11 ahawtho