yacr icon indicating copy to clipboard operation
yacr copied to clipboard

allow tag for unsafe code

Open ericlagergren opened this issue 8 years ago • 3 comments

I appreciate yacr and it's worked great, but it'd be even better if there was an -unsafe build tag.

Code from package unsafe can break at any time (not covered by Go's 1.0 guarantee) and it'd be rather unfortunate if the current implementation of something changes and peoples' code breaks.

IMO it's also bad form to use unsafe under peoples' noses

ericlagergren avatar May 31 '16 20:05 ericlagergren

Currently, there is no safe way to temporarily transform a string into a slice/byte array without copy (see here). But it is easy to remove unsafe usages:

diff --git a/writer.go b/writer.go
index 3766ee8..7e3c872 100644
--- a/writer.go
+++ b/writer.go
@@ -12,7 +12,6 @@ import (
        "io"
        "reflect"
        "strconv"
-       "unsafe"
 )

 // Writer provides an interface for writing CSV data
@@ -21,12 +20,10 @@ import (
 // The EndOfRecord method tells when a line break is inserted.
 type Writer struct {
        b      *bufio.Writer
-       sep    byte                 // values separator
-       quoted bool                 // specify if values should be quoted (when they contain a separator, a double-quote or a newline)
-       sor    bool                 // true at start of record
-       err    error                // sticky error.
-       bs     []byte               // byte slice used to write string with minimal/no alloc/copy
-       hb     *reflect.SliceHeader // header of bs
+       sep    byte  // values separator
+       quoted bool  // specify if values should be quoted (when they contain a separator, a double-quote or a newline)
+       sor    bool  // true at start of record
+       err    error // sticky error.

        UseCRLF bool // True to use \r\n as the line terminator
 }
@@ -39,7 +36,6 @@ func DefaultWriter(wr io.Writer) *Writer {
 // NewWriter returns a new CSV writer.
 func NewWriter(w io.Writer, sep byte, quoted bool) *Writer {
        wr := &Writer{b: bufio.NewWriter(w), sep: sep, quoted: quoted, sor: true}
-       wr.hb = (*reflect.SliceHeader)(unsafe.Pointer(&wr.bs))
        return wr
 }

@@ -114,12 +110,7 @@ func (w *Writer) writeReflect(value interface{}) bool {

 // WriteString ensures that value is quoted when needed.
 func (w *Writer) WriteString(value string) bool {
-       // To avoid making a copy...
-       hs := (*reflect.StringHeader)(unsafe.Pointer(&value))
-       w.hb.Data = hs.Data
-       w.hb.Len = hs.Len
-       w.hb.Cap = hs.Len
-       return w.Write(w.bs)
+       return w.Write([]byte(value))
 }

 var (

but performance is impacted:

Unsafe:
BenchmarkYacrWriter-4         200000         10780 ns/op      85.81 MB/s        2755 B/op          0 allocs/op
Safe:
BenchmarkYacrWriter-4         100000         16139 ns/op      57.31 MB/s        3955 B/op        126 allocs/op

Regards.

gwenn avatar Jun 01 '16 17:06 gwenn

Of course. Thus why I suggested using an -unsafe build tag to allow people who want performance to get it, and those who want safe code to get it by default.

ericlagergren avatar Jun 01 '16 18:06 ericlagergren

  • I will have to copy some code. And actually, I used the unsafe code to avoid copying this method for string.
  • No one will use the unsafe tag.

So maybe I should copy the method:

diff --git a/writer.go b/writer.go
index 3766ee8..c2aa512 100644
--- a/writer.go
+++ b/writer.go
@@ -12,7 +12,6 @@ import (
        "io"
        "reflect"
        "strconv"
-       "unsafe"
 )

 // Writer provides an interface for writing CSV data
@@ -21,12 +20,10 @@ import (
 // The EndOfRecord method tells when a line break is inserted.
 type Writer struct {
        b      *bufio.Writer
-       sep    byte                 // values separator
-       quoted bool                 // specify if values should be quoted (when they contain a separator, a double-quote or a newline)
-       sor    bool                 // true at start of record
-       err    error                // sticky error.
-       bs     []byte               // byte slice used to write string with minimal/no alloc/copy
-       hb     *reflect.SliceHeader // header of bs
+       sep    byte  // values separator
+       quoted bool  // specify if values should be quoted (when they contain a separator, a double-quote or a newline)
+       sor    bool  // true at start of record
+       err    error // sticky error.

        UseCRLF bool // True to use \r\n as the line terminator
 }
@@ -39,7 +36,6 @@ func DefaultWriter(wr io.Writer) *Writer {
 // NewWriter returns a new CSV writer.
 func NewWriter(w io.Writer, sep byte, quoted bool) *Writer {
        wr := &Writer{b: bufio.NewWriter(w), sep: sep, quoted: quoted, sor: true}
-       wr.hb = (*reflect.SliceHeader)(unsafe.Pointer(&wr.bs))
        return wr
 }

@@ -114,12 +110,59 @@ func (w *Writer) writeReflect(value interface{}) bool {

 // WriteString ensures that value is quoted when needed.
 func (w *Writer) WriteString(value string) bool {
-       // To avoid making a copy...
-       hs := (*reflect.StringHeader)(unsafe.Pointer(&value))
-       w.hb.Data = hs.Data
-       w.hb.Len = hs.Len
-       w.hb.Cap = hs.Len
-       return w.Write(w.bs)
+       if w.err != nil {
+               return false
+       }
+       if !w.sor {
+               w.setErr(w.b.WriteByte(w.sep))
+       }
+       // In quoted mode, value is enclosed between quotes if it contains sep, quote or \n.
+       sep := rune(w.sep)
+       if w.quoted {
+               last := 0
+               for i, c := range value {
+                       switch c {
+                       case '"', '\r', '\n', sep:
+                       default:
+                               continue
+                       }
+                       if last == 0 {
+                               w.setErr(w.b.WriteByte('"'))
+                       }
+                       if _, err := w.b.WriteString(value[last : i+1]); err != nil {
+                               w.setErr(err)
+                       }
+                       if c == '"' {
+                               w.setErr(w.b.WriteByte('"')) // escaped with another double quote
+                       }
+                       last = i + 1
+               }
+               if _, err := w.b.WriteString(value[last:]); err != nil {
+                       w.setErr(err)
+               }
+               if last != 0 {
+                       w.setErr(w.b.WriteByte('"'))
+               }
+       } else {
+               // check that value does not contain sep or \n
+               for _, c := range value {
+                       switch c {
+                       case '\n':
+                               w.setErr(ErrNewLine)
+                               return false
+                       case sep:
+                               w.setErr(ErrSeparator)
+                               return false
+                       default:
+                               continue
+                       }
+               }
+               if _, err := w.b.WriteString(value); err != nil {
+                       w.setErr(err)
+               }
+       }
+       w.sor = false
+       return w.err == nil
 }

 var (
BenchmarkYacrWriter-4         100000         13219 ns/op      69.97 MB/s        2755 B/op          0 allocs/op

gwenn avatar Jun 01 '16 18:06 gwenn