Skip to content

Commit

Permalink
feat: more capable csv consumer and producer
Browse files Browse the repository at this point in the history
This PR makes the CSV consumer and producer usable in a more versatile way.
There is no breaking change to the interface.

* fixes #263 (types built with an io.Reader should be able to produce
  CSV)

* csv/consumer can now consume CSV into *csv.Writer, io.Writer, io.ReaderFrom,
  encoding.BinaryUnmarshaler
* also supports the new CSVWriter interface, i.e. anything that can
  Write([]string) error like *csv.Writer
* also supports pointers with underlying type *[][]string, *[]byte and *string, not just
  *[]byte

* csv/producer can now produce CSV from *csv.Reader, io.Reader,
  io.WriterTo, encoding.BinaryMarshaler
* also supports the new CSVReader interface, i.e. anything that can
  Read() ([]string, error) like *csv.Reader
* also supports underlying types [][]string, []byte and string, not just
  []byte

* CSVConsumer and CSVProducer now stream CSV records whenever possible
* like for ByteStreamConsumer and ByteStreamProducer, added the
  CSVCloseStream() option

* added support to (optionally) configure the CSV format with CSVOpts,
  using the options made available by the standard library

* doc: documented the above in the exported func signatures
* test: added full unit test of the CSVConsumer and Producer

Signed-off-by: Frederic BIDON <fredbi@yahoo.com>
  • Loading branch information
fredbi committed Dec 13, 2023
1 parent c5040d4 commit 638ea78
Show file tree
Hide file tree
Showing 6 changed files with 1,034 additions and 56 deletions.
5 changes: 5 additions & 0 deletions bytestream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ func TestByteStreamProducer(t *testing.T) {
}

// binaryUnmarshalDummy is a test double exposing an UnmarshalBinary method.
type binaryUnmarshalDummy struct {
// err, when non-nil, is returned as is by UnmarshalBinary, before the input is inspected.
err error
// NOTE(review): presumably holds the unmarshaled text; the assignment is not visible in this excerpt.
str string
}

Expand All @@ -398,6 +399,10 @@ type binaryUnmarshalDummyZeroAlloc struct {
}

func (b *binaryUnmarshalDummy) UnmarshalBinary(data []byte) error {
if b.err != nil {
return b.err
}

if len(data) == 0 {
return errors.New("no text given")
}
Expand Down
328 changes: 301 additions & 27 deletions csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,336 @@ package runtime

import (
"bytes"
"context"
"encoding"
"encoding/csv"
"errors"
"fmt"
"io"
"log"
"reflect"

"golang.org/x/sync/errgroup"
)

// NOTE(review): this span looks like a rendered diff without +/- markers: lines of the former
// implementation (e.g. the io.Writer-only type assertion and the ReadAll/Write loop) appear
// interleaved with the new one. Confirm against the repository before relying on it.

// CSVConsumer creates a new CSV consumer
func CSVConsumer() Consumer {
// CSVConsumer creates a new CSV consumer.
//
// The consumer consumes CSV records from a provided reader into the data passed by reference.
//
// CSVOpts options may be specified to alter the default CSV behavior on the reader and the writer side (e.g. separator, skip header, ...).
// The defaults are those of the standard library's csv.Reader and csv.Writer.
//
// Supported output underlying types and interfaces, prioritized in this order:
// - *csv.Writer
// - CSVWriter (writer options are ignored)
// - io.Writer (as raw bytes)
// - io.ReaderFrom (as raw bytes)
// - encoding.BinaryUnmarshaler (as raw bytes)
// - *[][]string (as a collection of records)
// - *[]byte (as raw bytes)
// - *string (as raw bytes)
//
// The consumer prioritizes situations where buffering the input is not required.
//
// The returned Consumer fails when the reader or the destination is nil, when the destination
// is neither one of the supported interfaces nor a pointer, or when the pointed-to type is not supported.
//
// When the close-stream option is set and the reader is an io.Closer, the reader is closed
// when the consumer returns; the error from that Close is discarded.
func CSVConsumer(opts ...CSVOpt) Consumer {
o := csvOptsWithDefaults(opts)

return ConsumerFunc(func(reader io.Reader, data interface{}) error {
// both ends of the transfer are mandatory
if reader == nil {
return errors.New("CSVConsumer requires a reader")
}
if data == nil {
return errors.New("nil destination for CSVConsumer")
}

// the input is always read through a csv.Reader configured from the options
csvReader := csv.NewReader(reader)
writer, ok := data.(io.Writer)
if !ok {
return errors.New("data type must be io.Writer")
o.applyToReader(csvReader)
// NOTE(review): dumps the reader configuration to the standard logger on every call;
// this looks like a debugging leftover — confirm it should ship.
log.Printf("DEBUG: %#v", csvReader)
// defaultCloser is defined elsewhere (presumably a no-op); it is only replaced by the
// reader's own Close when closing was requested and the reader is an io.Closer
closer := defaultCloser
if o.closeStream {
if cl, isReaderCloser := reader.(io.Closer); isReaderCloser {
closer = cl.Close
}
}
csvWriter := csv.NewWriter(writer)
records, err := csvReader.ReadAll()
if err != nil {
// closing happens on return, whatever the outcome; the close error is deliberately dropped
defer func() {
_ = closer()
}()

// the order of the cases implements the priority documented above
switch destination := data.(type) {
case *csv.Writer:
// caller-provided CSV writer: writer options are applied to it, then records are streamed
csvWriter := destination
o.applyToWriter(csvWriter)

return pipeCSV(csvWriter, csvReader, o)

case CSVWriter:
csvWriter := destination
// no writer options available

return pipeCSV(csvWriter, csvReader, o)

case io.Writer:
// plain writer: wrapped in a new csv.Writer, then records are streamed
csvWriter := csv.NewWriter(destination)
o.applyToWriter(csvWriter)

return pipeCSV(csvWriter, csvReader, o)

case io.ReaderFrom:
// the CSV output is first assembled in memory, then handed over to ReadFrom
var buf bytes.Buffer
csvWriter := csv.NewWriter(&buf)
o.applyToWriter(csvWriter)
if err := bufferedCSV(csvWriter, csvReader, o); err != nil {
return err
}
_, err := destination.ReadFrom(&buf)

return err
}
for _, r := range records {
if err := csvWriter.Write(r); err != nil {

case encoding.BinaryUnmarshaler:
// the CSV output is first assembled in memory, then passed as a whole to UnmarshalBinary
var buf bytes.Buffer
csvWriter := csv.NewWriter(&buf)
o.applyToWriter(csvWriter)
if err := bufferedCSV(csvWriter, csvReader, o); err != nil {
return err
}

return destination.UnmarshalBinary(buf.Bytes())

default:
// support *[][]string, *[]byte, *string
if ptr := reflect.TypeOf(data); ptr.Kind() != reflect.Ptr {
return errors.New("destination must be a pointer")
}

// v is the pointed-to value; the kind of its type selects how the CSV is stored.
// NOTE(review): for a nil pointer, reflect.Indirect yields an invalid Value and
// v.Type() panics — confirm whether callers may pass a typed nil pointer.
v := reflect.Indirect(reflect.ValueOf(data))
t := v.Type()

switch {
case t.Kind() == reflect.Slice && t.Elem().Kind() == reflect.Slice && t.Elem().Elem().Kind() == reflect.String:
// records are collected by a csvRecordsWriter, then copied into the destination slice
csvWriter := &csvRecordsWriter{}
// writer options are ignored
if err := pipeCSV(csvWriter, csvReader, o); err != nil {
return err
}
// NOTE(review): reflect.Value.SetCap panics when the requested capacity is smaller than
// the current length, which seems possible when the destination already holds more
// elements than there are records — confirm.
v.Grow(len(csvWriter.records))
v.SetCap(len(csvWriter.records)) // in case Grow was unnecessary, trim down the capacity
v.SetLen(len(csvWriter.records))
reflect.Copy(v, reflect.ValueOf(csvWriter.records))

return nil

case t.Kind() == reflect.Slice && t.Elem().Kind() == reflect.Uint8:
// byte slice destination: the CSV output is assembled in memory, then assigned
var buf bytes.Buffer
csvWriter := csv.NewWriter(&buf)
o.applyToWriter(csvWriter)
if err := bufferedCSV(csvWriter, csvReader, o); err != nil {
return err
}
v.SetBytes(buf.Bytes())

return nil

case t.Kind() == reflect.String:
// string destination: the CSV output is assembled in memory, then assigned
var buf bytes.Buffer
csvWriter := csv.NewWriter(&buf)
o.applyToWriter(csvWriter)
if err := bufferedCSV(csvWriter, csvReader, o); err != nil {
return err
}
v.SetString(buf.String())

return nil

default:
return fmt.Errorf("%v (%T) is not supported by the CSVConsumer, %s",
data, data, "can be resolved by supporting CSVWriter/Writer/BinaryUnmarshaler interface",
)
}
}
csvWriter.Flush()
return nil
})
}

// NOTE(review): this span looks like a rendered diff without +/- markers: lines of the former
// implementation (e.g. the []byte-only type assertion and the ReadAll/Write loop) appear
// interleaved with the new one. Confirm against the repository before relying on it.

// CSVProducer creates a new CSV producer
func CSVProducer() Producer {
// CSVProducer creates a new CSV producer.
//
// The producer takes input data then writes as CSV to an output writer (essentially as a pipe).
//
// Supported input underlying types and interfaces, prioritized in this order:
// - *csv.Reader
// - CSVReader (reader options are ignored)
// - io.Reader
// - io.WriterTo
// - encoding.BinaryMarshaler
// - [][]string
// - []byte
// - string
//
// The producer prioritizes situations where buffering the input is not required.
//
// The returned Producer fails when the writer or the data is nil, or when the data type is not supported.
//
// When the close-stream option is set and the writer is an io.Closer, the writer is closed
// when the producer returns; the error from that Close is discarded.
func CSVProducer(opts ...CSVOpt) Producer {
o := csvOptsWithDefaults(opts)

return ProducerFunc(func(writer io.Writer, data interface{}) error {
// both ends of the transfer are mandatory
if writer == nil {
return errors.New("CSVProducer requires a writer")
}
if data == nil {
return errors.New("nil data for CSVProducer")
}

dataBytes, ok := data.([]byte)
if !ok {
return errors.New("data type must be byte array")
// the output always goes through a csv.Writer configured from the options
csvWriter := csv.NewWriter(writer)
o.applyToWriter(csvWriter)
// defaultCloser is defined elsewhere (presumably a no-op); it is only replaced by the
// writer's own Close when closing was requested and the writer is an io.Closer
closer := defaultCloser
if o.closeStream {
if cl, isWriterCloser := writer.(io.Closer); isWriterCloser {
closer = cl.Close
}
}
// closing happens on return, whatever the outcome; the close error is deliberately dropped
defer func() {
_ = closer()
}()

csvReader := csv.NewReader(bytes.NewBuffer(dataBytes))
records, err := csvReader.ReadAll()
if err != nil {
return err
// NOTE(review): input data that is an io.ReadCloser is closed on return regardless of the
// closeStream option, and the Close error is not checked — confirm this is intended.
if rc, isDataCloser := data.(io.ReadCloser); isDataCloser {
defer rc.Close()
}
csvWriter := csv.NewWriter(writer)
for _, r := range records {
if err := csvWriter.Write(r); err != nil {

// the order of the cases implements the priority documented above
switch origin := data.(type) {
case *csv.Reader:
// caller-provided CSV reader: reader options are applied to it, then records are streamed
csvReader := origin
o.applyToReader(csvReader)

return pipeCSV(csvWriter, csvReader, o)

case CSVReader:
csvReader := origin
// no reader options available

return pipeCSV(csvWriter, csvReader, o)

case io.Reader:
// plain reader: wrapped in a new csv.Reader, then records are streamed
csvReader := csv.NewReader(origin)
o.applyToReader(csvReader)

return pipeCSV(csvWriter, csvReader, o)

case io.WriterTo:
// async piping of the writes performed by WriteTo
r, w := io.Pipe()
csvReader := csv.NewReader(r)
o.applyToReader(csvReader)

// the context derived by the errgroup is not used: the group only serves to wait for
// both goroutines and report the first error
pipe, _ := errgroup.WithContext(context.Background())
pipe.Go(func() error {
// producer side: closing w signals the end of input to the CSV reader
_, err := origin.WriteTo(w)
_ = w.Close()
return err
})

pipe.Go(func() error {
// consumer side: closing r once done makes any pending write on w fail instead of blocking
defer func() {
_ = r.Close()
}()

return pipeCSV(csvWriter, csvReader, o)
})

return pipe.Wait()

case encoding.BinaryMarshaler:
// the marshaled bytes are parsed as CSV from memory
buf, err := origin.MarshalBinary()
if err != nil {
return err
}
rdr := bytes.NewBuffer(buf)
// NOTE(review): unlike the []byte and string cases below, reader options are not applied
// to this reader — confirm whether that is intentional.
csvReader := csv.NewReader(rdr)

return bufferedCSV(csvWriter, csvReader, o)

default:
// support [][]string, []byte, string (or pointers to those)
// NOTE(review): for a nil pointer, reflect.Indirect yields an invalid Value and
// v.Type() panics — confirm whether callers may pass a typed nil pointer.
v := reflect.Indirect(reflect.ValueOf(data))
t := v.Type()

switch {
case t.Kind() == reflect.Slice && t.Elem().Kind() == reflect.Slice && t.Elem().Elem().Kind() == reflect.String:
// despite its name, csvRecordsWriter is used here as the record source:
// the input records are copied into it, then streamed to the output
csvReader := &csvRecordsWriter{
records: make([][]string, v.Len()),
}
reflect.Copy(reflect.ValueOf(csvReader.records), v)

return pipeCSV(csvWriter, csvReader, o)

case t.Kind() == reflect.Slice && t.Elem().Kind() == reflect.Uint8:
// raw bytes: parsed as CSV from memory
buf := bytes.NewBuffer(v.Bytes())
csvReader := csv.NewReader(buf)
o.applyToReader(csvReader)

return bufferedCSV(csvWriter, csvReader, o)

case t.Kind() == reflect.String:
// string: parsed as CSV from memory
buf := bytes.NewBufferString(v.String())
csvReader := csv.NewReader(buf)
o.applyToReader(csvReader)

return bufferedCSV(csvWriter, csvReader, o)

default:
return fmt.Errorf("%v (%T) is not supported by the CSVProducer, %s",
data, data, "can be resolved by supporting CSVReader/Reader/BinaryMarshaler interface",
)
}
}
csvWriter.Flush()
return nil
})
}

// pipeCSV streams CSV records, one at a time, from csvReader to csvWriter.
//
// The first opts.skippedLines records are read and discarded. If the input is
// exhausted while still skipping, pipeCSV returns nil without writing or
// flushing anything.
//
// Every remaining record is written as soon as it is read. Once the reader
// reports io.EOF, the writer is flushed and its Error() is returned. Any other
// read error, or any write error, stops the copy and is returned unchanged.
func pipeCSV(csvWriter CSVWriter, csvReader CSVReader, opts csvOpts) error {
	// opts is a copy: counting down locally leaves the caller's options untouched
	for remaining := opts.skippedLines; remaining > 0; remaining-- {
		if _, skipErr := csvReader.Read(); skipErr != nil {
			if errors.Is(skipErr, io.EOF) {
				return nil
			}

			return skipErr
		}
	}

	for {
		record, readErr := csvReader.Read()

		switch {
		case readErr == nil:
			if writeErr := csvWriter.Write(record); writeErr != nil {
				return writeErr
			}

		case errors.Is(readErr, io.EOF):
			// normal end of input: push out whatever the writer still buffers
			csvWriter.Flush()

			return csvWriter.Error()

		default:
			return readErr
		}
	}
}

// bufferedCSV copies CSV records from csvReader to csvWriter in two steps:
// all records are first loaded in memory, then written in a single batch.
//
// The first opts.skippedLines records are read and discarded. If the input is
// exhausted while still skipping, bufferedCSV returns nil without writing anything.
//
// Errors from reading (other than io.EOF while skipping) and from the batch
// write are returned unchanged.
func bufferedCSV(csvWriter *csv.Writer, csvReader *csv.Reader, opts csvOpts) error {
	// opts is a copy: counting down locally leaves the caller's options untouched
	for remaining := opts.skippedLines; remaining > 0; remaining-- {
		_, skipErr := csvReader.Read()

		switch {
		case skipErr == nil:
			continue
		case errors.Is(skipErr, io.EOF):
			return nil
		default:
			return skipErr
		}
	}

	allRecords, readErr := csvReader.ReadAll()
	if readErr != nil {
		return readErr
	}

	return csvWriter.WriteAll(allRecords)
}
Loading

0 comments on commit 638ea78

Please sign in to comment.