diff --git a/cmd/stattocsv/main.go b/cmd/stattocsv/main.go index 321f48d..64743f2 100644 --- a/cmd/stattocsv/main.go +++ b/cmd/stattocsv/main.go @@ -6,6 +6,7 @@ package main // (e.g. it may be the number of days since January 1, 1960). import ( + "encoding/csv" "fmt" "os" "strings" @@ -61,5 +62,8 @@ func main() { rdr = stata } - datareader.DoConversion(rdr) + err = datareader.ToCsv(rdr, 1000, csv.NewWriter(os.Stdout)) + if err != nil { + panic(err) + } } diff --git a/convert.go b/convert.go index 30f0032..ab293cd 100644 --- a/convert.go +++ b/convert.go @@ -2,28 +2,25 @@ package datareader import ( "encoding/csv" + "errors" "fmt" "io" - "os" "time" ) -func DoConversion(rdr StatfileReader) { - - w := csv.NewWriter(os.Stdout) - +func ToCsv(rdr StatfileReader, rows int, w *csv.Writer) error { ncol := len(rdr.ColumnNames()) if err := w.Write(rdr.ColumnNames()); err != nil { - panic(err) + return err } row := make([]string, ncol) for { - chunk, err := rdr.Read(1000) - if err != nil && err != io.EOF { - panic(err) - } else if chunk == nil || err == io.EOF { + chunk, err := rdr.Read(rows) + if err != nil && !errors.Is(err, io.EOF) { + return err + } else if chunk == nil || errors.Is(err, io.EOF) { break } @@ -39,7 +36,7 @@ func DoConversion(rdr StatfileReader) { missing := make([][]bool, ncol) - for j := 0; j < ncol; j++ { + for j := range ncol { missing[j] = chunk[j].Missing() dcol := chunk[j].Data() switch v := dcol.(type) { @@ -50,37 +47,39 @@ func DoConversion(rdr StatfileReader) { case []string: stringcols[j] = v default: - panic(fmt.Sprintf("unknown type: %T", dcol)) + return fmt.Errorf("unknown type: %T", dcol) } } for i := range nrow { for j := range ncol { - if numbercols[j] != nil { + switch { + case numbercols[j] != nil: if missing[j] == nil || !missing[j][i] { row[j] = fmt.Sprintf("%f", numbercols[j][i]) } else { row[j] = "" } - } else if stringcols[j] != nil { + case stringcols[j] != nil: if missing[j] == nil || !missing[j][i] { row[j] = stringcols[j][i] } else { row[j] = "" } - } else if timecols[j] != nil { + case timecols[j] != nil: if missing[j] == nil || !missing[j][i] { - row[j] = fmt.Sprintf("%v", timecols[j][i]) + row[j] = timecols[j][i].String() } else { row[j] = "" } } } - if err := w.Write(row); err != nil { - panic(err) + if werr := w.Write(row); werr != nil { + return werr } } } w.Flush() + return nil } diff --git a/convert_test.go b/convert_test.go new file mode 100644 index 0000000..f2dd8e3 --- /dev/null +++ b/convert_test.go @@ -0,0 +1,74 @@ +package datareader_test + +import ( + "bytes" + "encoding/csv" + "os" + "path/filepath" + "testing" + + "github.com/dominodatalab/datareader" + "github.com/stretchr/testify/require" +) + +type testFile struct { + path string +} + +func TestToCsvConvertsSAS(t *testing.T) { + files, err := filepath.Glob("test_files/data/*.sas7bdat") + require.NoError(t, err) + + testcases := map[string]testFile{} + for _, f := range files { + testcases["converts "+f] = testFile{path: f} + } + + for name, tt := range testcases { + t.Run(name, func(t *testing.T) { + f, err := os.Open(tt.path) + require.NoError(t, err) + defer f.Close() + + sas, err := datareader.NewSAS7BDATReader(f) + require.NoError(t, err) + sas.ConvertDates = true + sas.TrimStrings = true + + buf := new(bytes.Buffer) + w := csv.NewWriter(buf) + err = datareader.ToCsv(sas, 100, w) + + require.NoError(t, err) + }) + } +} + +func TestToCsvConvertsStata(t *testing.T) { + files, err := filepath.Glob("test_files/data/*.dta") + require.NoError(t, err) + + testcases := map[string]testFile{} + for _, f := range files { + testcases["converts "+f] = testFile{path: f} + } + + for name, tt := range testcases { + t.Run(name, func(t *testing.T) { + f, err := os.Open(tt.path) + require.NoError(t, err) + defer f.Close() + + stata, err := datareader.NewStataReader(f) + require.NoError(t, err) + stata.ConvertDates = true + stata.InsertCategoryLabels = true + stata.InsertStrls = true + + buf := new(bytes.Buffer) + w := csv.NewWriter(buf) + err = datareader.ToCsv(stata, 100, w) + require.NoError(t, err) + }) + } +} diff --git a/go.mod b/go.mod index 8220880..7ebab90 100644 --- a/go.mod +++ b/go.mod @@ -4,21 +4,25 @@ go 1.22.3 require ( github.com/pkg/errors v0.9.1 + github.com/stretchr/testify v1.9.0 golang.org/x/text v0.15.0 gotest.tools/gotestsum v1.11.0 ) require ( github.com/bitfield/gotestdox v0.2.1 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/dnephin/pflag v1.0.7 // indirect github.com/fatih/color v1.15.0 // indirect github.com/fsnotify/fsnotify v1.5.4 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.19 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect golang.org/x/mod v0.12.0 // indirect golang.org/x/sync v0.3.0 // indirect golang.org/x/sys v0.10.0 // indirect golang.org/x/term v0.10.0 // indirect golang.org/x/tools v0.11.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 3bfe423..adddb00 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/bitfield/gotestdox v0.2.1 h1:Zj8IMLAO5/oiAKoMmtN96eyFiPZraJRTH2p0zDgtxc0= github.com/bitfield/gotestdox v0.2.1/go.mod h1:D+gwtS0urjBrzguAkTM2wodsTQYFHdpx8eqRJ3N+9pY= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dnephin/pflag v1.0.7 h1:oxONGlWxhmUct0YzKTgrpQv9AUA1wtPBn7zuSjJqptk= github.com/dnephin/pflag v1.0.7/go.mod h1:uxE91IoWURlOiTUIA8Mq5ZZkAv3dPUfZNaT80Zm7OQE= github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= @@ -19,9 +21,13 @@ github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APP github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -89,6 +95,10 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/gotestsum v1.11.0 h1:A88/QWw7acMjZH1dMe6KZFhw32odUOIjCiAU/Q4n3mI= gotest.tools/gotestsum v1.11.0/go.mod h1:cUOKgFEvWAP0twchmiOvdzX0SBZX0UI58bGRpRIu4xs= gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=