Skip to content

Commit

Permalink
WIP refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
ddl-ebrown committed May 27, 2024
1 parent 3e51931 commit 20a53d7
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 19 deletions.
6 changes: 5 additions & 1 deletion cmd/stattocsv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package main
// (e.g. it may be the number of days since January 1, 1960).

import (
"encoding/csv"
"fmt"
"os"
"strings"
Expand Down Expand Up @@ -61,5 +62,8 @@ func main() {
rdr = stata
}

datareader.DoConversion(rdr)
err = datareader.ToCsv(rdr, 1000, csv.NewWriter(os.Stdout))
if err != nil {
panic(err)
}
}
35 changes: 17 additions & 18 deletions convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,25 @@ package datareader

import (
"encoding/csv"
"errors"
"fmt"
"io"
"os"
"time"
)

func DoConversion(rdr StatfileReader) {

w := csv.NewWriter(os.Stdout)

func ToCsv(rdr StatfileReader, rows int, w *csv.Writer) error {
ncol := len(rdr.ColumnNames())
if err := w.Write(rdr.ColumnNames()); err != nil {
panic(err)
return err
}

row := make([]string, ncol)

for {
chunk, err := rdr.Read(1000)
if err != nil && err != io.EOF {
panic(err)
} else if chunk == nil || err == io.EOF {
chunk, err := rdr.Read(rows)
if err != nil && !errors.Is(err, io.EOF) {
return err
} else if chunk == nil || errors.Is(err, io.EOF) {
break
}

Expand All @@ -39,7 +36,7 @@ func DoConversion(rdr StatfileReader) {

missing := make([][]bool, ncol)

for j := 0; j < ncol; j++ {
for j := range ncol {
missing[j] = chunk[j].Missing()
dcol := chunk[j].Data()
switch v := dcol.(type) {
Expand All @@ -50,37 +47,39 @@ func DoConversion(rdr StatfileReader) {
case []string:
stringcols[j] = v
default:
panic(fmt.Sprintf("unknown type: %T", dcol))
return fmt.Errorf("unknown type: %T", dcol)
}
}

for i := range nrow {
for j := range ncol {
if numbercols[j] != nil {
switch {
case numbercols[j] != nil:
if missing[j] == nil || !missing[j][i] {
row[j] = fmt.Sprintf("%f", numbercols[j][i])
} else {
row[j] = ""
}
} else if stringcols[j] != nil {
case stringcols[j] != nil:
if missing[j] == nil || !missing[j][i] {
row[j] = stringcols[j][i]
} else {
row[j] = ""
}
} else if timecols[j] != nil {
case timecols[j] != nil:
if missing[j] == nil || !missing[j][i] {
row[j] = fmt.Sprintf("%v", timecols[j][i])
row[j] = timecols[j][i].String()
} else {
row[j] = ""
}
}
}
if err := w.Write(row); err != nil {
panic(err)
if werr := w.Write(row); werr != nil {
return werr
}
}
}

w.Flush()
return nil
}
74 changes: 74 additions & 0 deletions convert_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package datareader_test

import (
"bytes"
"encoding/csv"
"os"
"path/filepath"
"testing"

"github.com/dominodatalab/datareader"
"github.com/stretchr/testify/require"
)

// testFile describes a single table-driven test case: one input data file
// that the tests in this file open and convert to CSV.
type testFile struct {
// path is the location of the input file, taken from a filepath.Glob match.
path string
}

// TestToCsvConvertsSAS runs one subtest per *.sas7bdat file found under
// test_files/data and checks that datareader.ToCsv converts it without
// returning an error. The CSV output is written to an in-memory buffer and is
// not inspected. If the glob matches nothing, no subtests run.
func TestToCsvConvertsSAS(t *testing.T) {
	matches, globErr := filepath.Glob("test_files/data/*.sas7bdat")
	require.NoError(t, globErr)

	// Build the case table keyed by subtest name.
	cases := make(map[string]testFile, len(matches))
	for _, match := range matches {
		cases["converts "+match] = testFile{path: match}
	}

	for label, tc := range cases {
		t.Run(label, func(t *testing.T) {
			in, openErr := os.Open(tc.path)
			require.NoError(t, openErr)
			defer in.Close()

			reader, newErr := datareader.NewSAS7BDATReader(in)
			require.NoError(t, newErr)
			reader.ConvertDates = true
			reader.TrimStrings = true

			// Read in chunks of 100 rows; output is discarded with the buffer.
			var out bytes.Buffer
			require.NoError(t, datareader.ToCsv(reader, 100, csv.NewWriter(&out)))
		})
	}
}

// TestToCsvConvertsStata runs one subtest per *.dta file found under
// test_files/data and checks that datareader.ToCsv converts it without
// returning an error. The CSV output is written to an in-memory buffer and is
// not inspected. If the glob matches nothing, no subtests run.
func TestToCsvConvertsStata(t *testing.T) {
	matches, globErr := filepath.Glob("test_files/data/*.dta")
	require.NoError(t, globErr)

	// Build the case table keyed by subtest name.
	cases := make(map[string]testFile, len(matches))
	for _, match := range matches {
		cases["converts "+match] = testFile{path: match}
	}

	for label, tc := range cases {
		t.Run(label, func(t *testing.T) {
			in, openErr := os.Open(tc.path)
			require.NoError(t, openErr)
			defer in.Close()

			reader, newErr := datareader.NewStataReader(in)
			require.NoError(t, newErr)
			reader.ConvertDates = true
			reader.InsertCategoryLabels = true
			reader.InsertStrls = true

			// Read in chunks of 100 rows; output is discarded with the buffer.
			var out bytes.Buffer
			require.NoError(t, datareader.ToCsv(reader, 100, csv.NewWriter(&out)))
		})
	}
}
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,25 @@ go 1.22.3

require (
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.9.0
golang.org/x/text v0.15.0
gotest.tools/gotestsum v1.11.0
)

require (
github.com/bitfield/gotestdox v0.2.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dnephin/pflag v1.0.7 // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/fsnotify/fsnotify v1.5.4 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/mod v0.12.0 // indirect
golang.org/x/sync v0.3.0 // indirect
golang.org/x/sys v0.10.0 // indirect
golang.org/x/term v0.10.0 // indirect
golang.org/x/tools v0.11.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
github.com/bitfield/gotestdox v0.2.1 h1:Zj8IMLAO5/oiAKoMmtN96eyFiPZraJRTH2p0zDgtxc0=
github.com/bitfield/gotestdox v0.2.1/go.mod h1:D+gwtS0urjBrzguAkTM2wodsTQYFHdpx8eqRJ3N+9pY=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dnephin/pflag v1.0.7 h1:oxONGlWxhmUct0YzKTgrpQv9AUA1wtPBn7zuSjJqptk=
github.com/dnephin/pflag v1.0.7/go.mod h1:uxE91IoWURlOiTUIA8Mq5ZZkAv3dPUfZNaT80Zm7OQE=
github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs=
Expand All @@ -19,9 +21,13 @@ github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APP
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
Expand Down Expand Up @@ -89,6 +95,10 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/gotestsum v1.11.0 h1:A88/QWw7acMjZH1dMe6KZFhw32odUOIjCiAU/Q4n3mI=
gotest.tools/gotestsum v1.11.0/go.mod h1:cUOKgFEvWAP0twchmiOvdzX0SBZX0UI58bGRpRIu4xs=
gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
Expand Down

0 comments on commit 20a53d7

Please sign in to comment.