Skip to content

Commit

Permalink
Move conversion code
Browse files Browse the repository at this point in the history
 - Extract the doConversion helper from stattocsv/main.go over to
   convert.go so that it can be more easily reused
  • Loading branch information
ddl-ebrown committed May 26, 2024
1 parent 8973783 commit 9ef9588
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 81 deletions.
82 changes: 1 addition & 81 deletions cmd/stattocsv/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,93 +6,13 @@ package main
// (e.g. it may be the number of days since January 1, 1960).

import (
"encoding/csv"
"fmt"
"io"
"os"
"strings"
"time"

"github.com/dominodatalab/datareader"
)

// doConversion writes the full contents of rdr to standard output as CSV.
//
// The header record is rdr.ColumnNames(). Data is fetched repeatedly with
// rdr.Read(1000) until the reader returns io.EOF or an empty chunk; a chunk
// returned together with io.EOF is not written. Every column is passed through
// UpcastNumeric and must then expose []float64, []string or []time.Time data.
// Numbers are formatted with "%f", times with "%v", strings are written
// verbatim, and values flagged by Missing() become empty fields.
//
// The function panics on any read or write error, on a chunk that has fewer
// columns than the header, and on a column of an unsupported type.
func doConversion(rdr datareader.StatfileReader) {
	w := csv.NewWriter(os.Stdout)

	names := rdr.ColumnNames()
	ncol := len(names)
	if err := w.Write(names); err != nil {
		panic(err)
	}

	// Reused for every output record; csv.Writer does not retain it.
	row := make([]string, ncol)

	for {
		chunk, err := rdr.Read(1000)
		if err != nil && err != io.EOF {
			panic(err)
		}
		// Test the length rather than nil: an empty but non-nil chunk has no
		// chunk[0] to take the row count from.
		if err == io.EOF || len(chunk) == 0 {
			break
		}
		if len(chunk) < ncol {
			panic(fmt.Sprintf("chunk has %d columns, want at least %d", len(chunk), ncol))
		}

		for j := range chunk {
			chunk[j] = chunk[j].UpcastNumeric()
		}

		// The first column's length is used as the row count of the chunk.
		nrow := chunk[0].Length()

		// For each column exactly one of these holds the data, chosen by its
		// dynamic type; the other two stay nil.
		numbercols := make([][]float64, ncol)
		stringcols := make([][]string, ncol)
		timecols := make([][]time.Time, ncol)
		missing := make([][]bool, ncol)

		for j := 0; j < ncol; j++ {
			missing[j] = chunk[j].Missing()
			dcol := chunk[j].Data()
			switch v := dcol.(type) {
			case []time.Time:
				timecols[j] = v
			case []float64:
				numbercols[j] = v
			case []string:
				stringcols[j] = v
			default:
				panic(fmt.Sprintf("unknown type: %T", dcol))
			}
		}

		for i := range nrow {
			for j := range ncol {
				// A nil mask means the column has no missing values.
				if missing[j] != nil && missing[j][i] {
					row[j] = ""
					continue
				}
				switch {
				case numbercols[j] != nil:
					row[j] = fmt.Sprintf("%f", numbercols[j][i])
				case stringcols[j] != nil:
					row[j] = stringcols[j][i]
				case timecols[j] != nil:
					row[j] = fmt.Sprintf("%v", timecols[j][i])
				}
			}
			if err := w.Write(row); err != nil {
				panic(err)
			}
		}
	}

	// csv.Writer buffers its output, so a failed write may only become
	// visible after Flush; Error reports it.
	w.Flush()
	if err := w.Error(); err != nil {
		panic(err)
	}
}

func main() {

if len(os.Args) == 1 {
Expand Down Expand Up @@ -141,5 +61,5 @@ func main() {
rdr = stata
}

doConversion(rdr)
datareader.DoConversion(rdr)
}
86 changes: 86 additions & 0 deletions convert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package datareader

import (
"encoding/csv"
"fmt"
"io"
"os"
"time"
)

// DoConversion writes the full contents of rdr to standard output as CSV.
//
// The header record is rdr.ColumnNames(). Data is fetched repeatedly with
// rdr.Read(1000) until the reader returns io.EOF or an empty chunk; a chunk
// returned together with io.EOF is not written. Every column is passed through
// UpcastNumeric and must then expose []float64, []string or []time.Time data.
// Numbers are formatted with "%f", times with "%v", strings are written
// verbatim, and values flagged by Missing() become empty fields.
//
// DoConversion panics on any read or write error, on a chunk that has fewer
// columns than the header, and on a column of an unsupported type.
//
// NOTE(review): as an exported library function this would ideally return an
// error instead of panicking; the panics are kept so existing callers see the
// same failure behavior.
func DoConversion(rdr StatfileReader) {
	w := csv.NewWriter(os.Stdout)

	header := rdr.ColumnNames()
	ncol := len(header)
	if err := w.Write(header); err != nil {
		panic(err)
	}

	// Reused for every output record; csv.Writer does not retain it.
	row := make([]string, ncol)

	for {
		chunk, err := rdr.Read(1000)
		if err != nil && err != io.EOF {
			panic(err)
		}
		// Test the length rather than nil: an empty but non-nil chunk has no
		// chunk[0] to take the row count from.
		if err == io.EOF || len(chunk) == 0 {
			break
		}
		if len(chunk) < ncol {
			panic(fmt.Sprintf("chunk has %d columns, want at least %d", len(chunk), ncol))
		}

		for j := range chunk {
			chunk[j] = chunk[j].UpcastNumeric()
		}

		// The first column's length is used as the row count of the chunk.
		nrow := chunk[0].Length()

		// For each column exactly one of these holds the data, chosen by its
		// dynamic type; the other two stay nil.
		numbercols := make([][]float64, ncol)
		stringcols := make([][]string, ncol)
		timecols := make([][]time.Time, ncol)
		missing := make([][]bool, ncol)

		for j := 0; j < ncol; j++ {
			missing[j] = chunk[j].Missing()
			dcol := chunk[j].Data()
			switch v := dcol.(type) {
			case []time.Time:
				timecols[j] = v
			case []float64:
				numbercols[j] = v
			case []string:
				stringcols[j] = v
			default:
				panic(fmt.Sprintf("unknown type: %T", dcol))
			}
		}

		for i := range nrow {
			for j := range ncol {
				// A nil mask means the column has no missing values.
				if missing[j] != nil && missing[j][i] {
					row[j] = ""
					continue
				}
				switch {
				case numbercols[j] != nil:
					row[j] = fmt.Sprintf("%f", numbercols[j][i])
				case stringcols[j] != nil:
					row[j] = stringcols[j][i]
				case timecols[j] != nil:
					row[j] = fmt.Sprintf("%v", timecols[j][i])
				}
			}
			if err := w.Write(row); err != nil {
				panic(err)
			}
		}
	}

	// csv.Writer buffers its output, so a failed write may only become
	// visible after Flush; Error reports it.
	w.Flush()
	if err := w.Error(); err != nil {
		panic(err)
	}
}

0 comments on commit 9ef9588

Please sign in to comment.