Skip to content

Commit

Permalink
Merge pull request #80 from frictionlessdata/castColumn
Browse files Browse the repository at this point in the history
Add schema.CastColumn
  • Loading branch information
danielfireman authored May 21, 2019
2 parents 53bb36c + c860bb2 commit f9bf389
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 0 deletions.
23 changes: 23 additions & 0 deletions csv/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,29 @@ func (table *Table) Headers() []string {
return table.headers
}

// ReadColumn reads a specific column from the table and returns it as strings.
//
// The column is located by comparing name against the table headers; the
// first matching header wins. An error is returned when no header matches,
// when the iterator can not be created, or when a row has too few fields to
// contain the requested column.
func (table *Table) ReadColumn(name string) ([]string, error) {
	index := -1
	for i, h := range table.headers {
		if name == h {
			index = i
			break
		}
	}
	if index == -1 {
		return nil, fmt.Errorf("column name \"%s\" not found in headers", name)
	}
	iter, err := table.Iter()
	if err != nil {
		return nil, fmt.Errorf("error creating iterator:%q", err)
	}
	// NOTE(review): if the iterator exposes a way to report read errors or to
	// be closed, that should be used here as well — confirm against Iterator.
	var col []string
	for iter.Next() {
		row := iter.Row()
		// Nothing here guarantees that every row is as wide as the headers,
		// so report a short row as an error instead of panicking on the index.
		if index >= len(row) {
			return nil, fmt.Errorf("row %d has %d field(s), too few to read column \"%s\" (index %d)", len(col)+1, len(row), name, index)
		}
		col = append(col, row[index])
	}
	return col, nil
}

// String returns a string version of the table.
func (table *Table) String() string {
var buf bytes.Buffer
Expand Down
25 changes: 25 additions & 0 deletions csv/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ func ExampleTable_ReadAll() {
// Output:[[foo] [bar]]
}

// ExampleTable_ReadColumn shows how to pull a single column, selected by its
// header, out of an in-memory CSV table.
func ExampleTable_ReadColumn() {
	// Errors are ignored to keep the example short.
	tab, _ := NewTable(
		FromString("name,age\nfoo,25\nbar,48"),
		LoadHeaders(),
	)
	names, _ := tab.ReadColumn("name")
	fmt.Print(names)
	// Output:[foo bar]
}

func ExampleNewWriter() {
var buf bytes.Buffer
w := NewWriter(&buf)
Expand Down Expand Up @@ -163,3 +170,21 @@ func TestString(t *testing.T) {
is.NoErr(err)
is.Equal(want, table.String())
}

// TestReadColumn exercises the error paths of Table.ReadColumn.
func TestReadColumn(t *testing.T) {
// The table only has a "name" header, so asking for "age" must fail.
t.Run("HeaderNotFound", func(t *testing.T) {
is := is.New(t)
tab, err := NewTable(FromString("name\nfoo"), LoadHeaders())
is.NoErr(err)
_, err = tab.ReadColumn("age")
is.True(err != nil) // Must err as there is no column called age.
})
// Headers are assigned directly so the header lookup succeeds and the
// failure has to come from a later step of ReadColumn.
// NOTE(review): errorSource is defined elsewhere; presumably it yields a
// source that fails when opened — confirm.
t.Run("ErrorCreatingIter", func(t *testing.T) {
is := is.New(t)
tab, err := NewTable(errorSource())
is.NoErr(err)
tab.headers = []string{"age"}
_, err = tab.ReadColumn("age")
is.True(err != nil) // Must err as the source will error.
})
}
35 changes: 35 additions & 0 deletions schema/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,41 @@ func extractUniqueFieldIndexes(s *Schema) []int {
return keys
}

// CastColumn casts every value of a single column and stores the results in
// out.
//
// col holds the raw column values and name selects the schema field used to
// cast them. The out argument must be the address of a slice; the slice may be
// nil or previously allocated. Its previous contents are discarded and each
// cast value is converted to the slice's element type before being appended.
//
// An error is returned when out is not a pointer to a slice, when name does
// not match a schema field, when a value can not be cast, when a cast yields
// no value (nil), or when a cast value is not convertible to the element type.
// The slice header behind out is only updated on success, but a previously
// allocated backing array is reused, so its elements may already have been
// overwritten when an error is returned.
func (s *Schema) CastColumn(col []string, name string, out interface{}) error {
	outv := reflect.ValueOf(out)
	if outv.Kind() != reflect.Ptr || outv.Elem().Kind() != reflect.Slice {
		return fmt.Errorf("out argument must be a slice address")
	}
	// The second result is compared against InvalidPosition, so name it as a
	// position rather than as an error.
	f, pos := s.GetField(name)
	if pos == InvalidPosition {
		return fmt.Errorf("invalid field name \"%s\"", name)
	}
	slicev := outv.Elem().Slice(0, 0) // Truncates the passed-in slice, keeping its capacity.
	elemt := slicev.Type().Elem()     // Element type of the destination slice.
	for _, v := range col {
		cast, err := f.Cast(v)
		if err != nil {
			return fmt.Errorf("error casting column value(%s):%q", v, err)
		}
		toSetValue := reflect.ValueOf(cast)
		// reflect.ValueOf(nil) is the zero Value and calling Type() on it
		// panics, so a nil cast result is reported as an error.
		if !toSetValue.IsValid() {
			return fmt.Errorf("value:%s field:%s - cast returned no value", v, f.Name)
		}
		toSetType := toSetValue.Type()
		if !toSetType.ConvertibleTo(elemt) {
			return fmt.Errorf("value:%s field:%s - can not convert from %v to %v", v, f.Name, toSetType, elemt)
		}
		slicev = reflect.Append(slicev, toSetValue.Convert(elemt))
	}
	outv.Elem().Set(slicev)
	return nil
}

// UncastTable uncasts each element (struct) of the passed-in slice and
func (s *Schema) UncastTable(in interface{}) ([][]string, error) {
inVal := reflect.Indirect(reflect.ValueOf(in))
Expand Down
42 changes: 42 additions & 0 deletions schema/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,24 @@ func ExampleSchema_UncastTable() {
// Bar,43
}

// ExampleSchema_CastColumn shows how to read one column of a table and cast
// it into a slice of Go values using the schema.
func ExampleSchema_CastColumn() {
	// Given a schema describing the table fields ...
	sch := Schema{
		Fields: []Field{
			{Name: "Name", Type: StringType},
			{Name: "Age", Type: IntegerType, Constraints: Constraints{Unique: true}},
		},
	}
	// ... and a table holding the data ...
	tab := table.FromSlices(
		[]string{"Name", "Age"},
		[][]string{
			{"Foo", "42"},
			{"Bar", "43"},
		},
	)
	// ... read the raw "Age" column and cast it into a slice of the desired Go
	// type. Errors are ignored to keep the example short.
	rawAges, _ := tab.ReadColumn("Age")
	var ages []float64
	_ = sch.CastColumn(rawAges, "Age", &ages)
	fmt.Print(ages)
	// Output: [42 43]
}

func TestLoadRemote(t *testing.T) {
is := is.New(t)
h := func(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -644,3 +662,27 @@ func TestSchema_CastRow(t *testing.T) {
is.True(sch.CastRow([]string{"Foo", "42", "boo"}, &rec) != nil) // More columns than #Fields
is.True(sch.CastRow([]string{"Foo"}, &rec) != nil) // Less columns than #Fields
}

// TestSchema_CastColumn checks that Schema.CastColumn reports an error for
// each kind of invalid input.
func TestSchema_CastColumn(t *testing.T) {
	sch := Schema{Fields: []Field{{Name: "Age", Type: IntegerType}}}
	t.Run("Errors", func(t *testing.T) {
		type errCase struct {
			desc    string
			col     []string
			colName string
			out     interface{}
		}
		cases := []errCase{
			{desc: "OutNotASlicePointer", col: []string{"42"}, colName: "Age", out: []int{}},
			{desc: "InvalidFieldName", col: []string{"42"}, colName: "Name", out: &([]string{})},
			{desc: "CanNotCast", col: []string{"Foo"}, colName: "Age", out: &([]int{})},
			{desc: "CanNotConvert", col: []string{"42"}, colName: "Age", out: &([]*int{})},
		}
		for i := range cases {
			tc := cases[i]
			t.Run(tc.desc, func(t *testing.T) {
				err := sch.CastColumn(tc.col, tc.colName, tc.out)
				if err == nil {
					t.Errorf("want:err got:nil col:%v colName:%v out:%v", tc.col, tc.colName, tc.out)
				}
			})
		}
	})
}
27 changes: 27 additions & 0 deletions table/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package table
import (
"bytes"
"encoding/csv"
"fmt"
)

// Table provides functionality to iterate and write tabular data. This is the logical
Expand All @@ -34,6 +35,9 @@ type Table interface {

// ReadAll reads all rows from the table and return it as strings.
ReadAll() ([][]string, error)

// ReadColumn reads a specific column from the table and return it as strings.
ReadColumn(string) ([]string, error)
}

// A Writer writes rows to a table file. The idea behind the writer is to
Expand Down Expand Up @@ -100,6 +104,29 @@ func (t *SliceTable) Iter() (Iterator, error) {
return &sliceIterator{content: t.content}, nil
}

// ReadColumn reads a specific column from the table and returns it as strings.
//
// The column is located by comparing name against the table headers; the
// first matching header wins. An error is returned when no header matches,
// when the iterator can not be created, or when a row has too few fields to
// contain the requested column.
func (t *SliceTable) ReadColumn(name string) ([]string, error) {
	index := -1
	for i, h := range t.headers {
		if name == h {
			index = i
			break
		}
	}
	if index == -1 {
		return nil, fmt.Errorf("column name \"%s\" not found in headers", name)
	}
	iter, err := t.Iter()
	if err != nil {
		return nil, fmt.Errorf("error creating iterator:%q", err)
	}
	var col []string
	for iter.Next() {
		row := iter.Row()
		// Rows are plain string slices and may be shorter than the headers,
		// so report a short row as an error instead of panicking on the index.
		if index >= len(row) {
			return nil, fmt.Errorf("row %d has %d field(s), too few to read column \"%s\" (index %d)", len(col)+1, len(row), name, index)
		}
		col = append(col, row[index])
	}
	return col, nil
}

type sliceIterator struct {
content [][]string
pos int
Expand Down

0 comments on commit f9bf389

Please sign in to comment.