From 077b6ab219b9516f102281c4a50adee96aecb034 Mon Sep 17 00:00:00 2001 From: Daniel Fireman Date: Mon, 20 May 2019 22:31:02 -0300 Subject: [PATCH 1/4] Add table.ReadColumn --- csv/table.go | 23 +++++++++++++++++++++++ csv/table_test.go | 25 +++++++++++++++++++++++++ table/table.go | 27 +++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/csv/table.go b/csv/table.go index b987977..bb84305 100644 --- a/csv/table.go +++ b/csv/table.go @@ -82,6 +82,29 @@ func (table *Table) Headers() []string { return table.headers } +// ReadColumn reads a specific column from the table and return it as strings. +func (table *Table) ReadColumn(name string) ([]string, error) { + index := -1 + for i, h := range table.headers { + if name == h { + index = i + break + } + } + if index == -1 { + return nil, fmt.Errorf("column name \"%s\" not found in headers", name) + } + iter, err := table.Iter() + if err != nil { + return nil, fmt.Errorf("error creating iterator:%q", err) + } + var col []string + for iter.Next() { + col = append(col, iter.Row()[index]) + } + return col, nil +} + // String returns a string version of the table. 
func (table *Table) String() string { var buf bytes.Buffer diff --git a/csv/table_test.go b/csv/table_test.go index 399651c..d65f25c 100644 --- a/csv/table_test.go +++ b/csv/table_test.go @@ -34,6 +34,13 @@ func ExampleTable_ReadAll() { // Output:[[foo] [bar]] } +func ExampleTable_ReadColumn() { + table, _ := NewTable(FromString("name,age\nfoo,25\nbar,48"), LoadHeaders()) + cols, _ := table.ReadColumn("name") + fmt.Print(cols) + // Output:[foo bar] +} + func ExampleNewWriter() { var buf bytes.Buffer w := NewWriter(&buf) @@ -163,3 +170,21 @@ func TestString(t *testing.T) { is.NoErr(err) is.Equal(want, table.String()) } + +func TestReadColumn(t *testing.T) { + t.Run("HeaderNotFound", func(t *testing.T) { + is := is.New(t) + tab, err := NewTable(FromString("name\nfoo"), LoadHeaders()) + is.NoErr(err) + _, err = tab.ReadColumn("age") + is.True(err != nil) // Must err as there is no column called age. + }) + t.Run("ErrorCreatingIter", func(t *testing.T) { + is := is.New(t) + tab, err := NewTable(errorSource()) + is.NoErr(err) + tab.headers = []string{"age"} + _, err = tab.ReadColumn("age") + is.True(err != nil) // Must err as the source will error. + }) +} diff --git a/table/table.go b/table/table.go index 0983369..bd4a357 100644 --- a/table/table.go +++ b/table/table.go @@ -19,6 +19,7 @@ package table import ( "bytes" "encoding/csv" + "fmt" ) // Table provides functionality to iterate and write tabular data. This is the logical @@ -34,6 +35,9 @@ type Table interface { // ReadAll reads all rows from the table and return it as strings. ReadAll() ([][]string, error) + + // ReadColumn reads a specific column from the table and return it as strings. + ReadColumn(string) ([]string, error) } // A Writer writes rows to a table file. The idea behind the writer is to @@ -100,6 +104,29 @@ func (t *SliceTable) Iter() (Iterator, error) { return &sliceIterator{content: t.content}, nil } +// ReadColumn reads a specific column from the table and return it as strings. 
+func (t *SliceTable) ReadColumn(name string) ([]string, error) { + index := -1 + for i, h := range t.headers { + if name == h { + index = i + break + } + } + if index == -1 { + return nil, fmt.Errorf("column name \"%s\" not found in headers", name) + } + iter, err := t.Iter() + if err != nil { + return nil, fmt.Errorf("error creating iterator:%q", err) + } + var col []string + for iter.Next() { + col = append(col, iter.Row()[index]) + } + return col, nil +} + type sliceIterator struct { content [][]string pos int From 915c8eaf869bcb4fde99deb2c406d6e52f4b7b0e Mon Sep 17 00:00:00 2001 From: Daniel Fireman Date: Mon, 20 May 2019 22:31:13 -0300 Subject: [PATCH 2/4] Add sch.CastColumn --- schema/schema.go | 35 +++++++++++++++++++++++++++++++++++ schema/schema_test.go | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/schema/schema.go b/schema/schema.go index 246f627..f91a254 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -405,6 +405,41 @@ func extractUniqueFieldIndexes(s *Schema) []int { return keys } +// CastColumn loads and casts all rows from a single column. +// +// The out argument must be the address of a slice. The slice +// may be nil or previously allocated. +func (s *Schema) CastColumn(col []string, name string, out interface{}) error { + outv := reflect.ValueOf(out) + if outv.Kind() != reflect.Ptr || outv.Elem().Kind() != reflect.Slice { + return fmt.Errorf("out argument must be a slice address") + } + f, err := s.GetField(name) + if err == InvalidPosition { + return fmt.Errorf("invalid field name \"%s\"", name) + } + slicev := outv.Elem() + slicev = slicev.Slice(0, 0) // Trucantes the passed-in slice. 
+ elemt := slicev.Type().Elem() + for _, v := range col { + cast, err := f.Cast(v) + if err != nil { + return fmt.Errorf("error casting column value(%s):%q", v, err) + } + toSetValue := reflect.ValueOf(cast) + toSetType := toSetValue.Type() + if !toSetType.ConvertibleTo(elemt) { + return fmt.Errorf("value:%s field:%s - can not convert from %v to %v", v, f.Name, toSetType, elemt) + } + elem := reflect.New(elemt).Elem() + elem.Set(toSetValue.Convert(elemt)) + slicev = reflect.Append(slicev, elem) + slicev = slicev.Slice(0, slicev.Len()) + } + outv.Elem().Set(slicev.Slice(0, len(col))) + return nil +} + // UncastTable uncasts each element (struct) of the passed-in slice and func (s *Schema) UncastTable(in interface{}) ([][]string, error) { inVal := reflect.Indirect(reflect.ValueOf(in)) diff --git a/schema/schema_test.go b/schema/schema_test.go index 0867b05..960d45f 100644 --- a/schema/schema_test.go +++ b/schema/schema_test.go @@ -118,6 +118,24 @@ func ExampleSchema_UncastTable() { // Bar,43 } +func ExampleSchema_CastColumn() { + // Lets assume we have a schema ... + s := Schema{Fields: []Field{{Name: "Name", Type: StringType}, {Name: "Age", Type: IntegerType, Constraints: Constraints{Unique: true}}}} + // And a Table. + t := table.FromSlices([]string{"Name", "Age"}, [][]string{ + {"Foo", "42"}, + {"Bar", "43"}}) + // And we would like to process the column Age using Go types. First we need to create a + // slice to hold the column contents. + var ages []float64 + // Extract the column. + col, _ := t.ReadColumn("Age") + // And profit! 
+ s.CastColumn(col, "Age", &ages) + fmt.Print(ages) + // Output: [42 43] +} + func TestLoadRemote(t *testing.T) { is := is.New(t) h := func(w http.ResponseWriter, r *http.Request) { @@ -644,3 +662,27 @@ func TestSchema_CastRow(t *testing.T) { is.True(sch.CastRow([]string{"Foo", "42", "boo"}, &rec) != nil) // More columns than #Fields is.True(sch.CastRow([]string{"Foo"}, &rec) != nil) // Less columns than #Fields } + +func TestSchema_CastColumn(t *testing.T) { + sch := Schema{Fields: []Field{{Name: "Age", Type: IntegerType}}} + t.Run("ErrorArgMustBeSlicePointer", func(t *testing.T) { + is := is.New(t) + var f []string + is.True(sch.CastColumn([]string{"42"}, "Age", f) != nil) // Must err because f is not a pointer. + }) + t.Run("ErrorInvalidFieldName", func(t *testing.T) { + is := is.New(t) + var f []string + is.True(sch.CastColumn([]string{"42"}, "Name", f) != nil) // Must err because Name is not a valid field. + }) + t.Run("ErrorCanNotCast", func(t *testing.T) { + is := is.New(t) + var f []int + is.True(sch.CastColumn([]string{"Foo"}, "Age", f) != nil) // Must err because Foo can not be cast to integer. + }) + t.Run("ErrorCanNotConvert", func(t *testing.T) { + is := is.New(t) + var f []string + is.True(sch.CastColumn([]string{"42"}, "Age", f) != nil) // Must err because 42 (integer) can not be converted to string. 
+ }) +} From 0a60b46c01bc0980dc9f32c442ef33f1c3633c1f Mon Sep 17 00:00:00 2001 From: Daniel Fireman Date: Mon, 20 May 2019 22:44:55 -0300 Subject: [PATCH 3/4] Fix and transform tests in table-driven tests --- schema/schema_test.go | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/schema/schema_test.go b/schema/schema_test.go index 960d45f..2d14205 100644 --- a/schema/schema_test.go +++ b/schema/schema_test.go @@ -665,24 +665,24 @@ func TestSchema_CastRow(t *testing.T) { func TestSchema_CastColumn(t *testing.T) { sch := Schema{Fields: []Field{{Name: "Age", Type: IntegerType}}} - t.Run("ErrorArgMustBeSlicePointer", func(t *testing.T) { - is := is.New(t) - var f []string - is.True(sch.CastColumn([]string{"42"}, "Age", f) != nil) // Must err because f is not a pointer. - }) - t.Run("ErrorInvalidFieldName", func(t *testing.T) { - is := is.New(t) - var f []string - is.True(sch.CastColumn([]string{"42"}, "Name", f) != nil) // Must err because Name is not a valid field. - }) - t.Run("ErrorCanNotCast", func(t *testing.T) { - is := is.New(t) - var f []int - is.True(sch.CastColumn([]string{"Foo"}, "Age", f) != nil) // Must err because Foo can not be cast to integer. - }) - t.Run("ErrorCanNotConvert", func(t *testing.T) { - is := is.New(t) - var f []string - is.True(sch.CastColumn([]string{"42"}, "Age", f) != nil) // Must err because 42 (integer) can not be converted to string. 
+ t.Run("Errors", func(t *testing.T) { + data := []struct { + desc string + col []string + colName string + out interface{} + }{ + {"OutNotASlicePointer", []string{"42"}, "Age", []int{}}, + {"InvalidFieldName", []string{"42"}, "Name", &([]string{})}, + {"CanNotCast", []string{"Foo"}, "Age", &([]int{})}, + {"CanNotConvert", []string{"42"}, "Age", &([]*int{})}, + } + for _, d := range data { + t.Run(d.desc, func(t *testing.T) { + if sch.CastColumn(d.col, d.colName, d.out) == nil { + t.Errorf("want:err got:nil col:%v colName:%v out:%v", d.col, d.colName, d.out) + } + }) + } }) } From c860bb22ac14b14a0e57aae961b4874f436e4617 Mon Sep 17 00:00:00 2001 From: Daniel Fireman Date: Mon, 20 May 2019 22:45:11 -0300 Subject: [PATCH 4/4] Adding more comments to schema.CastColumn --- schema/schema.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema/schema.go b/schema/schema.go index f91a254..724d489 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -419,8 +419,8 @@ func (s *Schema) CastColumn(col []string, name string, out interface{}) error { return fmt.Errorf("invalid field name \"%s\"", name) } slicev := outv.Elem() - slicev = slicev.Slice(0, 0) // Trucantes the passed-in slice. - elemt := slicev.Type().Elem() + slicev = slicev.Slice(0, 0) // Truncates the passed-in slice. + elemt := slicev.Type().Elem() // Element type of the destination slice. for _, v := range col { cast, err := f.Cast(v) if err != nil {