Skip to content

Commit

Permalink
tables: fix prefix index, when the charset is utf8, truncate it from …
Browse files Browse the repository at this point in the history
…runes (#7109) (#7126)
  • Loading branch information
winkyao authored and shenli committed Jul 23, 2018
1 parent 5c61f4c commit 0338239
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 12 deletions.
1 change: 1 addition & 0 deletions ddl/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ func buildIndexColumns(columns []*model.ColumnInfo, idxColNames []*ast.IndexColN
Name: col.Name,
Offset: col.Offset,
Length: ic.Length,
Tp: &col.FieldType,
})
}

Expand Down
14 changes: 14 additions & 0 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3352,3 +3352,17 @@ func (s *testIntegrationSuite) TestTwoDecimalAssignTruncate(c *C) {
res := tk.MustQuery("select a, b from t1")
res.Check(testkit.Rows("123.12345 123.1"))
}

// TestPrefixIndex inserts a multi-byte utf8mb4 value into a column covered by
// a prefix index and checks that an equality lookup on that column returns it.
func (s *testIntegrationSuite) TestPrefixIndex(c *C) {
	kit := testkit.NewTestKit(c, s.store)
	defer s.cleanEnv(c)

	kit.MustExec("use test")
	kit.MustExec(`CREATE TABLE t1 (
name varchar(12) DEFAULT NULL,
KEY pname (name(12))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci`)

	// The value below is 8 characters long but occupies 22 bytes in UTF-8, so
	// it fits the 12-character prefix while exceeding 12 bytes.
	kit.MustExec("insert into t1 values('借款策略集_网页');")
	kit.MustQuery("select * from t1 where name = '借款策略集_网页';").
		Check(testkit.Rows("借款策略集_网页"))
}
2 changes: 2 additions & 0 deletions model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ type IndexColumn struct {
// for indexing;
// UnspecifiedLength if not using prefix indexing
Length int `json:"length"`
// Tp is the index column field type.
Tp *types.FieldType
}

// Clone clones IndexColumn.
Expand Down
39 changes: 29 additions & 10 deletions table/tables/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"bytes"
"encoding/binary"
"io"
"unicode/utf8"

"github.com/juju/errors"
"github.com/pingcap/tidb/kv"
Expand All @@ -26,6 +27,7 @@ import (
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/charset"
"github.com/pingcap/tidb/util/codec"
)

Expand Down Expand Up @@ -125,6 +127,32 @@ func (c *index) getIndexKeyBuf(buf []byte, defaultCap int) []byte {
return make([]byte, 0, defaultCap)
}

// truncateIndexValuesIfNeeded truncates, in place, every string or bytes value
// whose index column was declared with a prefix length (col_name(length)), so
// that only the leading part of the value is kept. It returns the same slice
// it was given.
//
// For utf8 and utf8mb4 columns the prefix length is counted in characters
// (runes); for every other charset it is counted in bytes. A column that
// carries no type information (ic.Tp == nil) is truncated by bytes instead of
// dereferencing the nil pointer.
func (c *index) truncateIndexValuesIfNeeded(indexedValues []types.Datum) []types.Datum {
	for i := range indexedValues {
		v := &indexedValues[i]
		if v.Kind() != types.KindString && v.Kind() != types.KindBytes {
			continue
		}

		ic := c.idxInfo.Columns[i]
		if ic.Length == types.UnspecifiedLength {
			// Not a prefix index column: the whole value is indexed.
			continue
		}

		val := v.GetBytes()

		// NOTE(review): Tp is presumably nil for index metadata that was
		// built without a field type (for example a zero-value IndexColumn);
		// fall back to byte-based truncation in that case.
		isUTF8 := ic.Tp != nil &&
			(ic.Tp.Charset == charset.CharsetUTF8 || ic.Tp.Charset == charset.CharsetUTF8MB4)
		if !isUTF8 {
			if len(val) > ic.Length {
				// truncate value and limit its length in bytes
				v.SetBytes(val[:ic.Length])
			}
			continue
		}

		// A value never has more runes than bytes, so the rune count is only
		// needed when the byte length already exceeds the prefix length.
		if len(val) > ic.Length && utf8.RuneCount(val) > ic.Length {
			// truncate value and limit its length in characters
			v.SetString(string(bytes.Runes(val)[:ic.Length]))
		}
	}

	return indexedValues
}

// GenIndexKey generates storage key for index values. Returned distinct indicates whether the
// indexed values should be distinct in storage (i.e. whether handle is encoded in the key).
func (c *index) GenIndexKey(sc *stmtctx.StatementContext, indexedValues []types.Datum, h int64, buf []byte) (key []byte, distinct bool, err error) {
Expand All @@ -144,16 +172,7 @@ func (c *index) GenIndexKey(sc *stmtctx.StatementContext, indexedValues []types.

// For string columns, indexes can be created that use only the leading part of column values,
// using col_name(length) syntax to specify an index prefix length.
for i := 0; i < len(indexedValues); i++ {
v := &indexedValues[i]
if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
ic := c.idxInfo.Columns[i]
if ic.Length != types.UnspecifiedLength && len(v.GetBytes()) > ic.Length {
// truncate value and limit its length
v.SetBytes(v.GetBytes()[:ic.Length])
}
}
}
indexedValues = c.truncateIndexValuesIfNeeded(indexedValues)
key = c.getIndexKeyBuf(buf, len(c.prefix)+len(indexedValues)*9+9)
key = append(key, []byte(c.prefix)...)
key, err = codec.EncodeKey(sc, key, indexedValues...)
Expand Down
4 changes: 2 additions & 2 deletions table/tables/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ func (s *testIndexSuite) TestCombineIndexSeek(c *C) {
ID: 2,
Name: model.NewCIStr("test"),
Columns: []*model.IndexColumn{
{},
{},
{Tp: &types.FieldType{}},
{Tp: &types.FieldType{}},
},
},
},
Expand Down

0 comments on commit 0338239

Please sign in to comment.