Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tables: fix prefix index, when the charset is utf8, truncate it from runes (#7109) #7126

Merged
merged 1 commit into from
Jul 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ddl/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ func buildIndexColumns(columns []*model.ColumnInfo, idxColNames []*ast.IndexColN
Name: col.Name,
Offset: col.Offset,
Length: ic.Length,
Tp: &col.FieldType,
})
}

Expand Down
14 changes: 14 additions & 0 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3352,3 +3352,17 @@ func (s *testIntegrationSuite) TestTwoDecimalAssignTruncate(c *C) {
res := tk.MustQuery("select a, b from t1")
res.Check(testkit.Rows("123.12345 123.1"))
}

// TestPrefixIndex checks that a row stored in a utf8mb4 column can be found
// again through an equality lookup when the column has a prefix index.
// The inserted value is 8 characters long but 22 bytes in UTF-8, so it fits
// within the 12-character prefix while exceeding 12 bytes.
func (s *testIntegrationSuite) TestPrefixIndex(c *C) {
	const createTable = `CREATE TABLE t1 (
name varchar(12) DEFAULT NULL,
KEY pname (name(12))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci`

	kit := testkit.NewTestKit(c, s.store)
	defer s.cleanEnv(c)

	kit.MustExec("use test")
	kit.MustExec(createTable)
	kit.MustExec("insert into t1 values('借款策略集_网页');")

	// The equality predicate on the indexed column must return the full value.
	kit.MustQuery("select * from t1 where name = '借款策略集_网页';").
		Check(testkit.Rows("借款策略集_网页"))
}
2 changes: 2 additions & 0 deletions model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ type IndexColumn struct {
// for indexing;
// UnspecifiedLength if not using prefix indexing
Length int `json:"length"`
// Tp is the index column field type.
Tp *types.FieldType
}

// Clone clones IndexColumn.
Expand Down
39 changes: 29 additions & 10 deletions table/tables/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"bytes"
"encoding/binary"
"io"
"unicode/utf8"

"github.com/juju/errors"
"github.com/pingcap/tidb/kv"
Expand All @@ -26,6 +27,7 @@ import (
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/tablecodec"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/charset"
"github.com/pingcap/tidb/util/codec"
)

Expand Down Expand Up @@ -125,6 +127,32 @@ func (c *index) getIndexKeyBuf(buf []byte, defaultCap int) []byte {
return make([]byte, 0, defaultCap)
}

// truncateIndexValuesIfNeeded truncates, in place, every string or bytes datum
// whose index column was declared with a prefix length (col_name(length)) and
// returns the same slice.
//
// For index columns whose charset is utf8 or utf8mb4 the prefix length counts
// characters (runes); for any other charset, and for index columns that carry
// no field type, it counts bytes. Values that already fit are left untouched.
func (c *index) truncateIndexValuesIfNeeded(indexedValues []types.Datum) []types.Datum {
	for i := range indexedValues {
		v := &indexedValues[i]
		if v.Kind() != types.KindString && v.Kind() != types.KindBytes {
			continue
		}
		ic := c.idxInfo.Columns[i]
		if ic.Length == types.UnspecifiedLength {
			// Not a prefix index column: the whole value is indexed.
			continue
		}

		val := v.GetBytes()
		// Tp can be nil when the IndexColumn was built without a field type;
		// fall back to byte-based truncation rather than dereferencing nil.
		if ic.Tp != nil && (ic.Tp.Charset == charset.CharsetUTF8 || ic.Tp.Charset == charset.CharsetUTF8MB4) {
			// Find the byte offset just past the first ic.Length runes. Cutting
			// the original bytes at a rune boundary keeps the result an exact
			// prefix of the value (invalid bytes count as one rune each and are
			// preserved as-is) and avoids allocating a rune slice.
			end := 0
			for n := 0; n < ic.Length && end < len(val); n++ {
				_, size := utf8.DecodeRune(val[end:])
				end += size
			}
			if end < len(val) {
				// truncate value and limit its length
				v.SetString(string(val[:end]))
			}
			continue
		}

		if len(val) > ic.Length {
			// truncate value and limit its length
			v.SetBytes(val[:ic.Length])
		}
	}

	return indexedValues
}

// GenIndexKey generates storage key for index values. Returned distinct indicates whether the
// indexed values should be distinct in storage (i.e. whether handle is encoded in the key).
func (c *index) GenIndexKey(sc *stmtctx.StatementContext, indexedValues []types.Datum, h int64, buf []byte) (key []byte, distinct bool, err error) {
Expand All @@ -144,16 +172,7 @@ func (c *index) GenIndexKey(sc *stmtctx.StatementContext, indexedValues []types.

// For string columns, indexes can be created that use only the leading part of column values,
// using col_name(length) syntax to specify an index prefix length.
for i := 0; i < len(indexedValues); i++ {
v := &indexedValues[i]
if v.Kind() == types.KindString || v.Kind() == types.KindBytes {
ic := c.idxInfo.Columns[i]
if ic.Length != types.UnspecifiedLength && len(v.GetBytes()) > ic.Length {
// truncate value and limit its length
v.SetBytes(v.GetBytes()[:ic.Length])
}
}
}
indexedValues = c.truncateIndexValuesIfNeeded(indexedValues)
key = c.getIndexKeyBuf(buf, len(c.prefix)+len(indexedValues)*9+9)
key = append(key, []byte(c.prefix)...)
key, err = codec.EncodeKey(sc, key, indexedValues...)
Expand Down
4 changes: 2 additions & 2 deletions table/tables/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ func (s *testIndexSuite) TestCombineIndexSeek(c *C) {
ID: 2,
Name: model.NewCIStr("test"),
Columns: []*model.IndexColumn{
{},
{},
{Tp: &types.FieldType{}},
{Tp: &types.FieldType{}},
},
},
},
Expand Down