-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
vindexes: Efficient unicode hashing (#14395)
Signed-off-by: Vicent Marti <vmg@strn.cat> Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com> Co-authored-by: Dirkjan Bussink <d.bussink@gmail.com>
- Loading branch information
Showing
39 changed files
with
83,546 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Add no patterns to .gitignore except for files generated by the build. | ||
last-change | ||
/DATA | ||
# This file is rather large and the tests really only need to be run | ||
# after generation. | ||
/unicode/norm/data_test.go |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
Copyright (c) 2009 The Go Authors. All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are | ||
met: | ||
|
||
* Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
* Redistributions in binary form must reproduce the above | ||
copyright notice, this list of conditions and the following disclaimer | ||
in the documentation and/or other materials provided with the | ||
distribution. | ||
* Neither the name of Google Inc. nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Additional IP Rights Grant (Patents) | ||
|
||
"This implementation" means the copyrightable works distributed by | ||
Google as part of the Go project. | ||
|
||
Google hereby grants to You a perpetual, worldwide, non-exclusive, | ||
no-charge, royalty-free, irrevocable (except as stated in this section) | ||
patent license to make, have made, use, offer to sell, sell, import, | ||
transfer and otherwise run, modify and propagate the contents of this | ||
implementation of Go, where such license applies only to those patent | ||
claims, both currently owned or controlled by Google and acquired in | ||
the future, licensable by Google that are necessarily infringed by this | ||
implementation of Go. This grant does not include claims that would be | ||
infringed only as a consequence of further modification of this | ||
implementation. If you or your agent or exclusive licensee institute or | ||
order or agree to the institution of patent litigation against any | ||
entity (including a cross-claim or counterclaim in a lawsuit) alleging | ||
that this implementation of Go or any code incorporated within this | ||
implementation of Go constitutes direct or contributory patent | ||
infringement, or inducement of patent infringement, then any patent | ||
rights granted to you under this License for this implementation of Go | ||
shall terminate as of the date such litigation is filed. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
// Copyright 2012 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
// TODO: remove hard-coded versions when we have implemented fractional weights. | ||
// The current implementation is incompatible with later CLDR versions. | ||
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0 | ||
|
||
// Package collate contains types for comparing and sorting Unicode strings | ||
// according to a given collation order. | ||
package collate // import "vitess.io/vitess/go/mysql/collations/vindex/collate" | ||
|
||
import ( | ||
"hash" | ||
|
||
"vitess.io/vitess/go/mysql/collations/vindex/internal/colltab" | ||
) | ||
|
||
type Hasher struct { | ||
iter colltab.Iter | ||
hash hash.Hash | ||
scratch [32]colltab.Elem | ||
} | ||
|
||
// New returns a new Hasher initialized for the given hash function | ||
func New(h hash.Hash) *Hasher { | ||
c := &Hasher{} | ||
c.iter.Weighter = getTable(tableIndex{0x15, 0x0}) | ||
c.iter.Elems = c.scratch[:0] | ||
c.hash = h | ||
return c | ||
} | ||
|
||
func (c *Hasher) Hash(str []byte) []byte { | ||
c.hash.Reset() | ||
c.iter.SetInput(str) | ||
|
||
var scratch [64]byte | ||
var pos int | ||
|
||
for c.iter.Next() { | ||
for n := 0; n < c.iter.N; n++ { | ||
if w := c.iter.Elems[n].Primary(); w > 0 { | ||
if w <= 0x7FFF { | ||
if len(scratch)-pos < 2 { | ||
c.hash.Write(scratch[:pos]) | ||
pos = 0 | ||
} | ||
scratch[pos+0] = uint8(w >> 8) | ||
scratch[pos+1] = uint8(w) | ||
pos += 2 | ||
} else { | ||
if len(scratch)-pos < 3 { | ||
c.hash.Write(scratch[:pos]) | ||
pos = 0 | ||
} | ||
scratch[pos+0] = uint8(w>>16) | 0x80 | ||
scratch[pos+1] = uint8(w >> 8) | ||
scratch[pos+2] = uint8(w) | ||
pos += 3 | ||
} | ||
} | ||
} | ||
c.iter.Discard() | ||
} | ||
c.hash.Write(scratch[:pos]) | ||
return c.hash.Sum(nil) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright 2013 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package collate | ||
|
||
import "vitess.io/vitess/go/mysql/collations/vindex/internal/colltab" | ||
|
||
const blockSize = 64 | ||
|
||
func getTable(t tableIndex) *colltab.Table { | ||
return &colltab.Table{ | ||
Index: colltab.Trie{ | ||
Index0: mainLookup[:][blockSize*t.lookupOffset:], | ||
Values0: mainValues[:][blockSize*t.valuesOffset:], | ||
Index: mainLookup[:], | ||
Values: mainValues[:], | ||
}, | ||
ExpandElem: mainExpandElem[:], | ||
ContractTries: mainCTEntries[:], | ||
ContractElem: mainContractElem[:], | ||
} | ||
} | ||
|
||
// tableIndex holds the information needed to construct a table for a
// certain locale on top of the main table.
//
// Both offsets are expressed in blocks: getTable multiplies lookupOffset and
// valuesOffset by blockSize to find where the locale's first-level index and
// values begin inside the main lookup and values tables, respectively.
type tableIndex struct {
	lookupOffset, valuesOffset uint32
}
Oops, something went wrong.