Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds longest prefix matching for custom indexes. #20

Merged
merged 3 commits into from
Mar 1, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions txn.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,39 @@ func (txn *Txn) First(table, index string, args ...interface{}) (interface{}, er
return value, nil
}

// LongestPrefix is used to fetch the longest prefix match for the given
// constraints on the index. Note that this will not work with the memdb
// StringFieldIndex because it adds null terminators which prevent the
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to make a call to PrefixFromArgs() to strip the \x00? Since this only supports prefix indexers, seems like that should be possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ryanuber I thought that, too. You can do that to strip the \x00 off the prefix, but when the LongestPrefix routine looks in the tree it hits the node above the actual entry in the index, so it doesn't report anything since that node isn't a leaf. Here's an example:

Say you have foo in your tree with a string field indexer. That'll be in there as foo\x00. When you do this algorithm for foobar it'll scan for the common prefix foo just fine and end up at a node with a prefix of foo and an edge to \x00 which is the proper leaf. It doesn't know that it should follow that edge, though, so it just says that there's no entry in there. Because we always put a suffix in there for the string indexer it always runs into this. If you knew to look up foo\x00 it would work, but you don't know what the longest prefix is when you do a query.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the right fix would be to stop adding the \x00 for the string indexer and instead put that into the compound indexer, because that's where it really matters. That's a bigger change though, and doing that wouldn't affect this interface, just the cautionary comment :-)

// algorithm from correctly finding a match (it will get to right before the
// null and fail to find a leaf node). This should only be used where the prefix
// given is capable of matching indexed entries directly, which typically only
// applies to a custom indexer. See the unit test for an example.
func (txn *Txn) LongestPrefix(table, index string, args ...interface{}) (interface{}, error) {
	// Reject any index name that is not given in its "_prefix" form.
	if !strings.HasSuffix(index, "_prefix") {
		return nil, fmt.Errorf("must use '%s_prefix' on index", index)
	}

	// Resolve the index schema together with the key to search for.
	schema, key, err := txn.getIndexValue(table, index, args...)
	if err != nil {
		return nil, err
	}

	// Keys of a non-unique index have the object IDs appended to them, so
	// the match is only meaningful against a unique index.
	if !schema.Unique {
		return nil, fmt.Errorf("index '%s' is not unique", index)
	}

	// Ask the root of the readable index for the longest stored prefix of
	// the key; a miss is reported as a nil result without an error.
	root := txn.readableIndex(table, schema.Name).Root()
	_, match, found := root.LongestPrefix(key)
	if !found {
		return nil, nil
	}
	return match, nil
}

// getIndexValue is used to get the IndexSchema and the value
// used to scan the index given the parameters. This handles prefix based
// scans when the index has the "_prefix" suffix. The index must support
Expand Down
187 changes: 186 additions & 1 deletion txn_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package memdb

import "testing"
import (
"fmt"
"strings"
"testing"
)

func testDB(t *testing.T) *MemDB {
db, err := NewMemDB(testValidSchema())
Expand Down Expand Up @@ -585,6 +589,187 @@ func TestTxn_InsertGet_Prefix(t *testing.T) {
checkResult(txn)
}

// CustomIndex is a minimal custom indexer that appends no suffix to the keys
// it produces, which keeps it compatible with the LongestPrefix lookup.
type CustomIndex struct{}

// FromObject builds the index key for a *TestObject by placing a null byte in
// front of its Foo field. Any other kind of object is rejected with an error.
func (*CustomIndex) FromObject(obj interface{}) (bool, []byte, error) {
	switch o := obj.(type) {
	case *TestObject:
		// The leading null keeps an empty Foo field addressable.
		return true, []byte("\x00" + o.Foo), nil
	default:
		return false, nil, fmt.Errorf("not a test object")
	}
}

// FromArgs always fails, since this indexer only supports prefix lookups.
func (*CustomIndex) FromArgs(args ...interface{}) ([]byte, error) {
	return nil, fmt.Errorf("only prefix lookups are supported")
}

// PrefixFromArgs expects exactly one string argument and returns it with a
// null byte placed in front, mirroring the keys built by FromObject.
func (*CustomIndex) PrefixFromArgs(args ...interface{}) ([]byte, error) {
	if len(args) != 1 {
		return nil, fmt.Errorf("must provide only a single argument")
	}

	switch v := args[0].(type) {
	case string:
		return []byte("\x00" + v), nil
	default:
		return nil, fmt.Errorf("argument must be a string: %#v", v)
	}
}

func TestTxn_InsertGet_LongestPrefix(t *testing.T) {
schema := &DBSchema{
Tables: map[string]*TableSchema{
"main": &TableSchema{
Name: "main",
Indexes: map[string]*IndexSchema{
"id": &IndexSchema{
Name: "id",
Unique: true,
Indexer: &StringFieldIndex{
Field: "ID",
},
},
"foo": &IndexSchema{
Name: "foo",
Unique: true,
Indexer: &CustomIndex{},
},
"nope": &IndexSchema{
Name: "nope",
Indexer: &CustomIndex{},
},
},
},
},
}

db, err := NewMemDB(schema)
if err != nil {
t.Fatalf("err: %v", err)
}

txn := db.Txn(true)

obj1 := &TestObject{
ID: "object1",
Foo: "foo",
}
obj2 := &TestObject{
ID: "object2",
Foo: "foozipzap",
}
obj3 := &TestObject{
ID: "object3",
Foo: "",
}

err = txn.Insert("main", obj1)
if err != nil {
t.Fatalf("err: %v", err)
}
err = txn.Insert("main", obj2)
if err != nil {
t.Fatalf("err: %v", err)
}
err = txn.Insert("main", obj3)
if err != nil {
t.Fatalf("err: %v", err)
}

checkResult := func(txn *Txn) {
raw, err := txn.LongestPrefix("main", "foo_prefix", "foo")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj1 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "foobar")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj1 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "foozip")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj1 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "foozipza")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj1 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "foozipzap")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj2 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "foozipzapzone")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj2 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "funky")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj3 {
t.Fatalf("bad: %#v", raw)
}

raw, err = txn.LongestPrefix("main", "foo_prefix", "")
if err != nil {
t.Fatalf("err: %v", err)
}
if raw != obj3 {
t.Fatalf("bad: %#v", raw)
}
}

// Check the results within the txn
checkResult(txn)

// Commit and start a new read transaction
txn.Commit()
txn = db.Txn(false)

// Check the results in a new txn
checkResult(txn)

// Try some disallowed index types.
_, err = txn.LongestPrefix("main", "foo", "")
if err == nil || !strings.Contains(err.Error(), "must use 'foo_prefix' on index") {
t.Fatalf("bad: %v", err)
}
_, err = txn.LongestPrefix("main", "nope_prefix", "")
if err == nil || !strings.Contains(err.Error(), "index 'nope_prefix' is not unique") {
t.Fatalf("bad: %v", err)
}
}

func TestTxn_Defer(t *testing.T) {
db := testDB(t)
txn := db.Txn(true)
Expand Down