diff --git a/.gitignore b/.gitignore index 11dfdee..330d219 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .idea examples/examples +examples/test.blv # Binaries for programs and plugins *.exe diff --git a/README.md b/README.md index c90dc46..71cb345 100644 --- a/README.md +++ b/README.md @@ -6,4 +6,48 @@ [![codecov](https://codecov.io/gh/vcaesar/gse-bleve/branch/master/graph/badge.svg)](https://codecov.io/gh/vcaesar/gse-bleve) [![Go Report Card](https://goreportcard.com/badge/github.com/vcaesar/gse-bleve)](https://goreportcard.com/report/github.com/vcaesar/gse-bleve) [![GoDoc](https://godoc.org/github.com/vcaesar/gse-bleve?status.svg)](https://godoc.org/github.com/vcaesar/gse-bleve) -[![Release](https://github-release-version.herokuapp.com/github/vcaesar/gse-bleve/release.svg?style=flat)](https://github.com/vcaesar/gse-bleve/releases/latest) \ No newline at end of file +[![Release](https://github-release-version.herokuapp.com/github/vcaesar/gse-bleve/release.svg?style=flat)](https://github.com/vcaesar/gse-bleve/releases/latest) + + +## Use + +```go +package main + +import ( + "fmt" + "os" + + "github.com/blevesearch/bleve/v2" + gse "github.com/vcaesar/gse-bleve" +) + +func main() { + opt := gse.Option{ + Index: "test.blv", + Dicts: "emend, zh", Stop: "", + Opt: "search-hmm", Trim: "trim"} + + index, err := gse.New(opt) + if err != nil { + fmt.Println("new mapping error is: ", err) + } + + text := `他在命运的沉浮中随波逐流, 扮演着受害与加害者的双重角色` + err = index.Index("1", text) + index.Index("3", text+"沉浮") + index.Index("4", `In view, a humble vaudevillian veteran cast vicariously as both victim and villain vicissitudes of fate.`) + index.Index("2", `It's difficult to understand the sum of a person's life.`) + if err != nil { + fmt.Println("index error: ", err) + } + + query := "命运的沉浮" + req := bleve.NewSearchRequest(bleve.NewQueryStringQuery(query)) + req.Highlight = bleve.NewHighlight() + res, err := index.Search(req) + fmt.Println(res, err) + + os.RemoveAll("test.blv") +} +``` \ No newline at end of file diff --git a/analyzer.go b/analyzer.go index 75cafe5..6b8e154 100644 --- a/analyzer.go +++ b/analyzer.go @@ -1 +1,31 @@ +// Copyright 2016 Evans. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package gsebleve + +import ( + "errors" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +func analyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { + tokenizerName, ok := config["tokenizer"].(string) + if !ok { + return nil, errors.New("must have tokenizer") + } + + tokenizer, err := cache.TokenizerNamed(tokenizerName) + if err != nil { + return nil, err + } + + az := &analysis.Analyzer{Tokenizer: tokenizer} + return az, nil +} + +func init() { + registry.RegisterAnalyzer(TokenName, analyzerConstructor) +} diff --git a/bleve.go b/bleve.go index 75cafe5..ac36111 100644 --- a/bleve.go +++ b/bleve.go @@ -1 +1,154 @@ +// Copyright 2016 Evans. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package gsebleve + +import ( + "strings" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" + "github.com/go-ego/gse" +) + +const ( + TokenName = "gse" +) + +// GseCut gse cut token structure +type GseCut struct { + seg *gse.Segmenter + // stop string + opt string + trim string +} + +// NewGseCut create a gse cut tokenizer +func NewGse(dicts, stop, opt, trim string) (*GseCut, error) { + var ( + seg gse.Segmenter + err error + ) + + seg.SkipLog = true + if dicts == "" { + dicts = "zh" + } + + if strings.Contains(dicts, "emend") { + dicts = strings.Replace(dicts, "emend, ", "", 1) + err = seg.LoadDictEmbed(dicts) + } else { + err = seg.LoadDict(dicts) + } + + if stop != "" { + if strings.Contains(stop, "emend") { + stop = strings.Replace(stop, "emend, ", "", 1) + seg.LoadStopEmbed(stop) + } else { + seg.LoadStop(stop) + } + } + return &GseCut{&seg, opt, trim}, err +} + +// Trim trim the unused token string +func (c *GseCut) Trim(s []string) []string { + if c.trim == "symbol" { + return c.seg.TrimSymbol(s) + } + + if c.trim == "punct" { + return c.seg.TrimPunct(s) + } + + if c.trim == "trim" { + return c.seg.Trim(s) + } + + return s +} + +// Cut option the gse cut mode +func (c *GseCut) Cut(text string, opt string) []string { + if c.trim == "html" { + return c.seg.CutTrimHtml(text) + } + + if c.trim == "url" { + return c.seg.CutUrl(text) + } + + if opt == "search-hmm" { + return c.seg.CutSearch(text, true) + } + if opt == "search" { + return c.seg.CutSearch(text) + } + + if opt == "search-dag" { + return c.seg.CutSearch(text, false) + } + + if opt == "all" { + return c.seg.CutAll(text) + } + + if opt == "hmm" { + return c.seg.Cut(text, true) + } + + if opt == "dag" { + return c.seg.Cut(text, false) + } + + return c.seg.Cut(text) +} + +// Tokenize cut the text to bleve token stream +func (c *GseCut) Tokenize(text []byte) analysis.TokenStream { + result := make(analysis.TokenStream, 0) + cuts := c.Trim(c.Cut(string(text), c.opt)) + // fmt.Println("cuts: ", cuts) + azs := c.seg.Analyze(cuts) + for _, az := range azs { + token := analysis.Token{ + Term: []byte(az.Text), + Start: az.Start, + End: az.End, + Position: az.Position, + Type: analysis.Ideographic, + } + result = append(result, &token) + } + return result +} + +func tokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { + dicts, ok := config["dicts"].(string) + if !ok { + dicts = "" + } + stop, ok := config["stop"].(string) + if !ok { + stop = "" + } + + opt, ok := config["opt"].(string) + if !ok { + opt = "" + } + + trim, ok := config["trim"].(string) + if !ok { + trim = "" + } + + return NewGse(dicts, stop, opt, trim) +} + +func init() { + registry.RegisterTokenizer(TokenName, tokenizerConstructor) +} diff --git a/examples/main.go b/examples/main.go index 06ab7d0..0c327cc 100644 --- a/examples/main.go +++ b/examples/main.go @@ -1 +1,38 @@ package main + +import ( + "fmt" + "os" + + "github.com/blevesearch/bleve/v2" + gse "github.com/vcaesar/gse-bleve" +) + +func main() { + opt := gse.Option{ + Index: "test.blv", + Dicts: "emend, zh", Stop: "", + Opt: "search-hmm", Trim: "trim"} + + index, err := gse.New(opt) + if err != nil { + fmt.Println("new mapping error is: ", err) + } + + text := `他在命运的沉浮中随波逐流, 扮演着受害与加害者的双重角色` + err = index.Index("1", text) + index.Index("3", text+"沉浮") + index.Index("4", `In view, a humble vaudevillian veteran cast vicariously as both victim and villain vicissitudes of fate.`) + index.Index("2", `It's difficult to understand the sum of a person's life.`) + if err != nil { + fmt.Println("index error: ", err) + } + + query := "命运的沉浮" + req := bleve.NewSearchRequest(bleve.NewQueryStringQuery(query)) + req.Highlight = bleve.NewHighlight() + res, err := index.Search(req) + fmt.Println(res, err) + + os.RemoveAll("test.blv") +} diff --git a/go.mod b/go.mod index 5bdecec..a2f1ea7 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,26 @@ require ( ) require ( + github.com/RoaringBitmap/roaring v0.9.4 // indirect + github.com/bits-and-blooms/bitset v1.2.0 // indirect github.com/blevesearch/bleve_index_api v1.0.1 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/mmap-go v1.0.3 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.1.0 // indirect + github.com/blevesearch/segment v0.9.0 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect github.com/blevesearch/upsidedown_store_api v1.0.1 // indirect + github.com/blevesearch/vellum v1.0.6 // indirect + github.com/blevesearch/zapx/v11 v11.3.0 // indirect + github.com/blevesearch/zapx/v12 v12.3.0 // indirect + github.com/blevesearch/zapx/v13 v13.3.0 // indirect + github.com/blevesearch/zapx/v14 v14.3.0 // indirect + github.com/blevesearch/zapx/v15 v15.3.0 // indirect + github.com/golang/protobuf v1.3.2 // indirect + github.com/golang/snappy v0.0.1 // indirect + github.com/mschoch/smat v0.2.0 // indirect + github.com/steveyen/gtreap v0.1.0 // indirect github.com/vcaesar/cedar v0.10.1 // indirect + go.etcd.io/bbolt v1.3.5 // indirect + golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd // indirect ) diff --git a/go.sum b/go.sum index a3e8b7d..8bf89b5 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,9 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/RoaringBitmap/roaring v0.4.23/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo= +github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo= github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= github.com/blevesearch/bleve v1.0.14 h1:Q8r+fHTt35jtGXJUM0ULwM3Tzg+MRfyai4ZkWDy2xO4= github.com/blevesearch/bleve v1.0.14/go.mod h1:e/LJTr+E7EaoVdkQZTfoz7dt4KoDNvDbLb8MSKuNTLQ= @@ -14,7 +16,9 @@ github.com/blevesearch/cld2 v0.0.0-20200327141045-8b5f551d37f5/go.mod h1:PN0QNTL github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= +github.com/blevesearch/mmap-go v1.0.3 h1:7QkALgFNooSq3a46AE+pWeKASAZc9SiNFJhDGF1NDx4= github.com/blevesearch/mmap-go v1.0.3/go.mod h1:pYvKl/grLQrBxuaRYgoTssa4rVujYYeenDp++2E+yvs= +github.com/blevesearch/scorch_segment_api/v2 v2.1.0 h1:NFwteOpZEvJk5Vg0H6gD0hxupsG3JYocE4DBvsA2GZI= github.com/blevesearch/scorch_segment_api/v2 v2.1.0/go.mod h1:uch7xyyO/Alxkuxa+CGs79vw0QY8BENSBjg6Mw5L5DE= github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac= github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ= @@ -22,16 +26,22 @@ github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= github.com/blevesearch/upsidedown_store_api v1.0.1 h1:1SYRwyoFLwG3sj0ed89RLtM15amfX2pXlYbFOnF8zNU= github.com/blevesearch/upsidedown_store_api v1.0.1/go.mod h1:MQDVGpHZrpe3Uy26zJBf/a8h0FZY6xJbthIMm8myH2Q= +github.com/blevesearch/vellum v1.0.6 h1:F7mKWx+xt7e+i3LGtCUhLI2rLonBgT1uke7WP3mL5v4= github.com/blevesearch/vellum v1.0.6/go.mod h1:vMf1MTDsr8l1N5GEi6idH9zF0w8XuY8W8gFuwkIB77g= github.com/blevesearch/zap/v11 v11.0.14/go.mod h1:MUEZh6VHGXv1PKx3WnCbdP404LGG2IZVa/L66pyFwnY= github.com/blevesearch/zap/v12 v12.0.14/go.mod h1:rOnuZOiMKPQj18AEKEHJxuI14236tTQ1ZJz4PAnWlUg= github.com/blevesearch/zap/v13 v13.0.6/go.mod h1:L89gsjdRKGyGrRN6nCpIScCvvkyxvmeDCwZRcjjPCrw= github.com/blevesearch/zap/v14 v14.0.5/go.mod h1:bWe8S7tRrSBTIaZ6cLRbgNH4TUDaC9LZSpRGs85AsGY= github.com/blevesearch/zap/v15 v15.0.3/go.mod h1:iuwQrImsh1WjWJ0Ue2kBqY83a0rFtJTqfa9fp1rbVVU= +github.com/blevesearch/zapx/v11 v11.3.0 h1:+jY21MuYjDRgx+3qaa0wxc4U7GBn4/jQGPpnLwjstsw= github.com/blevesearch/zapx/v11 v11.3.0/go.mod h1:HGTEgEu2wTQlsU6qo7rzD6DNhBX6A7zfvmqUZDkOshA= +github.com/blevesearch/zapx/v12 v12.3.0 h1:EgcIPrFDwzCGXTFLB2vwhPhofK919jkgOSMVPjHiHw8= github.com/blevesearch/zapx/v12 v12.3.0/go.mod h1:s9mAstyfG0dNfpPXYvn2zZjAFq3ShI/+6g5Qhfvpjc0= +github.com/blevesearch/zapx/v13 v13.3.0 h1:5P26oHX2Et7RpQ0MTOru4oYp+W1twubdxWbQr9dBU+Q= github.com/blevesearch/zapx/v13 v13.3.0/go.mod h1:4MuSkz6EozY3YXszSDIEGY+OWewS2zeL9uETAGTouXI= +github.com/blevesearch/zapx/v14 v14.3.0 h1:RuWkG3TNUBjQK65CKygxb6/TWUSzxgcWh38QG1fzkeo= github.com/blevesearch/zapx/v14 v14.3.0/go.mod h1:vY+X3JRtXEFaNmmcfgUspIyUHekt/fwIikufRj4TD94= +github.com/blevesearch/zapx/v15 v15.3.0 h1:e2Uxc3qHrcpoi5mIlFRxoTmfAsZUqzbG/nuhEqG9b8g= github.com/blevesearch/zapx/v15 v15.3.0/go.mod h1:Q01qHJAnV06LeawXiR1MYqonu9ZZe1ICHccZtSiBXoI= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= @@ -44,6 +54,7 @@ github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d/go.mod h1:URriBxXwVq5ijiJ1 github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM= github.com/cznic/strutil v0.0.0-20181122101858-275e90344537/go.mod h1:AHHPPPXTw0h6pVabbcbyGRK1DckRn7r/STdZEeIDzZc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64= github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg= @@ -54,8 +65,10 @@ github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1T github.com/go-ego/gse v0.69.8 h1:enzXx0Z/U5UhmUAZSD/bh1rP83hMfrhF/ZEBe784nDU= github.com/go-ego/gse v0.69.8/go.mod h1:TXy19dAfok1+NOqfTUFVqifNAcAz7srXfG+jLC8RZ0s= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= @@ -69,12 +82,14 @@ github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czP github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= @@ -85,9 +100,11 @@ github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tL github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= +github.com/steveyen/gtreap v0.1.0 h1:CjhzTa274PyJLJuMZwIzCO1PfC00oRa8d1Kc78bFXJM= github.com/steveyen/gtreap v0.1.0/go.mod h1:kl/5J7XbrOmlIbYIXdRHDDE5QxHqpk0cmkT7Z4dM9/Y= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/tebeka/snowball v0.4.2/go.mod h1:4IfL14h1lvwZcp1sfXuuc7/7yCsvVffTWxWxCLfFpYg= @@ -100,6 +117,7 @@ github.com/vcaesar/tt v0.20.0 h1:9t2Ycb9RNHcP0WgQgIaRKJBB+FrRdejuaL6uWIHuoBA= github.com/vcaesar/tt v0.20.0/go.mod h1:GHPxQYhn+7OgKakRusH7KJ0M5MhywoeLb8Fcffs/Gtg= github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -109,10 +127,12 @@ golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/index.go b/index.go new file mode 100644 index 0000000..2bddb52 --- /dev/null +++ b/index.go @@ -0,0 +1,64 @@ +// Copyright 2016 Evans. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gsebleve + +import ( + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/mapping" +) + +// Option gse bleve option structure +type Option struct { + Index string + Dicts, Stop, Opt, Trim string +} + +// NewMapping new bleve index mapping +func NewMapping(opt Option) (*mapping.IndexMappingImpl, error) { + mapping := bleve.NewIndexMapping() + + err := mapping.AddCustomTokenizer(TokenName, map[string]interface{}{ + "type": TokenName, + "dicts": opt.Dicts, + "stop": opt.Stop, + "opt": opt.Opt, + "trim": opt.Trim, + }) + if err != nil { + return mapping, err + } + + err = mapping.AddCustomAnalyzer(TokenName, map[string]interface{}{ + "type": TokenName, + "tokenizer": TokenName, + }) + + if err != nil { + return mapping, err + } + + mapping.DefaultAnalyzer = TokenName + return mapping, nil +} + +// New new bleve index +func New(opt Option) (bleve.Index, error) { + mapping, err := NewMapping(opt) + if err != nil { + return nil, err + } + + return bleve.New(opt.Index, mapping) +} + +// NewMem new bleve index only memory +func NewMem(opt Option) (bleve.Index, error) { + mapping, err := NewMapping(opt) + if err != nil { + return nil, err + } + + return bleve.NewMemOnly(mapping) +}