Skip to content

Commit

Permalink
perf: 优化 trie 匹配算法
Browse files Browse the repository at this point in the history
  • Loading branch information
nopdan committed Sep 22, 2023
1 parent 5044bcc commit ca63017
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 178 deletions.
46 changes: 14 additions & 32 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,12 @@ import (
)

var conf = &struct {
Text []string // 文本
Dict []string // 码表
Texts []string // 文本
Dicts []string // 码表

Single bool // 单字模式
Algo string // 匹配算法
Stable bool // 按码表顺序(覆盖algo)
PressSpaceBy string // 空格按键方式 left|right|both
Clean bool // 只统计词库中的词条
smq.Dict

Verbose bool // 输出全部数据
Split bool // 输出分词数据
Stat bool // 输出词条数据
Json bool // 输出json数据
HTML bool // 保存 html 结果

Expand All @@ -28,11 +22,12 @@ var conf = &struct {
}{}

func init() {
rootCmd.Flags().StringArrayVarP(&conf.Text, "text", "t", nil, "文本文件或文件夹,可以为多个")
rootCmd.Flags().StringArrayVarP(&conf.Dict, "dict", "i", nil, "码表文件或文件夹,可以为多个")
rootCmd.Flags().StringArrayVarP(&conf.Texts, "text", "t", nil, "文本文件或文件夹,可以为多个")
rootCmd.Flags().StringArrayVarP(&conf.Dicts, "dict", "i", nil, "码表文件或文件夹,可以为多个")

rootCmd.Flags().BoolVarP(&conf.Single, "single", "s", false, "启用单字模式")
rootCmd.Flags().BoolVarP(&conf.Stable, "stable", "", false, "按码表顺序")
rootCmd.Flags().BoolVarP(&conf.UseTail, "tail", "", false, "use tail")
rootCmd.Flags().StringVarP(&conf.PressSpaceBy, "space", "k", "both", "空格按键方式 left|right|both")
rootCmd.Flags().BoolVarP(&conf.Clean, "clean", "c", false, "只统计词库中的词条")

Expand All @@ -47,13 +42,10 @@ func init() {
}

func _root() {
if len(conf.Dict) == 0 || len(conf.Text) == 0 {
if len(conf.Dicts) == 0 || len(conf.Texts) == 0 {
fmt.Println("输入有误")
return
}
if conf.Stable {
conf.Algo = "strie"
}
if conf.Verbose {
conf.Split = true
conf.Stat = true
Expand All @@ -62,8 +54,8 @@ func _root() {
}
// 开始计时
start := time.Now()
texts := make([]string, 0, len(conf.Text))
for _, v := range conf.Text {
texts := make([]string, 0, len(conf.Texts))
for _, v := range conf.Texts {
texts = append(texts, getFiles(v)...)
}
fmt.Println("载入文本:")
Expand All @@ -72,28 +64,18 @@ func _root() {
}
fmt.Println()

dictNames := make([]string, 0, len(conf.Dict))
for _, v := range conf.Dict {
dictNames := make([]string, 0, len(conf.Dicts))
for _, v := range conf.Dicts {
dictNames = append(dictNames, getFiles(v)...)
}
newDict := func() *smq.Dict {
return &smq.Dict{
Single: conf.Single,
Algorithm: conf.Algo,
PressSpaceBy: conf.PressSpaceBy,
Clean: conf.Clean,
Split: conf.Split,
Stat: conf.Stat,
}
}
dicts := make([]*smq.Dict, 0, len(dictNames))
fmt.Println("载入码表:")
dictStartTime := time.Now()
mid := time.Now()
for _, v := range dictNames {
d := newDict()
d.Load(v)
dicts = append(dicts, d)
dict := conf.Dict
dict.Load(v)
dicts = append(dicts, &dict)
if len(dictNames) == 1 {
fmt.Println("=> ", v)
} else {
Expand Down
8 changes: 1 addition & 7 deletions internal/serve/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,9 @@ func parseOptions(src []byte) Options {
}

func toSmqDict(opt optDict) *smq.Dict {
var algo string
if opt.Stable {
algo = "strie"
} else {
algo = "trie"
}
dict := &smq.Dict{
Single: opt.Single,
Algorithm: algo,
Stable: opt.Stable,
PressSpaceBy: opt.Space,
}
dict.Load("dict/" + opt.Path)
Expand Down
17 changes: 0 additions & 17 deletions pkg/matcher/matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,3 @@ type Matcher interface {
// 匹配下一个词,返回匹配到的词长,编码和候选位置
Match([]rune) (int, string, int)
}

// New returns the Matcher implementation selected by alg.
//
//   - "single" selects the matcher built by NewSingle (noted in the original
//     comments as a single-character matcher backed by a rune-keyed hash map).
//   - "strie" or "s" selects the matcher built by NewStableTrie (noted as the
//     stable trie).
//   - Any other value, including the empty string, falls back to NewTrie,
//     the default trie matcher.
func New(alg string) Matcher {
	if alg == "single" {
		return NewSingle()
	}
	if alg == "strie" || alg == "s" {
		return NewStableTrie()
	}
	// Unrecognised names deliberately fall through to the default algorithm.
	return NewTrie()
}
72 changes: 0 additions & 72 deletions pkg/matcher/stable_trie.go

This file was deleted.

Loading

0 comments on commit ca63017

Please sign in to comment.