From 6acea779533f9480157aa8e06c1e831b79b881a3 Mon Sep 17 00:00:00 2001 From: nopdan Date: Sun, 7 May 2023 19:38:41 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E5=87=8F=E5=B0=91=E4=BE=9D=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- cmd/gen.go | 35 ++++----- cmd/go.go | 159 ---------------------------------------- cmd/main.go | 30 ++++++++ cmd/root.go | 163 +++++++++++++++++++++++++++++++++++++----- go.mod | 10 +-- go.sum | 14 ++-- internal/gen/gen.go | 55 ++++++-------- internal/gen/jisu.go | 34 ++++----- internal/gen/tsv.go | 54 ++++++++++++++ internal/gen/write.go | 16 ++--- pkg/smq/dict.go | 6 +- pkg/smq/smq.go | 6 +- 13 files changed, 308 insertions(+), 276 deletions(-) delete mode 100644 cmd/go.go create mode 100644 cmd/main.go create mode 100644 internal/gen/tsv.go diff --git a/README.md b/README.md index c22dbaf..c6b60b4 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ 使用 `.\smq.exe gen` 命令转换格式。 -支持格式:_极速赛码表(jisu)_ 和 [_其他_](https://github.com/flowerime/rose) +支持格式:_极速赛码表(jisu|js)_、_多多(duoduo|dd)_、_冰凌(bingling|bl)_ ### 主命令参数解释 diff --git a/cmd/gen.go b/cmd/gen.go index b82adeb..50bca3b 100644 --- a/cmd/gen.go +++ b/cmd/gen.go @@ -8,7 +8,7 @@ import ( "github.com/AlecAivazis/survey/v2" "github.com/AlecAivazis/survey/v2/terminal" "github.com/flowerime/gosmq/internal/gen" - util "github.com/flowerime/goutil" + "github.com/nopdan/ku" "github.com/spf13/cobra" ) @@ -16,14 +16,7 @@ var genCmd = &cobra.Command{ Use: "gen", Short: "转换赛码表", Run: func(cmd *cobra.Command, args []string) { - if len(args) == 0 { - fmt.Println("交互模式转换赛码表") - genWithSurvey() - return - } - table := Config.Gen() - path := "dict/" + util.GetFileName(Config.Path) + ".txt" - gen.Write(table, path) + _gen() }, } @@ -37,7 +30,14 @@ func init() { genCmd.Flags().BoolVarP(&Config.SortByWordLen, "sort", "s", false, "按照词长重新排序") } -func genWithSurvey() { +func _gen() { + // 命令行模式 + if Config.Path != "" { + gen_write(Config) + return + } + + // 交互模式 var conf gen.Config handle := func(err error) { if err != nil { @@ -58,15 +58,14 @@ func genWithSurvey() { err = survey.AskOne(&survey.Select{ Message: "码表格式:", - Options: []string{"极速赛码表", "多多(词在前)", "冰凌(编码在前)", "极点"}, + Options: []string{"极速赛码表", "多多", "冰凌"}, }, &conf.Format) handle(err) mFormat := make(map[string]string) mFormat["极速赛码表"] = "jisu" - mFormat["多多(词在前)"] = "duoduo" - mFormat["冰凌(编码在前)"] = "bingling" - mFormat["极点"] = "jidian" + mFormat["多多"] = "duoduo" + mFormat["冰凌"] = "bingling" conf.Format = mFormat[conf.Format] if conf.Format != "jisu" { @@ -91,10 +90,12 @@ func genWithSurvey() { handle(err) } - fmt.Println() - fmt.Println(conf) + fmt.Printf("\nconf: %v\n", conf) + gen_write(conf) +} +func gen_write(conf gen.Config) { table := conf.Gen() - path := "dict/" + util.GetFileName(conf.Path) + ".txt" + path := "dict/" + ku.GetFileName(conf.Path) + ".txt" gen.Write(table, path) } diff --git a/cmd/go.go b/cmd/go.go deleted file mode 100644 index adbe777..0000000 --- a/cmd/go.go +++ /dev/null @@ -1,159 +0,0 @@ -package cmd - -import ( - "fmt" - "time" - - "github.com/flowerime/gosmq/pkg/smq" -) - -var conf = &struct { - Text []string // 文本 - Dict []string // 码表 - - Single bool // 单字模式 - Algo string // 匹配算法 - Stable bool // 按码表顺序(覆盖algo) - PressSpaceBy string // 空格按键方式 left|right|both - Clean bool // 只统计词库中的词条 - - Verbose bool // 输出全部数据 - Split bool // 输出分词数据 - Stat bool // 输出词条数据 - Json bool // 输出json数据 - HTML bool // 保存 html 结果 - - Hidden bool // 隐藏 cli 结果展示 - Merge bool // 合并多文本的结果 -}{} - -func init() { - rootCmd.Flags().StringArrayVarP(&conf.Text, "text", "t", nil, "文本文件或文件夹,可以为多个") - rootCmd.Flags().StringArrayVarP(&conf.Dict, "dict", "i", nil, "码表文件或文件夹,可以为多个") - - rootCmd.Flags().BoolVarP(&conf.Single, "single", "s", false, "启用单字模式") - rootCmd.Flags().BoolVarP(&conf.Stable, "stable", "", false, "按码表顺序") - rootCmd.Flags().StringVarP(&conf.PressSpaceBy, "space", "k", "both", "空格按键方式 left|right|both") - rootCmd.Flags().BoolVarP(&conf.Clean, "clean", "c", false, "只统计词库中的词条") - - rootCmd.Flags().BoolVarP(&conf.Verbose, "verbose", "v", false, "输出全部数据") - rootCmd.Flags().BoolVarP(&conf.Split, "split", "", false, "输出分词数据") - rootCmd.Flags().BoolVarP(&conf.Stat, "stat", "", false, "输出词条数据") - rootCmd.Flags().BoolVarP(&conf.Json, "json", "", false, "输出 json 数据") - rootCmd.Flags().BoolVarP(&conf.HTML, "html", "", false, "保存 html 结果") - - rootCmd.Flags().BoolVarP(&conf.Hidden, "hidden", "", false, "隐藏 cli 结果展示") - rootCmd.Flags().BoolVarP(&conf.Merge, "merge", "m", false, "合并多文本的结果") -} - -func goCli() { - if len(conf.Dict) == 0 || len(conf.Text) == 0 { - fmt.Println("输入有误") - return - } - if conf.Stable { - conf.Algo = "strie" - } - if conf.Verbose { - conf.Split = true - conf.Stat = true - conf.Json = true - conf.HTML = true - } - // 开始计时 - start := time.Now() - texts := make([]string, 0, len(conf.Text)) - for _, v := range conf.Text { - texts = append(texts, getFiles(v)...) - } - fmt.Println("载入文本:") - for _, v := range texts { - fmt.Println("-> ", v) - } - fmt.Println() - - dictNames := make([]string, 0, len(conf.Dict)) - for _, v := range conf.Dict { - dictNames = append(dictNames, getFiles(v)...) - } - newDict := func() *smq.Dict { - return &smq.Dict{ - Single: conf.Single, - Algorithm: conf.Algo, - PressSpaceBy: conf.PressSpaceBy, - Clean: conf.Clean, - Split: conf.Split, - Stat: conf.Stat, - } - } - dicts := make([]*smq.Dict, 0, len(dictNames)) - fmt.Println("载入码表:") - dictStartTime := time.Now() - mid := time.Now() - for _, v := range dictNames { - d := newDict() - d.Load(v) - dicts = append(dicts, d) - if len(dictNames) == 1 { - fmt.Println("=> ", v) - } else { - fmt.Println("=> ", v, "\t耗时:", time.Since(mid)) - mid = time.Now() - } - } - fmt.Printf("载入码表耗时:%v\n\n", time.Since(dictStartTime)) - - // race - fmt.Println("比赛开始...") - textLenTotal := 0 - var printEnd = func() { - if conf.Split { - if conf.Merge { - fmt.Println("--merge 不会输出分词结果。") - } else { - fmt.Println("已输出分词结果") - } - } - if conf.Stat { - fmt.Println("已输出词条统计数据") - } - if conf.HTML { - fmt.Println("已保存 html 结果") - } - if conf.Json { - fmt.Println("已输出 json 数据") - } - fmt.Printf("共载入 %d 个码表,%d 个文本,总字数 %d,总耗时:%v\n", len(dicts), len(texts), textLenTotal, time.Since(start)) - } - - if conf.Merge { - resArr := smq.ParallelMerge(texts, dicts) - for _, res := range resArr { - if !conf.Hidden { - printSep() - Output([]*smq.Result{res}) - } - OutputHTML([]*smq.Result{res}, conf.HTML) - OutPutJson(res, conf.Json) - textLenTotal = res.TextLen - } - printEnd() - return - } - - smq.Parallel(texts, dicts, func(v []*smq.Result) { - if len(v) == 0 { - return - } - textLenTotal += v[0].TextLen - if !conf.Hidden { - printSep() - Output(v) - } - OutputHTML(v, conf.HTML) - for _, res := range v { - OutPutJson(res, conf.Json) - } - }) - printEnd() -} diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 0000000..c2a089f --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,30 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "", + Short: "", + Long: "这是最快的赛码器\n用于对基于码表的输入法针对特定文章进行测评\nhttps://github.com/flowerime/gosmq", + Run: func(cmd *cobra.Command, args []string) { + _root() + }, +} + +func init() { + rootCmd.AddCommand(versionCmd) + rootCmd.AddCommand(serveCmd) + rootCmd.AddCommand(genCmd) +} + +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/cmd/root.go b/cmd/root.go index 3ae2ef0..a862564 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -2,29 +2,158 @@ package cmd import ( "fmt" - "os" + "time" - "github.com/spf13/cobra" + "github.com/flowerime/gosmq/pkg/smq" ) -var rootCmd = &cobra.Command{ - Use: "", - Short: "", - Long: "这是最快的赛码器\n用于对基于码表的输入法针对特定文章进行测评\nhttps://github.com/flowerime/gosmq", - Run: func(cmd *cobra.Command, args []string) { - goCli() - }, -} +var conf = &struct { + Text []string // 文本 + Dict []string // 码表 + + Single bool // 单字模式 + Algo string // 匹配算法 + Stable bool // 按码表顺序(覆盖algo) + PressSpaceBy string // 空格按键方式 left|right|both + Clean bool // 只统计词库中的词条 + + Verbose bool // 输出全部数据 + Split bool // 输出分词数据 + Stat bool // 输出词条数据 + Json bool // 输出json数据 + HTML bool // 保存 html 结果 + + Hidden bool // 隐藏 cli 结果展示 + Merge bool // 合并多文本的结果 +}{} func init() { - rootCmd.AddCommand(versionCmd) - rootCmd.AddCommand(serveCmd) - rootCmd.AddCommand(genCmd) + rootCmd.Flags().StringArrayVarP(&conf.Text, "text", "t", nil, "文本文件或文件夹,可以为多个") + rootCmd.Flags().StringArrayVarP(&conf.Dict, "dict", "i", nil, "码表文件或文件夹,可以为多个") + + rootCmd.Flags().BoolVarP(&conf.Single, "single", "s", false, "启用单字模式") + rootCmd.Flags().BoolVarP(&conf.Stable, "stable", "", false, "按码表顺序") + rootCmd.Flags().StringVarP(&conf.PressSpaceBy, "space", "k", "both", "空格按键方式 left|right|both") + rootCmd.Flags().BoolVarP(&conf.Clean, "clean", "c", false, "只统计词库中的词条") + + rootCmd.Flags().BoolVarP(&conf.Verbose, "verbose", "v", false, "输出全部数据") + rootCmd.Flags().BoolVarP(&conf.Split, "split", "", false, "输出分词数据") + rootCmd.Flags().BoolVarP(&conf.Stat, "stat", "", false, "输出词条数据") + rootCmd.Flags().BoolVarP(&conf.Json, "json", "", false, "输出 json 数据") + rootCmd.Flags().BoolVarP(&conf.HTML, "html", "", false, "保存 html 结果") + + rootCmd.Flags().BoolVarP(&conf.Hidden, "hidden", "", false, "隐藏 cli 结果展示") + rootCmd.Flags().BoolVarP(&conf.Merge, "merge", "m", false, "合并多文本的结果") } -func Execute() { - if err := rootCmd.Execute(); err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) +func _root() { + if len(conf.Dict) == 0 || len(conf.Text) == 0 { + fmt.Println("输入有误") + return + } + if conf.Stable { + conf.Algo = "strie" + } + if conf.Verbose { + conf.Split = true + conf.Stat = true + conf.Json = true + conf.HTML = true + } + // 开始计时 + start := time.Now() + texts := make([]string, 0, len(conf.Text)) + for _, v := range conf.Text { + texts = append(texts, getFiles(v)...) + } + fmt.Println("载入文本:") + for _, v := range texts { + fmt.Println("-> ", v) + } + fmt.Println() + + dictNames := make([]string, 0, len(conf.Dict)) + for _, v := range conf.Dict { + dictNames = append(dictNames, getFiles(v)...) + } + newDict := func() *smq.Dict { + return &smq.Dict{ + Single: conf.Single, + Algorithm: conf.Algo, + PressSpaceBy: conf.PressSpaceBy, + Clean: conf.Clean, + Split: conf.Split, + Stat: conf.Stat, + } + } + dicts := make([]*smq.Dict, 0, len(dictNames)) + fmt.Println("载入码表:") + dictStartTime := time.Now() + mid := time.Now() + for _, v := range dictNames { + d := newDict() + d.Load(v) + dicts = append(dicts, d) + if len(dictNames) == 1 { + fmt.Println("=> ", v) + } else { + fmt.Println("=> ", v, "\t耗时:", time.Since(mid)) + mid = time.Now() + } + } + fmt.Printf("载入码表耗时:%v\n\n", time.Since(dictStartTime)) + + // race + fmt.Println("比赛开始...") + textLenTotal := 0 + var printEnd = func() { + if conf.Split { + if conf.Merge { + fmt.Println("--merge 不会输出分词结果。") + } else { + fmt.Println("已输出分词结果") + } + } + if conf.Stat { + fmt.Println("已输出词条统计数据") + } + if conf.HTML { + fmt.Println("已保存 html 结果") + } + if conf.Json { + fmt.Println("已输出 json 数据") + } + fmt.Printf("共载入 %d 个码表,%d 个文本,总字数 %d,总耗时:%v\n", len(dicts), len(texts), textLenTotal, time.Since(start)) } + + if conf.Merge { + resArr := smq.ParallelMerge(texts, dicts) + for _, res := range resArr { + if !conf.Hidden { + printSep() + Output([]*smq.Result{res}) + } + OutputHTML([]*smq.Result{res}, conf.HTML) + OutPutJson(res, conf.Json) + textLenTotal = res.TextLen + } + printEnd() + return + } + + smq.Parallel(texts, dicts, func(v []*smq.Result) { + if len(v) == 0 { + return + } + textLenTotal += v[0].TextLen + if !conf.Hidden { + printSep() + Output(v) + } + OutputHTML(v, conf.HTML) + for _, res := range v { + OutPutJson(res, conf.Json) + } + }) + printEnd() } diff --git a/go.mod b/go.mod index 40b7e45..198394e 100644 --- a/go.mod +++ b/go.mod @@ -4,15 +4,11 @@ go 1.19 require ( github.com/AlecAivazis/survey/v2 v2.3.6 - github.com/flowerime/goutil v0.2.2 - github.com/flowerime/rose v1.1.1-0.20230416092638-a709f57d8826 github.com/jedib0t/go-pretty/v6 v6.4.6 + github.com/nopdan/ku v0.3.2 github.com/spf13/cobra v1.7.0 ) -// replace github.com/flowerime/goutil => ../goutil -// replace github.com/flowerime/rose => ../rose - require ( github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -25,7 +21,7 @@ require ( github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.8.1 // indirect golang.org/x/net v0.9.0 // indirect - golang.org/x/sys v0.7.0 // indirect - golang.org/x/term v0.7.0 // indirect + golang.org/x/sys v0.8.0 // indirect + golang.org/x/term v0.8.0 // indirect golang.org/x/text v0.9.0 // indirect ) diff --git a/go.sum b/go.sum index 9e3f1da..fab1b58 100644 --- a/go.sum +++ b/go.sum @@ -8,10 +8,6 @@ github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/flowerime/goutil v0.2.2 h1:teAE8BSB+PI0wCX+4n5W7ErTWTHThac98JjQ2LZN0h8= -github.com/flowerime/goutil v0.2.2/go.mod h1:UFcJNB+mJSFg5OhAgc3Ct0Wy8narM8cEmoWVnMECDhk= -github.com/flowerime/rose v1.1.1-0.20230416092638-a709f57d8826 h1:YF3RzPI5rmkP3jVsHDeGfRD2q6PFB2YvYiBK+5pTiNM= -github.com/flowerime/rose v1.1.1-0.20230416092638-a709f57d8826/go.mod h1:yr+41kRPzPWmXPwNskkG1LEoA6fv2hKY5Hoa+jGT57w= github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f h1:3BSP1Tbs2djlpprl7wCLuiqMaUh5SJkkzI2gDs+FgLs= github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14= github.com/hinshun/vt10x v0.0.0-20220119200601-820417d04eec h1:qv2VnGeEQHchGaZ/u7lxST/RaJw+cv273q79D81Xbog= @@ -35,6 +31,8 @@ github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d h1:5PJl274Y63IEHC+7izoQE9x6ikvDFZS2mDVS3drnohI= github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= +github.com/nopdan/ku v0.3.2 h1:zitkdv9keo9C9uNv2rCkdOUUFVatQlOiPSeEPiyDisg= +github.com/nopdan/ku v0.3.2/go.mod h1:fg8KnpxC9Dn2+Cqk/v15uwTv5NI5TwalFbj2tXtnWfM= github.com/pkg/profile v1.6.0/go.mod h1:qBsxPvzyUincmltOk6iyRVxHYg4adc0OFOv72ZdLa18= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -63,11 +61,11 @@ golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20210503060354-a79de5458b56/go.mod h1:tfny5GFUkzUvx4ps4ajbZsCe5lw1metzhBm9T3x7oIY= -golang.org/x/term v0.7.0 h1:BEvjmm5fURWqcfbSKTdpkDXYBrUS1c0m8agp14W48vQ= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= diff --git a/internal/gen/gen.go b/internal/gen/gen.go index b16e660..04746bf 100644 --- a/internal/gen/gen.go +++ b/internal/gen/gen.go @@ -2,9 +2,6 @@ package gen import ( "sort" - "strconv" - - "github.com/flowerime/rose/pkg/rose" ) type Config struct { @@ -16,44 +13,32 @@ type Config struct { SortByWordLen bool // 按照词长重新排序 } -func (c *Config) Gen() rose.WubiTable { - var wl rose.WordLibrary - var ct rose.CodeTable +type Entry struct { + Word string + Code string + Pos int +} - // 极速赛码表格式 - if c.Format == "jisu" { - wl = c.ReadJisu() - ct = wl.ToCodeTable() - } else { - d := rose.Parse(c.Path, c.Format) - ct = d.ToCodeTable() +func (c *Config) Gen() []*Entry { + var dict []*Entry + + switch c.Format { + case "jisu", "js": + dict = c.LoadJisu() + case "duoduo", "dd": + dict = c.LoadTSV(true) + case "bingling", "bl": + dict = c.LoadTSV(false) + default: + panic("不支持的格式: " + c.Format) } - wt := ct.ToWubiTable() - for i := range wt { - if wt[i].Pos <= 0 { - wt[i].Pos = 1 - } - wt[i].Code = c.addSuffix(wt[i].Code, wt[i].Pos) - } if c.SortByWordLen { - sort.SliceStable(wt, func(i, j int) bool { - return len([]rune(wt[i].Word)) > len([]rune(wt[j].Word)) + sort.SliceStable(dict, func(i, j int) bool { + return len([]rune(dict[i].Word)) > len([]rune(dict[j].Word)) }) } - return wt -} - -// 加上选重键 -func (c Config) addSuffix(s string, pos int) string { - if pos != 1 || len(s) < c.PushStart { - if int(pos) <= len(c.SelectKeys) { - s += string(c.SelectKeys[pos-1]) - } else { - s += strconv.Itoa(pos) - } - } - return s + return dict } // 专用,两位正数 1~99 byte 转 string diff --git a/internal/gen/jisu.go b/internal/gen/jisu.go index 81ab0c0..e70b62a 100644 --- a/internal/gen/jisu.go +++ b/internal/gen/jisu.go @@ -5,35 +5,34 @@ import ( "strconv" "strings" - util "github.com/flowerime/goutil" - "github.com/flowerime/rose/pkg/rose" + "github.com/nopdan/ku" ) -func (c *Config) ReadJisu() rose.WordLibrary { - ret := make(rose.WordLibrary, 0, 1e5) - rd, err := util.Read(c.Path) +func (c *Config) LoadJisu() []*Entry { + ret := make([]*Entry, 0, 1e5) + rd, err := ku.Read(c.Path) if err != nil { panic(err) } scan := bufio.NewScanner(rd) - for scan.Scan() { wc := strings.Split(scan.Text(), "\t") if len(wc) != 2 { continue } - code := wc[1] + word, code := wc[0], wc[1] + // 带空格 a_ aa_ - if len(code)-1 > 0 && code[len(code)-1] == '_' { - ret = append(ret, &rose.WubiEntry{wc[0], code, 1}) + if len(code) > 1 && code[len(code)-1] == '_' { + ret = append(ret, &Entry{word, code, 1}) continue } - code, suf := FindSuffixInteger(code) + pre, suf := FindSuffixInteger(code) // 不带数字 akdb ksdw if suf == "" { - ret = append(ret, &rose.WubiEntry{wc[0], code, 1}) + ret = append(ret, &Entry{word, code, 1}) continue } @@ -42,24 +41,25 @@ func (c *Config) ReadJisu() rose.WordLibrary { if pos <= 0 { pos = 10 } - if len(c.SelectKeys) >= pos { - code += string(c.SelectKeys[pos-1]) + // 添加自定义选重键 + if pos <= len(c.SelectKeys) { + code = pre + string(c.SelectKeys[pos-1]) } // fmt.Println(wc[0], code, pos) - ret = append(ret, &rose.WubiEntry{wc[0], code, pos}) + ret = append(ret, &Entry{wc[0], code, pos}) } return ret } // 查找末尾数字,返回前缀和后缀 func FindSuffixInteger(s string) (string, string) { - var preffix, suffix string + var prefix, suffix string for i := len(s) - 1; i >= 0; i-- { if s[i] >= '0' && s[i] <= '9' { suffix = string(s[i]) + suffix } else { - preffix = s[:i+1] - return preffix, suffix + prefix = s[:i+1] + return prefix, suffix } } // 全是数字 diff --git a/internal/gen/tsv.go b/internal/gen/tsv.go new file mode 100644 index 0000000..7fb1665 --- /dev/null +++ b/internal/gen/tsv.go @@ -0,0 +1,54 @@ +package gen + +import ( + "bufio" + "strconv" + "strings" + + "github.com/nopdan/ku" +) + +// 加载多多码表 +func (c *Config) LoadTSV(wordFirst bool) []*Entry { + ret := make([]*Entry, 0, 1e5) + rd, err := ku.Read(c.Path) + if err != nil { + panic(err) + } + // 统计编码出现的次数 + stat := make(map[string]int) + scan := bufio.NewScanner(rd) + for scan.Scan() { + wc := strings.Split(scan.Text(), "\t") + if len(wc) < 2 { + continue + } + word, code := wc[0], wc[1] + if !wordFirst { + word, code = code, word + } + stat[code]++ + pos := stat[code] + code = c.addSuffix(code, pos) + ret = append(ret, &Entry{word, code, pos}) + } + return ret +} + +// 加上选重键,pos 是编码出现的次数,最小为 1 +func (c Config) addSuffix(code string, pos int) string { + // 大于等于起顶码长,首选不用添加空格 _ + if len(code) >= c.PushStart { + if pos == 1 { + return code + } + } + + // 添加自定义选重键 + if pos <= len(c.SelectKeys) { + code += string(c.SelectKeys[pos-1]) + } else { + code += strconv.Itoa(pos) + } + return code +} diff --git a/internal/gen/write.go b/internal/gen/write.go index 854bd6d..38e21a1 100644 --- a/internal/gen/write.go +++ b/internal/gen/write.go @@ -5,21 +5,19 @@ import ( "fmt" "os" "strconv" - - "github.com/flowerime/rose/pkg/rose" ) // 输出赛码表 -func Write(t rose.WubiTable, path string) { +func Write(dict []*Entry, path string) { var buf bytes.Buffer - buf.Grow(len(t)) - for i := range t { - buf.WriteString(t[i].Word) + buf.Grow(len(dict)) + for _, entry := range dict { + buf.WriteString(entry.Word) buf.WriteByte('\t') - buf.WriteString(t[i].Code) - if t[i].Pos != 1 { + buf.WriteString(entry.Code) + if entry.Pos != 1 { buf.WriteByte('\t') - buf.WriteString(strconv.Itoa(t[i].Pos)) + buf.WriteString(strconv.Itoa(entry.Pos)) } buf.WriteByte('\n') } diff --git a/pkg/smq/dict.go b/pkg/smq/dict.go index 72580c0..14e8c06 100644 --- a/pkg/smq/dict.go +++ b/pkg/smq/dict.go @@ -8,7 +8,7 @@ import ( "strings" "github.com/flowerime/gosmq/pkg/matcher" - util "github.com/flowerime/goutil" + "github.com/nopdan/ku" ) type Dict struct { @@ -28,13 +28,13 @@ type Dict struct { // 从文件加载码表 func (dict *Dict) Load(path string) { - rd, err := util.Read(path) + rd, err := ku.Read(path) if err != nil { fmt.Println("Warning! 读取文件失败:", err) return } if dict.Name == "" { - dict.Name = util.GetFileName(path) + dict.Name = ku.GetFileName(path) } dict.reader = rd dict.init() diff --git a/pkg/smq/smq.go b/pkg/smq/smq.go index 83229ea..e2cce15 100644 --- a/pkg/smq/smq.go +++ b/pkg/smq/smq.go @@ -7,7 +7,7 @@ import ( "strings" "sync" - util "github.com/flowerime/goutil" + "github.com/nopdan/ku" ) type Text struct { @@ -19,7 +19,7 @@ type Text struct { // 从文件添加文本 func (t *Text) Load(path string) error { - t.Name = util.GetFileName(path) + t.Name = ku.GetFileName(path) f, err := os.Open(path) if err != nil { return err @@ -33,7 +33,7 @@ func (t *Text) Load(path string) error { t.bufLen = 256 << 10 } // fmt.Println("buffer size", s.bufLen) - t.reader = util.NewReader(f) + t.reader = ku.NewReader(f) return nil }