diff --git a/cmd/output.go b/cmd/output.go
index f6d2f30..c5f0003 100644
--- a/cmd/output.go
+++ b/cmd/output.go
@@ -88,9 +88,9 @@ func Output(data []*result.Result) {
t.AppendHeader(table.Row{"总键数", "码长", "十击速度", "非汉字数", "计数", "缺字数", "计数"})
for _, res := range data {
t.AppendRow([]any{
- res.CodeLen.Total,
- fmt.Sprintf("%.4f", res.CodeLen.PerChar),
- fmt.Sprintf("%.2f", 600/res.CodeLen.PerChar),
+ res.Keys.Count,
+ fmt.Sprintf("%.4f", res.Keys.CodeLen),
+ fmt.Sprintf("%.2f", 600/res.Keys.CodeLen),
res.Han.NotHans, res.Han.NotHanCount,
res.Han.Lacks, res.Han.LackCount,
})
@@ -101,19 +101,16 @@ func Output(data []*result.Result) {
t = table.NewWriter()
t.AppendHeader(table.Row{"首选词", "打词", "--", "打词字数", "--", "选重", "--", "选重字数", "--"})
for _, res := range data {
- commitRate := func(x int) float64 {
- return div(x, res.Commit.Count)
- }
t.AppendRow([]any{
res.Commit.WordFirst,
res.Commit.Word,
- fmt.Sprintf("%.2f%%", 100*commitRate(res.Commit.Word)),
- res.Commit.WordChars,
- fmt.Sprintf("%.2f%%", 100*div(res.Commit.WordChars, res.Info.TextLen)),
+ commitRate(res.Commit.Word, res),
+ res.Char.Word,
+ charRate(res.Char.Word, res),
res.Commit.Collision,
- fmt.Sprintf("%.2f%%", 100*commitRate(res.Commit.Collision)),
- res.Commit.CollisionChars,
- fmt.Sprintf("%.2f%%", 100*div(res.Commit.CollisionChars, res.Info.TextLen)),
+ commitRate(res.Commit.Collision, res),
+ res.Char.Collision,
+ charRate(res.Char.Collision, res),
})
}
t.SetStyle(table.StyleColoredBright)
@@ -123,12 +120,12 @@ func Output(data []*result.Result) {
t.AppendHeader(table.Row{"左手", "右手", "左右", "右左", "左左", "右右"})
for _, res := range data {
t.AppendRow([]any{
- fmt.Sprintf("%.2f%%", 100*div(res.LeftHand, res.CodeLen.Total)),
- fmt.Sprintf("%.2f%%", 100*div(res.RightHand, res.CodeLen.Total)),
- fmt.Sprintf("%.2f%%", 100*div(res.Pair.LeftToRight, res.Pair.Count)),
- fmt.Sprintf("%.2f%%", 100*div(res.Pair.RightToLeft, res.Pair.Count)),
- fmt.Sprintf("%.2f%%", 100*div(res.Pair.LeftToLeft, res.Pair.Count)),
- fmt.Sprintf("%.2f%%", 100*div(res.Pair.RightToRight, res.Pair.Count)),
+ keyRate(res.Keys.LeftHand, res),
+ keyRate(res.Keys.RightHand, res),
+ pairRate(res.Pair.LeftToRight, res),
+ pairRate(res.Pair.RightToLeft, res),
+ pairRate(res.Pair.LeftToLeft, res),
+ pairRate(res.Pair.RightToRight, res),
})
}
t.SetStyle(table.StyleColoredBright)
@@ -137,20 +134,17 @@ func Output(data []*result.Result) {
t = table.NewWriter()
t.AppendHeader(table.Row{"当量", "异手", "同指", "三连击", "两连击", "小跨排", "大跨排", "异指", "小指干扰", "错手"})
for _, res := range data {
- pairRate := func(x int) float64 {
- return div(x, res.Pair.Count)
- }
t.AppendRow([]any{
- fmt.Sprintf("%.4f", pairRate(int(res.Equivalent))),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.DiffHand)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.SameFinger)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.TribleHit)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.DoubleHit)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.SingleSpan)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.MultiSpan)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.DiffFinger)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.Disturb)),
- fmt.Sprintf("%.2f%%", 100*pairRate(res.Pair.Staggered)),
+ fmt.Sprintf("%.4f", float64(res.Pair.Equivalent)/float64(res.Pair.Count)),
+ pairRate(res.Pair.DiffHand, res),
+ pairRate(res.Pair.SameFinger, res),
+ pairRate(res.Pair.TribleHit, res),
+ pairRate(res.Pair.DoubleHit, res),
+ pairRate(res.Pair.SingleSpan, res),
+ pairRate(res.Pair.MultiSpan, res),
+ pairRate(res.Pair.DiffFinger, res),
+ pairRate(res.Pair.Disturb, res),
+ pairRate(res.Pair.Staggered, res),
})
}
t.SetStyle(table.StyleColoredBright)
@@ -161,7 +155,7 @@ func Output(data []*result.Result) {
for _, res := range data {
newRow := []any{}
for i := 1; i < 11; i++ {
- newRow = append(newRow, fmt.Sprintf("%.2f%%", 100*div(res.Dist.Finger[i], res.CodeLen.Total)))
+ newRow = append(newRow, keyRate(res.Dist.Finger[i], res))
}
t.AppendRow(newRow)
}
@@ -173,7 +167,7 @@ func Output(data []*result.Result) {
row := []any{}
for i := range len(keys) {
header = append(header, string(keys[i]))
- row = append(row, fmt.Sprintf("%.2f%%", 100*res.Keys[string(keys[i])].Rate))
+ row = append(row, keyRate(res.Dist.Key[string(keys[i])], res))
}
writer := table.NewWriter()
writer.AppendHeader(header)
@@ -193,6 +187,22 @@ func Output(data []*result.Result) {
fmt.Println("----------------------")
}
-func div(x, y int) float64 {
- return float64(x) / float64(y)
+// percentOf formats count/total as a percentage with two decimals ("12.34%").
+// A zero total (for example an empty input text) yields "0.00%" instead of the
+// "NaN%" or "+Inf%" that a floating-point division by zero would print.
+func percentOf(count, total int) string {
+	if total == 0 {
+		return "0.00%"
+	}
+	rate := float64(count) / float64(total)
+	return fmt.Sprintf("%.2f%%", 100*rate)
+}
+
+// commitRate returns count as a percentage of all commits (res.Commit.Count).
+func commitRate(count int, res *result.Result) string {
+	return percentOf(count, res.Commit.Count)
+}
+
+// charRate returns count as a percentage of the counted characters (res.Char.Count).
+func charRate(count int, res *result.Result) string {
+	return percentOf(count, res.Char.Count)
+}
+
+// keyRate returns count as a percentage of all keystrokes (res.Keys.Count).
+func keyRate(count int, res *result.Result) string {
+	return percentOf(count, res.Keys.Count)
+}
+
+// pairRate returns count as a percentage of all key pairs (res.Pair.Count).
+func pairRate(count int, res *result.Result) string {
+	return percentOf(count, res.Pair.Count)
}
diff --git a/frontend/src/components/Data.ts b/frontend/src/components/Data.ts
index 49b6897..3dcc6c3 100644
--- a/frontend/src/components/Data.ts
+++ b/frontend/src/components/Data.ts
@@ -2,29 +2,19 @@
+// Shape of one scoring result. The field names match the exported fields of
+// the Go Result struct in pkg/result/result.go.
export interface Data {
Info: Info;
- Commit: Commit;
- Pair: Pair;
- Keys: { [key: string]: Key };
+ // Per-commit counters; reuses the Char shape because both carry the same four fields.
+ Commit: Char;
+ // The same counters measured in characters.
+ Char: Char;
Han: Han;
+ Pair: Pair;
+ // Keystroke totals; the per-key counts live in Dist.Key.
+ Keys: Keys;
Dist: Dist;
- CodeLen: CodeLen;
- LeftHand: number;
- RightHand: number;
- Equivalent: number;
}
-export interface CodeLen {
- Total: number;
- PerChar: number;
-}
-
-export interface Commit {
+// Counter group used for both Data.Commit (counted per commit) and Data.Char
+// (counted per character). Count is the total; Word, WordFirst and Collision
+// are the word, first-candidate-word and candidate-selection subsets.
+export interface Char {
Count: number;
Word: number;
- WordChars: number;
WordFirst: number;
Collision: number;
- CollisionChars: number;
}
export interface Dist {
@@ -32,6 +22,7 @@ export interface Dist {
WordLen: number[];
Collision: number[];
Finger: number[];
+ Key: { [key: string]: number };
}
export interface Han {
@@ -51,13 +42,16 @@ export interface Info {
Single: boolean;
}
-export interface Key {
+// Keystroke totals for one result.
+export interface Keys {
Count: number;
- Rate: number;
+ // Average code length per character.
+ CodeLen: number;
+ // Keystrokes typed by the left hand.
+ LeftHand: number;
+ // Keystrokes typed by the right hand.
+ RightHand: number;
}
export interface Pair {
Count: number;
+ Equivalent: number;
SameFinger: number;
DoubleHit: number;
TribleHit: number;
diff --git a/frontend/src/components/MultiResult.vue b/frontend/src/components/MultiResult.vue
index 4c0d7e9..be7c77d 100644
--- a/frontend/src/components/MultiResult.vue
+++ b/frontend/src/components/MultiResult.vue
@@ -14,10 +14,10 @@ const res = computed(() => {
let cp = JSON.parse(JSON.stringify(props.data));
cp.Commit.Word = commitRate(cp.Commit.Word);
cp.Commit.Collision = commitRate(cp.Commit.Collision);
- cp.Commit.WordChars = charRate(cp.Commit.WordChars);
- cp.Commit.CollisionChars = charRate(cp.Commit.CollisionChars);
- cp.LeftHand = keyRate(cp.LeftHand);
- cp.RightHand = keyRate(cp.RightHand);
+ cp.Char.Word = charRate(cp.Char.Word);
+ cp.Char.Collision = charRate(cp.Char.Collision);
+ cp.Keys.LeftHand = keyRate(cp.Keys.LeftHand);
+ cp.Keys.RightHand = keyRate(cp.Keys.RightHand);
cp.Pair.SameHand = pairRate(cp.Pair.SameHand);
cp.Pair.DiffHand = pairRate(cp.Pair.DiffHand);
cp.Pair.SameFinger = pairRate(cp.Pair.SameFinger);
@@ -40,11 +40,11 @@ function commitRate(count: number): string {
}
+// Formats count as a percentage of props.data.Char.Count with two decimals.
function charRate(count: number): string {
- return ((count / props.data.Info.TextLen) * 100).toFixed(2) + "%";
+ return ((count / props.data.Char.Count) * 100).toFixed(2) + "%";
}
+// Formats count as a percentage of props.data.Keys.Count with two decimals.
function keyRate(count: number): string {
- return ((count / props.data.CodeLen.Total) * 100).toFixed(2) + "%";
+ return ((count / props.data.Keys.Count) * 100).toFixed(2) + "%";
}
function pairRate(count: number): string {
@@ -82,8 +82,8 @@ function dist(dist: number[]) {
选重 |
- {{ data.CodeLen.PerChar.toFixed(2) }} |
- {{ data.CodeLen.Total }} |
+ {{ data.Keys.CodeLen.toFixed(4) }} |
+ {{ data.Keys.Count }} |
{{ data.Commit.Count }} |
{{ res.Commit.Word }} |
{{ res.Commit.Collision }} |
@@ -100,8 +100,8 @@ function dist(dist: number[]) {
{{ data.Info.DictLen }} |
{{ data.Han.Lacks }} |
{{ data.Info.TextLen }} |
- {{ res.Commit.WordChars }} |
- {{ res.Commit.CollisionChars }} |
+ {{ res.Char.Word }} |
+ {{ res.Char.Collision }} |
@@ -116,7 +116,7 @@ function dist(dist: number[]) {
右右 |
- {{ res.LeftHand }} |
+ {{ res.Keys.LeftHand }} |
{{ res.Pair.LeftToLeft }} |
{{ res.Pair.LeftToRight }} |
{{ res.Pair.RightToLeft }} |
@@ -130,7 +130,7 @@ function dist(dist: number[]) {
大跨排 |
- {{ res.RightHand }} |
+ {{ res.Keys.RightHand }} |
{{ res.Pair.SameFinger }} |
{{ res.Pair.DoubleHit }} |
{{ res.Pair.SingleSpan }} |
@@ -144,7 +144,7 @@ function dist(dist: number[]) {
小指干扰 |
- {{ (data.Equivalent / data.Pair.Count).toFixed(4) }} |
+ {{ (data.Pair.Equivalent / data.Pair.Count).toFixed(4) }} |
{{ res.Pair.DiffHand }} |
{{ res.Pair.DiffFinger }} |
{{ res.Pair.Staggered }} |
diff --git a/frontend/src/components/Show.vue b/frontend/src/components/Show.vue
index eeb2273..ca97286 100644
--- a/frontend/src/components/Show.vue
+++ b/frontend/src/components/Show.vue
@@ -55,10 +55,10 @@ const multi = ref(true);
-
+
+
+ 标题
+
diff --git a/pkg/data/dict.go b/pkg/data/dict.go
index 52a72b6..8a7dc51 100644
--- a/pkg/data/dict.go
+++ b/pkg/data/dict.go
@@ -1,7 +1,9 @@
package data
import (
+ "fmt"
"sync"
+ "time"
"github.com/nopdan/gosmq/pkg/matcher"
"github.com/nopdan/gosmq/pkg/util"
@@ -50,6 +52,7 @@ func (d *Dict) Init() {
return
}
}
+ now := time.Now()
// 选重键
d.selectKeys = make([]string, 0, 10)
for i := range len(d.SelectKeys) {
@@ -100,6 +103,7 @@ func (d *Dict) Init() {
d.IsInit = true
if dict == nil || len(dict) == 0 {
d.Matcher.Build()
+ logger.Info(fmt.Sprintf("已载入码表: %s", d.Text.Name), "耗时", time.Since(now))
return
}
diff --git a/pkg/matcher/single.go b/pkg/matcher/single.go
index ed70844..90ecca0 100644
--- a/pkg/matcher/single.go
+++ b/pkg/matcher/single.go
@@ -2,6 +2,7 @@ package matcher
import (
"bytes"
+ "unicode"
"unicode/utf8"
)
@@ -49,6 +50,9 @@ func (s *Single) Match(brd *bytes.Reader, res *Result) {
res.Char = ch
res.Size = size
res.Length = 1
+ if unicode.IsSpace(ch) {
+ return
+ }
if v, ok := s.dict[ch]; ok {
res.Code = v.code
res.Pos = v.pos
diff --git a/pkg/matcher/trie.go b/pkg/matcher/trie.go
index dd02c2a..8e9453a 100644
--- a/pkg/matcher/trie.go
+++ b/pkg/matcher/trie.go
@@ -3,6 +3,7 @@ package matcher
import (
"bytes"
"io"
+ "unicode"
)
type Trie struct {
@@ -65,18 +66,21 @@ func (t *Trie) Match(brd *bytes.Reader, res *Result) {
var Size, Length int
var order int
for {
- char, size, err := brd.ReadRune()
+ ch, size, err := brd.ReadRune()
if err != nil {
break
}
if Char == 0 {
- Char = char
+ Char = ch
CharSize = size
}
Size += size
Length++
- node = node.ch[char]
+ if unicode.IsSpace(ch) {
+ break
+ }
+ node = node.ch[ch]
if node == nil {
break
}
diff --git a/pkg/result/match_res.go b/pkg/result/match_res.go
index c7190aa..3c124ef 100644
--- a/pkg/result/match_res.go
+++ b/pkg/result/match_res.go
@@ -34,11 +34,11 @@ type MatchRes struct {
// 每个词条对应的编码,以及出现的次数
StatData map[string]*CodePosCount
- Equivalent float64 // 总当量
-
- Dist dist
- Commit commit
- Pair pair
+ TextLen int // 文章总字数,不管有没有匹配到
+ Dist dist
+ Commit commit
+ Char char
+ Pair pair
}
type dist struct {
@@ -54,15 +54,20 @@ type dist struct {
type commit struct {
Count int // 上屏数
Word int // 打词数
- WordChars int // 打词字数
WordFirst int // 首选词
+ Collision int // 选重
+}
- Collision int // 选重
- CollisionChars int // 选重字数
+// char holds the same counters as commit, measured in characters rather
+// than commits.
+type char struct {
+ Count int // characters committed (sum of match lengths)
+ Word int // characters entered as words
+ WordFirst int // characters entered as first-candidate words
+ Collision int // characters entered via candidate selection
}
type pair struct {
- Count int // 按键组合数
+ Count int // 按键组合数
+ Equivalent float64 // 总当量
SameFinger int // 同手同指
DoubleHit int // 同键双击
@@ -130,16 +135,20 @@ func (m *MatchRes) Combine(mRes *MatchRes) {
for i := range mRes.Dist.Collision {
util.AddTo(mRes.Dist.Collision[i], &m.Dist.Collision, i)
}
- m.Equivalent += mRes.Equivalent
+ m.TextLen += mRes.TextLen
m.Commit.Count += mRes.Commit.Count
m.Commit.Word += mRes.Commit.Word
- m.Commit.WordChars += mRes.Commit.WordChars
m.Commit.WordFirst += mRes.Commit.WordFirst
m.Commit.Collision += mRes.Commit.Collision
- m.Commit.CollisionChars += mRes.Commit.CollisionChars
+
+ m.Char.Count += mRes.Char.Count
+ m.Char.Word += mRes.Char.Word
+ m.Char.WordFirst += mRes.Char.WordFirst
+ m.Char.Collision += mRes.Char.Collision
m.Pair.Count += mRes.Pair.Count
+ m.Pair.Equivalent += mRes.Pair.Equivalent
m.Pair.SameFinger += mRes.Pair.SameFinger
m.Pair.DoubleHit += mRes.Pair.DoubleHit
m.Pair.TribleHit += mRes.Pair.TribleHit
diff --git a/pkg/result/result.go b/pkg/result/result.go
index 68e50a4..8d36367 100644
--- a/pkg/result/result.go
+++ b/pkg/result/result.go
@@ -1,35 +1,25 @@
package result
-// count and rate
-type CountRate struct {
- Count int
- Rate float64
-}
-
type Result struct {
segments []segment
statData map[string]*CodePosCount
Info Info // 文章和码表信息
Commit commit // 上屏
+ Char char // 上屏字数
+ Han han // 非汉字以及缺字
Pair pair // 按键组合
Keys keys // 按键统计
- Han han // 非汉字以及缺字
// 各种分布
Dist struct {
CodeLen []int // 码长
WordLen []int // 词长
Collision []int // 选重
Finger [11]int // 手指
+
+ // 按键 左空格_,右空格+
+ Key map[string]int
}
- // 码长
- CodeLen struct {
- Total int
- PerChar float64
- }
- LeftHand int // 左手按键数
- RightHand int // 右手按键数
- Equivalent float64 // 总当量
}
type Info struct {
@@ -49,17 +39,9 @@ type han struct {
LackCount int // 缺字计数
}
-// 码长
-type codeLen struct {
- Total int // 全部码长
- PerChar float64 // 字均码长
- Dist []int // 码长分布统计
-}
-
-// 按键 左空格_,右空格+
-type keys map[string]CountRate
-
-type hands struct {
- Left CountRate // 左手
- Right CountRate // 右手
+// keys holds keystroke totals for one result.
+type keys struct {
+ Count int // total keystrokes
+ CodeLen float64 // average code length per character
+ LeftHand int // keystrokes typed by the left hand
+ RightHand int // keystrokes typed by the right hand
}
diff --git a/pkg/result/stat.go b/pkg/result/stat.go
index 781bfad..114f464 100644
--- a/pkg/result/stat.go
+++ b/pkg/result/stat.go
@@ -15,42 +15,37 @@ func (m *MatchRes) Stat(info *Info) *Result {
res.segments = m.segments
res.statData = m.StatData
res.Info = *info
- res.Keys = make(map[string]CountRate)
res.Commit = m.Commit
+ res.Char = m.Char
res.Pair = m.Pair
res.Dist.CodeLen = m.Dist.CodeLen
res.Dist.WordLen = m.Dist.WordLen
res.Dist.Collision = m.Dist.Collision
- res.Equivalent = m.Equivalent
+ res.Dist.Key = make(map[string]int)
- // 文章字数
- for i, v := range res.Dist.WordLen {
- res.Info.TextLen += i * v
- }
- // 总码长
+ res.Info.TextLen = m.TextLen
+ // 总码长 == 总按键数
for i, v := range res.Dist.CodeLen {
- res.CodeLen.Total += i * v
+ res.Keys.Count += i * v
}
- // 字均码长
- res.CodeLen.PerChar = div(res.CodeLen.Total, res.Info.TextLen)
+ // 字均码长 = 总按键数 / 总字数
+ res.Keys.CodeLen = div(res.Keys.Count, res.Char.Count)
// 按键分布
for i := byte(33); i < 128; i++ {
- cr := CountRate{}
- cr.Count = m.Dist.Key[i]
- if cr.Count == 0 {
+ count := m.Dist.Key[i]
+ if count == 0 {
continue
}
- cr.Rate = div(cr.Count, res.CodeLen.Total)
- res.Keys[string(i)] = cr
+ res.Dist.Key[string(i)] = count
// 左右手
isLeft, finger := feeling.KeyPos(i)
if isLeft {
- res.LeftHand += cr.Count
+ res.Keys.LeftHand += count
} else {
- res.RightHand += cr.Count
+ res.Keys.RightHand += count
}
// 指法
- res.Dist.Finger[finger] += cr.Count
+ res.Dist.Finger[finger] += count
}
res.Pair.SameHand = res.Pair.LeftToLeft + res.Pair.RightToRight
res.Pair.DiffHand = res.Pair.Count - res.Pair.SameHand
diff --git a/pkg/smq/config.go b/pkg/smq/config.go
index bb9d667..36b886f 100644
--- a/pkg/smq/config.go
+++ b/pkg/smq/config.go
@@ -74,7 +74,7 @@ func (c *Config) Race() [][]*result.Result {
logger.Warn("文本或码表为空", "text", len(c.textList), "dict", len(c.dictList))
return nil
}
- logger.Info("开始赛码", "文本", len(c.textList), "码表", len(c.dictList))
+ logger.Info("开始赛码...", "文本", len(c.textList), "码表", len(c.dictList))
now := time.Now()
// 限制并发数量
ch := make(chan *result.MatchRes, NUM_CPU)
diff --git a/pkg/smq/feel.go b/pkg/smq/feel.go
index ae117b9..becd06e 100644
--- a/pkg/smq/feel.go
+++ b/pkg/smq/feel.go
@@ -79,14 +79,14 @@ func (f *feel) Process(key byte) {
comb := feeling.Combination[f.lastKey][f.key]
// 当量表里找不到
if comb == nil {
- mRes.Equivalent += 2.0
+ mRes.Pair.Equivalent += 2.0
mRes.Pair.Count++
f.step()
f.update()
return
}
- mRes.Equivalent += comb.Equivalent
+ mRes.Pair.Equivalent += comb.Equivalent
mRes.Pair.Count++
f.step()
diff --git a/pkg/smq/match.go b/pkg/smq/match.go
index 8f26b06..fb1a9de 100644
--- a/pkg/smq/match.go
+++ b/pkg/smq/match.go
@@ -17,8 +17,18 @@ func (c *Config) match(buffer []byte, dict *data.Dict) *result.MatchRes {
brd := bytes.NewReader(buffer)
res := new(matcher.Result)
+	// hanHandler tallies one rune in the distribution maps: Han characters go
+	// to Dist.LackHan, every other rune goes to Dist.NotHan.
+	hanHandler := func(r rune) {
+		if !unicode.Is(unicode.Han, r) {
+			mRes.Dist.NotHan[r]++
+			return
+		}
+		mRes.Dist.LackHan[r]++
+	}
+
process := func(res *matcher.Result) {
mRes.Commit.Count++
+ mRes.Char.Count += res.Length
util.Increase(&mRes.Dist.WordLen, res.Length)
util.Increase(&mRes.Dist.Collision, res.Pos)
util.Increase(&mRes.Dist.CodeLen, len(res.Code))
@@ -31,14 +41,15 @@ func (c *Config) match(buffer []byte, dict *data.Dict) *result.MatchRes {
}
if res.Pos >= 2 {
mRes.Commit.Collision++
- mRes.Commit.CollisionChars += res.Length
+ mRes.Char.Collision += res.Length
}
// 匹配到词组
if res.Length >= 2 {
mRes.Commit.Word++
- mRes.Commit.WordChars += res.Length
+ mRes.Char.Word += res.Length
if res.Pos == 1 {
mRes.Commit.WordFirst++ // 首选词
+ mRes.Char.WordFirst += res.Length
}
}
if !c.Split && !c.Stat {
@@ -74,16 +85,37 @@ func (c *Config) match(buffer []byte, dict *data.Dict) *result.MatchRes {
}
}
- for brd.Len() > 0 {
- // 跳过空白字符
- ch, _, _ := brd.ReadRune()
- if ch < 33 || ch == 65533 || ch == ' ' {
- continue
+ // 处理两字宽标点符号 破折号 —— 省略号 ……
+	// _2Width handles the double-width punctuation marks —— (dash) and ……
+	// (ellipsis). When res.Char is the first half and the next rune in brd is
+	// the identical second half, the pair is consumed and committed as a single
+	// 6-byte unit with code "=-" or "=6", and true is returned. In every other
+	// case brd is left at its original position and false is returned.
+	_2Width := func(res *matcher.Result, brd *bytes.Reader) bool {
+		var code string
+		switch res.Char {
+		case '—':
+			code = "=-"
+		case '…':
+			code = "=6"
+		default:
+			return false
+		}
+		ch2, _, err := brd.ReadRune()
+		if err != nil {
+			// End of input: nothing was consumed, so there is nothing to unread.
+			return false
+		}
+		if ch2 != res.Char {
+			// Not a pair: give the rune back so the main loop still sees it.
+			_ = brd.UnreadRune()
+			return false
}
- _ = brd.UnreadRune()
+		// Not counted as a word: Length stays 1 while Size covers both runes.
+		res.SetChar(0).SetSize(6).SetPos(1)
+		res.Code = code
+		process(res)
+		return true
+	}
+ for brd.Len() > 0 {
// 开始匹配
dict.Matcher.Match(brd, res)
+ mRes.TextLen += res.Length
// 匹配成功
if res.Pos > 0 {
@@ -96,39 +128,27 @@ func (c *Config) match(buffer []byte, dict *data.Dict) *result.MatchRes {
feel.Invalid()
continue
}
- res.Pos = 1
- // 两个字符的符号
- if res.Char == '—' || res.Char == '…' {
- ch2, _, err := brd.ReadRune()
- if err != nil {
- if res.Char == '—' && ch2 == '—' {
- // 中文破折号 —— 占用 6 字节,不计打词
- res.SetChar(0).SetCode("=-").SetSize(6)
- process(res)
- continue
- } else if res.Char == '…' && ch2 == '…' {
- // 中文省略号 …… 占用 6 字节,不计打词
- res.SetChar(0).SetCode("=6").SetSize(6)
- process(res)
- continue
- }
- }
- _ = brd.UnreadRune()
+ // 跳过空白符
+ if unicode.IsSpace(res.Char) {
+ continue
}
+ hanHandler(res.Char)
+
// 单字符符号
punct := convertPunct(res.Char)
if punct != "" {
res.Code = punct
+ res.Pos = 1
process(res)
continue
}
- isHan := unicode.Is(unicode.Han, res.Char)
- if isHan {
- mRes.Dist.LackHan[ch]++
- } else {
- mRes.Dist.NotHan[ch]++
+
+ // 两个字符的符号
+ if ok := _2Width(res, brd); ok {
+ continue
}
+
res.Code = "######"
res.Pos = 0
process(res)