Skip to content

Commit

Permalink
Merge pull request #11 from go-ego/m-pr
Browse files Browse the repository at this point in the history
add not cut option and export match function code
  • Loading branch information
vcaesar authored Jul 11, 2020
2 parents e08db80 + 4daa0bc commit dd52367
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 11 deletions.
9 changes: 8 additions & 1 deletion phrase/paragraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,15 @@ func Pinyin(p string, segs ...gse.Segmenter) []string {
// Initial returns the pinyin initials of p.
//
// p is converted to pinyin with Pinyin (any segs are forwarded unchanged) and
// the resulting slice is handed to Join, which builds the returned string.
func Initial(p string, segs ...gse.Segmenter) (s string) {
	return Join(Pinyin(p, segs...))
}

// Join concatenates the first rune of every non-empty string in a.
func Join(a []string) (s string) {
for _, v := range a {
s += string([]rune(v)[0])
if len(v) > 0 {
s += string([]rune(v)[0])
}
}

return
Expand Down
14 changes: 13 additions & 1 deletion phrase/paragraph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,22 @@ func TestPinyin(t *testing.T) {
seg := gse.New("zh, ../examples/dict.txt")
WithGse(seg)

text := "西雅图都会区, 西雅图太空针"

AddDict("都会区", "dū huì qū")
p := Pinyin("西雅图都会区, 西雅图太空针")
p := Pinyin(text)
tt.Equal(t, "[xi ya tu du hui qu, xi ya tu tai kong zhen]", p)

i := Initial("都会区")
tt.Equal(t, "dhq", i)

Cut = false
s := seg.Trim(seg.CutAll(text))
i += ", "
for _, v := range s {
i1 := Initial(v)
i += i1 + " "
}
tt.Equal(t, "dhq, xyt dhq xyt tk z ", i)
Cut = true
}
38 changes: 29 additions & 9 deletions phrase/phrase.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import (
var (
// seg is the package-level segmenter; cutWords calls seg.CutAll on it.
seg gse.Segmenter
// loaded is a package-level flag; presumably it records whether a gse
// dictionary has been loaded — TODO confirm against LoadGseDict.
loaded bool

// Cut controls how pinyinPhrase looks up phrases. When true (the default)
// the input is segmented with cutWords and every segment is matched
// separately; when false the whole input is matched as a single word.
Cut = true
)

// LoadGseDict load the user's gse dict
Expand Down Expand Up @@ -42,19 +45,36 @@ func cutWords(s string, segs ...gse.Segmenter) []string {
return seg.CutAll(s)
}

// Match returns the pinyin registered for word.
//
// The built-in phraseDict is consulted first; when it yields an empty string
// the user-supplied DictAdd is used instead. Whichever value is found (possibly
// empty) is passed through gpy.ToFixed with the package-level Option before
// being returned.
func Match(word string) string {
	if py := phraseDict[word]; py != "" {
		return gpy.ToFixed(py, Option)
	}

	// No built-in entry: fall back to the user dictionary.
	return gpy.ToFixed(DictAdd[word], Option)
}

// matchs replaces the first occurrence of word in s with its pinyin, padded
// with one space on each side. When Match has no pinyin for word, s is
// returned unchanged.
func matchs(s, word string) string {
	py := Match(word)
	if py == "" {
		return s
	}

	return strings.Replace(s, word, " "+py+" ", 1)
}

// pinyinPhrase substitutes pinyin for dictionary phrases found in s and
// returns the rewritten string.
//
// NOTE(review): this span is a rendered diff, so the removed and the added
// lines of the function are interleaved below and the braces do not balance
// as shown. Going by the lines that use Cut and matchs: when Cut is true, s is
// segmented with cutWords and each segment is passed through matchs; when Cut
// is false, the whole of s is passed to matchs as a single word.
func pinyinPhrase(s string, segs ...gse.Segmenter) string {
words := cutWords(s, segs...)
for _, word := range words {
match := phraseDict[word]
if match == "" {
match = DictAdd[word]
// Cut enabled: segment s and substitute each segment via matchs.
if Cut {
words := cutWords(s, segs...)
for _, word := range words {
s = matchs(s, word)
}

match = gpy.ToFixed(match, Option)
if match != "" {
s = strings.Replace(s, word, " "+match+" ", 1)
}
return s
}

// Cut disabled: look up the whole of s as one dictionary word.
s = matchs(s, s)
return s
}

0 comments on commit dd52367

Please sign in to comment.