From b19bf474d317b857955b12035d2c5acb57ce8b01 Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Thu, 6 Jul 2017 15:46:35 +0200 Subject: [PATCH] language: added PreferSameScript MatchOption and added match options. Change-Id: I7c772740515ff557b30fa03be034622b49ea544a Reviewed-on: https://go-review.googlesource.com/47592 Run-TryBot: Marcel van Lohuizen Reviewed-by: Nigel Tao --- language/match.go | 57 +++++++++++++++++++++++++++--------------- language/match_test.go | 16 ++++++------ 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/language/match.go b/language/match.go index 71f1258dc..63bc744a3 100644 --- a/language/match.go +++ b/language/match.go @@ -6,6 +6,16 @@ package language import "errors" +// A MatchOption configures a Matcher. +type MatchOption func(*matcher) + +// PreferSameScript will, in the absence of a match, result in the first +// preferred tag with the same script as a supported tag to match this supported +// tag. The default is currently true, but this may change in the future. +func PreferSameScript(preferSame bool) MatchOption { + return func(m *matcher) { m.preferSameScript = preferSame } +} + // Matcher is the interface that wraps the Match method. // // Match returns the best match for any of the given tags, along with @@ -36,8 +46,8 @@ func Comprehends(speaker, alternative Tag) Confidence { // matched tag in t, but is augmented with the Unicode extension ('u')of the // corresponding preferred tag. This allows user locale options to be passed // transparently. -func NewMatcher(t []Tag) Matcher { - return newMatcher(t) +func NewMatcher(t []Tag, options ...MatchOption) Matcher { + return newMatcher(t, options) } func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) { @@ -47,18 +57,20 @@ func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) { } else { // TODO: this should be an option t = m.default_.tag - outer: - for _, w := range want { - script, _ := w.Script() - if script.scriptID == 0 { - // Don't do anything if there is no script, such as with - // private subtags. - continue - } - for i, h := range m.supported { - if script.scriptID == h.maxScript { - t, index = h.tag, i - break outer + if m.preferSameScript { + outer: + for _, w := range want { + script, _ := w.Script() + if script.scriptID == 0 { + // Don't do anything if there is no script, such as with + // private subtags. + continue + } + for i, h := range m.supported { + if script.scriptID == h.maxScript { + t, index = h.tag, i + break outer + } } } } @@ -407,10 +419,11 @@ func minimizeTags(t Tag) (Tag, error) { // matcher keeps a set of supported language tags, indexed by language. type matcher struct { - default_ *haveTag - supported []*haveTag - index map[langID]*matchHeader - passSettings bool + default_ *haveTag + supported []*haveTag + index map[langID]*matchHeader + passSettings bool + preferSameScript bool } // matchHeader has the lists of tags for exact matches and matches based on @@ -521,9 +534,13 @@ func toConf(d uint8) Confidence { // newMatcher builds an index for the given supported tags and returns it as // a matcher. It also expands the index by considering various equivalence classes // for a given tag. -func newMatcher(supported []Tag) *matcher { +func newMatcher(supported []Tag, options []MatchOption) *matcher { m := &matcher{ - index: make(map[langID]*matchHeader), + index: make(map[langID]*matchHeader), + preferSameScript: true, + } + for _, o := range options { + o(m) } if len(supported) == 0 { m.default_ = &haveTag{} diff --git a/language/match_test.go b/language/match_test.go index cbb847cdd..4fb56a025 100644 --- a/language/match_test.go +++ b/language/match_test.go @@ -381,7 +381,7 @@ func parseSupported(list string) (out []Tag) { func TestBestMatch(t *testing.T) { for _, tt := range matchTests { supported := parseSupported(tt.supported) - m := newMatcher(supported) + m := newMatcher(supported, nil) if *verbose { fmt.Printf("%s:\n%v\n", tt.comment, m) } @@ -475,7 +475,7 @@ var benchWant = [][]Tag{ } func BenchmarkMatch(b *testing.B) { - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { for _, want := range benchWant { m.getBest(want...) @@ -485,7 +485,7 @@ func BenchmarkMatch(b *testing.B) { func BenchmarkMatchExact(b *testing.B) { want := mk("en") - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { m.getBest(want) } @@ -493,7 +493,7 @@ func BenchmarkMatchExact(b *testing.B) { func BenchmarkMatchAltLanguagePresent(b *testing.B) { want := mk("hr") - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { m.getBest(want) } @@ -501,7 +501,7 @@ func BenchmarkMatchAltLanguagePresent(b *testing.B) { func BenchmarkMatchAltLanguageNotPresent(b *testing.B) { want := mk("nn") - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { m.getBest(want) } @@ -509,7 +509,7 @@ func BenchmarkMatchAltLanguageNotPresent(b *testing.B) { func BenchmarkMatchAltScriptPresent(b *testing.B) { want := mk("zh-Hant-CN") - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { m.getBest(want) } @@ -517,7 +517,7 @@ func BenchmarkMatchAltScriptPresent(b *testing.B) { func BenchmarkMatchAltScriptNotPresent(b *testing.B) { want := mk("fr-Cyrl") - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { m.getBest(want) } @@ -525,7 +525,7 @@ func BenchmarkMatchAltScriptNotPresent(b *testing.B) { func BenchmarkMatchLimitedExact(b *testing.B) { want := []Tag{mk("he-NL"), mk("iw-NL")} - m := newMatcher(benchHave) + m := newMatcher(benchHave, nil) for i := 0; i < b.N; i++ { m.getBest(want...) }