Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
moshaad7 committed Aug 12, 2024
1 parent f7fea09 commit b1b9e24
Show file tree
Hide file tree
Showing 47 changed files with 317 additions and 74 deletions.
5 changes: 4 additions & 1 deletion analysis/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ func BenchmarkAnalysis(b *testing.B) {
b.Fatal(err)
}

ts := analyzer.Analyze(bleveWikiArticle)
ts, err := analysis.AnalyzeForTokens(analyzer, bleveWikiArticle)
if err != nil {
b.Fatalf("error analyzing text: %v", err)
}
freqs := analysis.TokenFrequency(ts, nil, index.IncludeTermVectors)
if len(freqs) != 511 {
b.Errorf("expected %d freqs, got %d", 511, len(freqs))
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/ar/analyzer_ar_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,10 @@ func TestArabicAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/cjk/analyzer_cjk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,10 @@ func TestCJKAnalyzer(t *testing.T) {
if err != nil {
t.Fatal(err)
}
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/ckb/analyzer_ckb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ func TestSoraniAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/da/analyzer_da_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ func TestDanishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/de/analyzer_de_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ func TestGermanAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/en/analyzer_en_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ func TestEnglishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/es/analyzer_es_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ func TestSpanishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
15 changes: 12 additions & 3 deletions analysis/lang/fa/analyzer_fa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,10 @@ func TestPersianAnalyzerVerbs(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down Expand Up @@ -600,7 +603,10 @@ func TestPersianAnalyzerVerbsDefective(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down Expand Up @@ -671,7 +677,10 @@ func TestPersianAnalyzerOthers(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/fi/analyzer_fi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestFinishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/fr/analyzer_fr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,10 @@ func TestFrenchAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/hi/analyzer_hi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ func TestHindiAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/hr/analyzer_hr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,10 @@ func TestCroatianAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/hu/analyzer_hu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestHungarianAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/it/analyzer_it_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,10 @@ func TestItalianAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/nl/analyzer_nl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestDutchAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/no/analyzer_no_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestNorwegianAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/pl/analyzer_pl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,10 @@ func TestPolishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/pt/analyzer_pt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestPortugueseAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/ro/analyzer_ro_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestRomanianAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/ru/analyzer_ru_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ func TestRussianAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/sv/analyzer_sv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ func TestSwedishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
5 changes: 4 additions & 1 deletion analysis/lang/tr/analyzer_tr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ func TestTurkishAnalyzer(t *testing.T) {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
actual, err := analysis.AnalyzeForTokens(analyzer, test.input)
if err != nil {
t.Fatalf("error analyzing input: %v", err)
}
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
Expand Down
69 changes: 69 additions & 0 deletions analysis/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,75 @@ type Analyzer interface {
Analyze([]byte) TokenStream
}

// -----------------------------------------------------------------------------

// AnalyzerType identifies the kind of output an AnalyzerV2 produces.
type AnalyzerType int

// Known analyzer kinds. The numeric values follow declaration order,
// starting at zero.
const (
	// TokensAnalyzer is the kind required by AnalyzeForTokens, which
	// expects the analyzer output to be a TokenStream.
	TokensAnalyzer = AnalyzerType(iota)
	// VectorAnalyzer is the kind required by AnalyzeForVectors, which
	// expects the analyzer output to be a []float32.
	VectorAnalyzer
)

// AnalyzerV2 is an analyzer whose Analyze result is untyped (any) and is
// accompanied by an error. AnalyzeForTokens and AnalyzeForVectors consult
// Type before calling Analyze and then type-assert the returned value.
type AnalyzerV2 interface {
// Type reports the kind of this analyzer (TokensAnalyzer or VectorAnalyzer).
Type() AnalyzerType
// Analyze processes the input bytes and returns the result, or an error.
Analyze([]byte) (any, error)
}

// AnalyzeForTokens runs analyzer over input and returns the resulting
// TokenStream.
//
// It returns an error, without calling Analyze, when analyzer is nil or when
// analyzer.Type() is not TokensAnalyzer. An error returned by Analyze is
// passed through unchanged. If the output is not a TokenStream, the returned
// error names the actual output type.
func AnalyzeForTokens(analyzer AnalyzerV2, input []byte) (TokenStream, error) {
	// Calling a method on a nil interface value panics; report it instead.
	if analyzer == nil {
		return nil, fmt.Errorf("incompatible analyzer type: analyzer is nil")
	}

	if typ := analyzer.Type(); typ != TokensAnalyzer {
		return nil, fmt.Errorf("incompatible analyzer type: got %d, expected %d (TokensAnalyzer)",
			typ, TokensAnalyzer)
	}

	output, err := analyzer.Analyze(input)
	if err != nil {
		return nil, err
	}

	rv, ok := output.(TokenStream)
	if !ok {
		// %T makes a misbehaving analyzer diagnosable from the message alone.
		return nil, fmt.Errorf("unexpected output type, expected TokenStream, got %T", output)
	}

	return rv, nil
}

// AnalyzeForVectors runs analyzer over input and returns the resulting
// []float32.
//
// It returns an error, without calling Analyze, when analyzer is nil or when
// analyzer.Type() is not VectorAnalyzer. An error returned by Analyze is
// passed through unchanged. If the output is not a []float32, the returned
// error names the actual output type.
func AnalyzeForVectors(analyzer AnalyzerV2, input []byte) ([]float32, error) {
	// Calling a method on a nil interface value panics; report it instead.
	if analyzer == nil {
		return nil, fmt.Errorf("incompatible analyzer type: analyzer is nil")
	}

	if typ := analyzer.Type(); typ != VectorAnalyzer {
		return nil, fmt.Errorf("incompatible analyzer type: got %d, expected %d (VectorAnalyzer)",
			typ, VectorAnalyzer)
	}

	output, err := analyzer.Analyze(input)
	if err != nil {
		return nil, err
	}

	rv, ok := output.([]float32)
	if !ok {
		// %T makes a misbehaving analyzer diagnosable from the message alone.
		return nil, fmt.Errorf("unexpected output type, expected []float32, got %T", output)
	}

	return rv, nil
}

// -----------------------------------------------------------------------------

// AnalyzerAdapter is a helper type that allows an Analyzer to be used as an
// AnalyzerV2.
//
// An Analyzer can simply be wrapped in an AnalyzerAdapter to be used as an
// AnalyzerV2. The Type and Analyze methods are declared on *AnalyzerAdapter,
// so it is the pointer that satisfies AnalyzerV2.
type AnalyzerAdapter struct {
Analyzer
}

// Type always reports TokensAnalyzer for an adapter.
func (a *AnalyzerAdapter) Type() AnalyzerType {
return TokensAnalyzer
}

// Analyze runs the wrapped Analyzer over input and returns its TokenStream
// as an untyped value.
//
// The wrapped Analyzer's Analyze has no error result, so the error is nil
// whenever the call is delegated. An error is returned only when the adapter
// is nil or wraps no Analyzer, cases that would otherwise panic.
func (a *AnalyzerAdapter) Analyze(input []byte) (any, error) {
	if a == nil || a.Analyzer == nil {
		return nil, fmt.Errorf("analyzer adapter has no underlying analyzer")
	}
	return a.Analyzer.Analyze(input), nil
}

// -----------------------------------------------------------------------------

type DefaultAnalyzer struct {
CharFilters []CharFilter
Tokenizer Tokenizer
Expand Down
2 changes: 1 addition & 1 deletion document/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type Field interface {
// "doc1", then "field" in "doc2".
ArrayPositions() []uint64
Options() index.FieldIndexingOptions
Analyze()
Analyze() error
Value() []byte

// NumPlainTextBytes should return the number of plain text bytes
Expand Down
Loading

0 comments on commit b1b9e24

Please sign in to comment.