Skip to content

Commit

Permalink
ss.Optimize() had a logic bug; augment ss.CleanInput()
Browse files Browse the repository at this point in the history
  • Loading branch information
e-gun committed Nov 4, 2023
1 parent 4f09691 commit 6497473
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 19 deletions.
4 changes: 3 additions & 1 deletion configatlaunch.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,11 @@ func ConfigAtLaunch() {
"projurl": PROJURL,
"vmodel": Config.VectorModel,
"workers": Config.WorkerCount}

t := template.Must(template.New("").Parse(HELPTEXTTEMPLATE))

var b bytes.Buffer
if err := t.Execute(&b, m); err != nil {
if ee := t.Execute(&b, m); ee != nil {
msg(FAIL7, MSGCRIT)
}
fmt.Println(styleoutput(coloroutput(b.String())))
Expand Down
6 changes: 3 additions & 3 deletions fyi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,13 @@ self-test with vectors can be deceptive because `-wc` flag will not override con
43 unique files.
0 files ignored.
github.com/AlDanial/cloc v 1.98 T=0.04 s (1087.2 files/s, 489430.3 lines/s)
github.com/AlDanial/cloc v 1.98 T=0.04 s (1046.2 files/s, 472237.8 lines/s)
-------------------------------------------------------------------------------
Language files blank comment code
-------------------------------------------------------------------------------
Go 43 2889 3193 13275
Go 43 2901 3204 13305
-------------------------------------------------------------------------------
SUM: 43 2889 3193 13275
SUM: 43 2901 3204 13305
-------------------------------------------------------------------------------
```
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ require (
github.com/e-gun/nlp v0.0.0-20230418221101-577c2209ffcc
github.com/e-gun/tsnemp v0.0.0
github.com/e-gun/wego v0.0.11
github.com/go-echarts/go-echarts/v2 v2.3.1
github.com/google/uuid v1.3.1
github.com/go-echarts/go-echarts/v2 v2.3.2
github.com/google/uuid v1.4.0
github.com/gorilla/websocket v1.5.0
github.com/jackc/pgx/v5 v5.4.3
github.com/json-iterator/go v1.1.12
Expand All @@ -23,7 +23,7 @@ require (
github.com/e-gun/sparse v0.0.0-20230418220937-07063da15582 // indirect
github.com/felixge/fgprof v0.9.3 // indirect
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
github.com/google/pprof v0.0.0-20230926050212-f7f687d19a98 // indirect
github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
Expand Down
13 changes: 7 additions & 6 deletions rt-getters.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,15 +355,16 @@ func RtGetJSSearchlist(c echo.Context) error {

m := message.NewPrinter(language.English)
sl := SessionIntoSearchlist(sess)
tw := 0

totalwords := 0

var wkk []string
for _, a := range sl.Inc.Authors {
for _, w := range AllAuthors[a].WorkList {
ct := WORKTMPL
cf := m.Sprintf(ct, AllAuthors[a].Cleaname, AllWorks[w].Title, AllWorks[w].WdCount)
wkk = append(wkk, cf)
tw += AllWorks[w].WdCount
totalwords += AllWorks[w].WdCount
}
}

Expand All @@ -372,20 +373,20 @@ func RtGetJSSearchlist(c echo.Context) error {
ct := WORKTMPL
cf := m.Sprintf(ct, thiswk.MyAu().Cleaname, thiswk.Title, thiswk.WdCount)
wkk = append(wkk, cf)
tw += thiswk.WdCount
totalwords += thiswk.WdCount
}

pattern := regexp.MustCompile(REG)
for _, p := range sl.Inc.Passages {
cit, count := searchlistpassages(pattern, p)
wkk = append(wkk, cit)
tw += count
totalwords += count
}

for _, p := range sl.Excl.Passages {
cit, count := searchlistpassages(pattern, p)
wkk = append(wkk, cit+"[EXCLUDED]")
tw -= count
totalwords -= count
}

if len(wkk) > MAXSEARCHINFOLISTLEN {
Expand All @@ -394,7 +395,7 @@ func RtGetJSSearchlist(c echo.Context) error {
wkk = append(wkk, m.Sprintf(SPILLOVER, diff))
}

wkk = append(wkk, m.Sprintf(SUMMARY, tw))
wkk = append(wkk, m.Sprintf(SUMMARY, totalwords))

ht := strings.Join(wkk, "<br>\n")
var j JSStruct
Expand Down
29 changes: 24 additions & 5 deletions searchstructs.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,21 @@ func (s *SearchStruct) CleanInput() {

s.Seeking = Purgechars(dropping, s.Seeking)
s.Proximate = Purgechars(dropping, s.Proximate)

// don't let BoxA be blank if BoxB is not
BoxA := s.Seeking == "" && s.LemmaOne == ""
NotBoxB := s.Proximate != "" || s.LemmaTwo != ""

if BoxA && NotBoxB {
if s.Proximate != "" {
s.Seeking = s.Proximate
s.Proximate = ""
}
if s.LemmaTwo != "" {
s.LemmaOne = s.LemmaTwo
s.LemmaTwo = ""
}
}
}

// SetType - set internal values via self-probe
Expand All @@ -115,15 +130,15 @@ func (s *SearchStruct) SetType() {
s.HasPhraseBoxB = true
}

if len(s.LemmaOne) != 0 {
if s.LemmaOne != "" {
s.HasLemmaBoxA = true
// accented line has "volat" in latin; and "uolo" will not find it
if isGreek.MatchString(s.LemmaOne) {
s.SrchColumn = "accented_line"
}
}

if len(s.LemmaTwo) != 0 {
if s.LemmaTwo != "" {
s.HasLemmaBoxB = true
}

Expand Down Expand Up @@ -258,8 +273,10 @@ func (s *SearchStruct) InclusionOverview(sessincl SearchIncExl) string {
return r
}

// Optimize - consider rewriting the search to make it faster
// Optimize - think about rewriting the search to make it faster
func (s *SearchStruct) Optimize() {
// only zero or one of the following should be true

// if BoxA has a lemma and BoxB has a phrase, it is almost certainly faster to search B, then A...
if s.HasLemmaBoxA && s.HasPhraseBoxB {
s.SwapPhraseAndLemma()
Expand All @@ -272,14 +289,16 @@ func (s *SearchStruct) Optimize() {
return
}

// a single word should be faster than a lemma
if s.HasLemmaBoxA && !s.HasPhraseBoxB {
// a single word should be faster than a lemma; but do not swap an empty string
if s.HasLemmaBoxA && !s.HasPhraseBoxB && s.Proximate != "" {
s.SwapWordAndLemma()
return
}

// consider looking for the string with more characters in it first
if len(s.Seeking) > 0 && len(s.Proximate) > 0 {
s.SearchQuickestFirst()
return
}
}

Expand Down
2 changes: 1 addition & 1 deletion vectorquerylda.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func LDASearch(c echo.Context, srch SearchStruct) error {
LDAMSG = `Building LDA model for the current selections`
ESM1 = "<br>preparing the text for modeling"
ESM2 = "<br>building topic models"
ESM3 = "<br>using t-Distributed Stochastic Neighbor Embedding to build graph"
ESM3 = "<br>using t-Distributed Stochastic Neighbor Embedding to build graph (please be patient...)"
)
c.Response().After(func() { messenger.LogPaths("LDASearch()") })

Expand Down

0 comments on commit 6497473

Please sign in to comment.