Skip to content

Commit

Permalink
update lda default values and documentation; flag a WSInfo issue in…
Browse files Browse the repository at this point in the history
… `LDASearch()`
  • Loading branch information
e-gun committed Feb 28, 2024
1 parent 80ae4af commit a1904a2
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 23 deletions.
15 changes: 13 additions & 2 deletions INSTRUCTIONS/BASIC_USE.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
16. Get a text of the current selection.
17. Build an index of the current selection.
18. Get the vocabulary for the current selection.
19. I need to know still more...
19. Cancel a long search / vectorization request.
20. I need to know still more...

---
[0] **Built-in tips**
Expand Down Expand Up @@ -239,7 +240,17 @@ Click the icon and a vocabulary list will be generated for whatever you have sel

---

[19] **I need to know still more...**
[19] **Cancel a long search / vectorization request**

Click on the circled `X` in the upper left portion of the screen. This will reset your session and cancel
any searches in progress. Note that you will lose any search lists, etc. that are currently in use.
This is most useful to people who accidentally make an `everything` request on a slow machine and are in danger of jamming their CPU for several minutes.

![inst03](../gitimg/basic_use/01_basic_search_area.png)

---

[20] **I need to know still more...**

More help is also available via the `?` button in the bottom right corner of the page. There you can learn about
`regex`, see some warnings about the data, etc. See also the separate instructions about `semantic vectors`.
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* near/not-near syntax
* progress polling
* automatic rewrites of searches to optimize for speed
* can cancel long in-progress searches (and/or vectorizations)
* dictionaries
* text browsing
* text maker
Expand Down
11 changes: 11 additions & 0 deletions conf/sample-hgs-vector-conf-lda.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"SentencesPerBag": 1,
"LDAIterations": 200,
"LDAXformPasses": 100,
"BurnInPasses": 2,
"ChangeEvalFrq": 10,
"PerplexEvalFrq": 10,
"PerplexTol": 0.01,
"Goroutines": 20,
"MaxLDAGraphSize": 30000
}
10 changes: 5 additions & 5 deletions constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ const (
LDATOPICS = 8
LDAMAXTOPICS = 30
LDASENTPERBAG = 1
LDAITER = 60
LDAXFORMPASSES = 30
LDABURNINPASSES = 1
LDACHGEVALFRQ = 15
LDAPERPEVALFRQ = 15
LDAITER = 200
LDAXFORMPASSES = 100
LDABURNINPASSES = 2
LDACHGEVALFRQ = 10
LDAPERPEVALFRQ = 10
LDAPERPTOL = 1e-2
LDAMAXGRAPHLINES = 30000
MAXBROWSERCONTEXT = 60
Expand Down
51 changes: 42 additions & 9 deletions rt-websocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,12 +386,14 @@ func BuildWSInfoHubIf() *WSInfoHubInterface {
// WSSearchInfoHub - the loop that lets you read/write from/to the various WSSrchInfo channels via the WSInfo global (a *WSInfoHubInterface)
func WSSearchInfoHub() {
const (
CANC = "WSSearchInfoHub() reports that '%s' was cancelled"
CANC = "WSSearchInfoHub() reports that '%s' was cancelled"
FINWAIT = 10
FINCHK = 60
)

var (
Allinfo = make(map[string]WSSrchInfo)
Finished = make(map[string]bool)
Finished = make(map[string]time.Time)
)

reporter := func(r WSSIReply) {
Expand Down Expand Up @@ -423,23 +425,54 @@ func WSSearchInfoHub() {
return count
}

// see also the notes at RtResetSession()
// cancelall - walk Allinfo and, for every WSSrchInfo whose User equals u, call its CancelFnc() and
// report the cancellation via msg(); entries are not removed from Allinfo here
cancelall := func(u string) {
for _, v := range Allinfo {
if v.User == u {
v.CancelFnc()
msg(fmt.Sprintf(CANC, v.ID), MSGPEEK)
}
}
}

// this silly mechanism because selftest had 2nd round of nn vector tests respawning after deletion; rare, but...
// storeunlessfinished - record si in Allinfo under si.ID, but only when that ID has no entry in Finished;
// an ID that is present in Finished is silently ignored
storeunlessfinished := func(si WSSrchInfo) {
if _, ok := Finished[si.ID]; !ok {
Allinfo[si.ID] = si
}
}

// see also the notes at RtResetSession()
cancelall := func(u string) {
for _, v := range Allinfo {
if v.User == u {
v.CancelFnc()
msg(fmt.Sprintf(CANC, v.ID), MSGPEEK)
// storeunlessfinished() requires a cleanup function too...
// cleanfinished - loop forever: delete every Finished entry whose timestamp is more than FINWAIT seconds old,
// then sleep FINCHK seconds before the next sweep; it never returns
// NOTE(review): this closure is started with `go cleanfinished()` and ranges over / deletes from Finished while
// the hub's own loop also reads and writes that map; no lock is visible here, so this looks like an
// unsynchronized concurrent map access — confirm, and consider doing the sweep inside the hub's select loop
// NOTE(review): because sweeps are FINCHK seconds apart, an entry can outlive FINWAIT by up to FINCHK seconds
cleanfinished := func() {
for {
for f := range Finished {
ft := Finished[f]
later := ft.Add(time.Second * FINWAIT)
if time.Now().After(later) {
delete(Finished, f)
} else {
// NOTE(review): prints the entry's expiry time to stdout on every sweep; looks like leftover debugging output
fmt.Println(later)
}
}
time.Sleep(time.Second * FINCHK)
}
}

go cleanfinished()

//UNCOMMENT FOR DEBUGGING BUILDS
//allinfo := func() {
// for {
// ai := StringMapKeysIntoSlice(Allinfo)
// msg("ai: "+strings.Join(ai, ", "), 2)
// for f := range Finished {
// msg(f+" is in finished", 2)
// }
// time.Sleep(1 * time.Second)
// }
//}
//go allinfo()

// the main loop; it will never exit
for {
select {
Expand Down Expand Up @@ -476,7 +509,7 @@ func WSSearchInfoHub() {
case reset := <-WSInfo.Reset:
cancelall(reset)
case del := <-WSInfo.Del:
Finished[del] = true
Finished[del] = time.Now()
delete(Allinfo, del)
}
}
Expand Down
4 changes: 2 additions & 2 deletions selftestsuite.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ func selftestsuite() {

u := fmt.Sprintf("http://%s:%d/", Config.HostIP, Config.HostPort)

// tt := [5]bool{true, true, true, true, true}
tt := [5]bool{false, false, false, false, true}
tt := [5]bool{true, true, true, true, true}
// tt := [5]bool{false, false, false, false, true}

getter := func(u string) {
res, e := http.Get(u)
Expand Down
31 changes: 26 additions & 5 deletions vectorquerylda.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ func (b *BagWithLocus) GetWL() {
func LDASearch(c echo.Context, srch SearchStruct) error {
const (
LDAMSG = `Building LDA model for the current selections`
ESM1 = "<br>preparing the text for modeling"
ESM2 = "<br>building topic models"
ESM1 = "preparing the text for modeling"
ESM2 = "building topic models"
ESM3 = "<br>using t-Distributed Stochastic Neighbor Embedding to build graph (please be patient...)"
)
c.Response().After(func() { messenger.LogPaths("LDASearch()") })
Expand All @@ -91,16 +91,37 @@ func LDASearch(c echo.Context, srch SearchStruct) error {
var vs SearchStruct
if srch.ID != "ldamodelbot()" {
vs = SessionIntoBulkSearch(c, Config.VectorMaxlines)
WSInfo.InsertInfo <- GenerateSrchInfo(&vs)
WSInfo.UpdateRemain <- WSSIKVi{vs.WSID, 1}
WSInfo.UpdateSummMsg <- WSSIKVs{vs.WSID, LDAMSG}
WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf(ESM1)}
} else {
vs = srch
}

// DEBUGGING WSInfo UPDATE ISSUES

//msg(vs.WSID, 2)
//
//getsrchinfo := func() WSSrchInfo {
// responder := WSSIReply{key: vs.WSID, response: make(chan WSSrchInfo)}
// WSInfo.RequestInfo <- responder
// return <-responder.response
//}

// [A] WSInfo works for next...
// WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf("XXXXX")}

//si := getsrchinfo()
//fmt.Println(si)

bags := ldapreptext(se.VecTextPrep, &vs)

//si = getsrchinfo()
//fmt.Println(si)

// [B] but now WSInfo is broken...
// WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf("YYYY")}

corpus := make([]string, len(bags))
for i := 0; i < len(bags); i++ {
corpus[i] = bags[i].ModifiedBag
Expand All @@ -109,7 +130,7 @@ func LDASearch(c echo.Context, srch SearchStruct) error {
stops := StringMapKeysIntoSlice(getstopset())
vectoriser := nlp.NewCountVectoriser(stops...)

WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf(ESM2)}
WSInfo.UpdateSummMsg <- WSSIKVs{vs.WSID, fmt.Sprintf(ESM2)}

// consider building TESTITERATIONS models and making a table for each
var dot mat.Matrix
Expand All @@ -132,7 +153,7 @@ func LDASearch(c echo.Context, srch SearchStruct) error {

var img string
if se.LDAgraph || srch.ID == "ldamodelbot()" {
WSInfo.UpdateVProgMsg <- WSSIKVs{vs.ID, fmt.Sprintf(ESM3)}
WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf(ESM3)}
img = ldaplot(se.LDA2D, ntopics, incl, se.VecTextPrep, dot, bags)
}

Expand Down

0 comments on commit a1904a2

Please sign in to comment.