cancel code for LDASearch()
e-gun committed Feb 28, 2024
1 parent 1e66fee commit 80ae4af
Showing 7 changed files with 48 additions and 21 deletions.
4 changes: 2 additions & 2 deletions go.mod
@@ -3,9 +3,9 @@ module github.com/e-gun/HipparchiaGoServer
go 1.21

require (
- github.com/e-gun/nlp v0.0.0-20230418221101-577c2209ffcc
+ github.com/e-gun/nlp v0.0.2
github.com/e-gun/tsnemp v0.0.0
- github.com/e-gun/wego v0.0.18
+ github.com/e-gun/wego v0.1.0
github.com/go-echarts/go-echarts/v2 v2.3.3
github.com/google/uuid v1.6.0
github.com/gorilla/websocket v1.5.1
11 changes: 6 additions & 5 deletions rt-session.go
@@ -248,16 +248,17 @@ func RtResetSession(c echo.Context) error {

AllSessions.Delete(id)

- // cancel any searches in progress
+ // cancel any searches in progress: you are about to do a .CancelFnc()
WSInfo.Reset <- id

- // two-part searches are not canceled yet; and the incomplete results will be handed to the next function
+ // [a] two-part searches are not canceled yet; and the incomplete results will be handed to the next function
// canceling the subsequent parts happens via SSBuildQueries()
// if !AllSessions.IsInVault(s.User) no actual queries will be loaded into the ss so the search ends instantly

- // there are no easy ways to halt a vector search once it starts training since the wego code has taken over
- // you can only stop in the text building and parsing stage, but these will usually be over before you can mouse
- // over and click "reset"...
+ // [b] a different mechanism is used to halt a nn vector search once it starts training and the wego code has taken over
+ // but the supplied context can cancel a training loop, yield empty embeddings, and then skip storage

+ // [c] lda uses a similar mechanism: context inserted into nlp.LatentDirichletAllocation in the nlp code

// reset the user ID and session
newid := writeUUIDCookie(c)
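The [a]/[b]/[c] comments above describe the cancellation contract but not its shape. Below is a minimal stand-alone sketch of the idea using only the standard library; searchJob, its fields, and the phase names are illustrative stand-ins, not the server's actual types:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// searchJob is a stand-in for the server's per-search bookkeeping: the search
// carries a context plus its cancel function so that a later "reset" can stop it.
type searchJob struct {
	ID        string
	Ctx       context.Context
	CancelFnc context.CancelFunc
}

func newSearchJob(id string) *searchJob {
	ctx, cancel := context.WithCancel(context.Background())
	return &searchJob{ID: id, Ctx: ctx, CancelFnc: cancel}
}

// run walks the phases named in the comments above: [a] query building,
// [b] neural-network training, [c] LDA modeling. Each phase checks the
// context first, so a reset issued at any point ends the job early.
func (s *searchJob) run() {
	phases := []string{"build queries", "train embeddings", "model topics"}
	for _, p := range phases {
		if s.Ctx.Err() != nil {
			fmt.Printf("%s: cancelled before %q\n", s.ID, p)
			return
		}
		fmt.Printf("%s: %s\n", s.ID, p)
		time.Sleep(50 * time.Millisecond) // pretend work
	}
}

func main() {
	j := newSearchJob("demo")
	go j.run()
	time.Sleep(75 * time.Millisecond)
	j.CancelFnc() // what a reset ultimately triggers for each in-flight search
	time.Sleep(200 * time.Millisecond)
}
```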
7 changes: 6 additions & 1 deletion rt-websocket.go
@@ -385,6 +385,10 @@ func BuildWSInfoHubIf() *WSInfoHubInterface {

// WSSearchInfoHub - the loop that lets you read/write from/to the various WSSrchInfo channels via the WSInfo global (a *WSInfoHubInterface)
func WSSearchInfoHub() {
const (
CANC = "WSSearchInfoHub() reports that '%s' was cancelled"
)

var (
Allinfo = make(map[string]WSSrchInfo)
Finished = make(map[string]bool)
@@ -426,11 +430,12 @@
}
}

// see also the notes at RtResetSession()
cancelall := func(u string) {
for _, v := range Allinfo {
if v.User == u {
v.CancelFnc()
- // msg(v.ID+" canceled", 1)
+ msg(fmt.Sprintf(CANC, v.ID), MSGPEEK)
}
}
}
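WSSearchInfoHub() keeps all search bookkeeping inside one goroutine and talks to it over channels, so cancelall() can fire each user's CancelFnc without locks. A rough stand-alone sketch of that hub pattern follows; hub, srchInfo, and the channel names are stand-ins rather than the real WSInfoHubInterface:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// srchInfo is a stand-in for WSSrchInfo: just enough to cancel by user.
type srchInfo struct {
	ID        string
	User      string
	CancelFnc context.CancelFunc
}

// hub owns the map of active searches; all access goes through channels,
// mirroring the WSSearchInfoHub() loop without copying its real channel types.
type hub struct {
	insert chan srchInfo
	reset  chan string // user id, as in `WSInfo.Reset <- id`
}

func newHub() *hub {
	h := &hub{insert: make(chan srchInfo), reset: make(chan string)}
	go func() {
		all := make(map[string]srchInfo)
		for {
			select {
			case si := <-h.insert:
				all[si.ID] = si
			case u := <-h.reset:
				// the equivalent of cancelall(): fire CancelFnc on every search this user owns
				for id, si := range all {
					if si.User == u {
						si.CancelFnc()
						fmt.Printf("hub reports that '%s' was cancelled\n", id)
						delete(all, id)
					}
				}
			}
		}
	}()
	return h
}

func main() {
	h := newHub()
	ctx, cancel := context.WithCancel(context.Background())
	h.insert <- srchInfo{ID: "s1", User: "me", CancelFnc: cancel}
	h.reset <- "me" // what a session reset effectively does
	<-ctx.Done()
	fmt.Println("search context is done:", ctx.Err())
	time.Sleep(10 * time.Millisecond)
}
```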
2 changes: 1 addition & 1 deletion searchstructbuilder.go
@@ -269,7 +269,7 @@ func SessionIntoBulkSearch(c echo.Context, lim int) SearchStruct {
ss.LemmaTwo = ""
ss.SkgSlice = []string{}
ss.CurrentLimit = lim
- ss.InitSum = "(gathering and formatting lines of text)"
+ ss.InitSum = "Gathering and formatting the text..."
ss.ID = strings.Replace(uuid.New().String(), "-", "", -1)

// BuildDefaultSearch() set some things that need resetting
4 changes: 2 additions & 2 deletions selftestsuite.go
@@ -158,8 +158,8 @@ func selftestsuite() {

u := fmt.Sprintf("http://%s:%d/", Config.HostIP, Config.HostPort)

- tt := [5]bool{true, true, true, true, true}
- // tt := [5]bool{false, false, false, true, false}
+ // tt := [5]bool{true, true, true, true, true}
+ tt := [5]bool{false, false, false, false, true}

getter := func(u string) {
res, e := http.Get(u)
8 changes: 5 additions & 3 deletions vectornnembeddings.go
@@ -166,7 +166,7 @@ func generateembeddings(c echo.Context, modeltype string, s SearchStruct) embedd
// BUT word2vec and lexvec do not do this (much): and glove does: +50MB to model Hdt
// bleh. The problem is in imported code?

- enablecancellation := func(m model.CtxModel) {
+ enablecancellation := func(m model.ModelWithCtx) {
InsertNewContextIntoSS(&s)
m.InsertContext(s.Context)
WSInfo.InsertInfo <- GenerateSrchInfo(&s)
@@ -179,6 +179,7 @@
if err != nil {
msg(FAIL1, MSGWARN)
}
enablecancellation(m)
vmodel = m
ti = cfg.Iter
case "lexvec":
@@ -187,16 +188,16 @@
if err != nil {
msg(FAIL1, MSGWARN)
}
enablecancellation(m)
vmodel = m
ti = cfg.Iter
default:
cfg := w2vvectorconfig()
m, err := word2vec.NewForOptions(cfg)
- enablecancellation(m)
-
if err != nil {
msg(FAIL1, MSGWARN)
}
+ enablecancellation(m)
vmodel = m
ti = cfg.Iter
}
@@ -213,6 +214,7 @@
finished := make(chan bool)

// .Train() but do not block; so we can also .Reporter()
// NB the training has a copy of the ss's context.Cancel; wego's trainPerThread() is responsive to RtResetSession()
go func() {
if err := vmodel.Train(b); err != nil {
msg(FAIL2, 1)
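The pattern added here is: hand the model a copy of the search's context, run Train() in a goroutine so a progress reporter can run alongside it, and let a session reset cancel the context mid-training. A self-contained sketch under those assumptions; train() stands in for wego's training loop and is not its real API:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// train stands in for vmodel.Train(): a long iterative loop that honors ctx,
// much as a context-aware trainPerThread() would respond to a session reset.
func train(ctx context.Context, iterations int) error {
	for i := 0; i < iterations; i++ {
		select {
		case <-ctx.Done():
			return ctx.Err() // abandon training; the caller will see empty embeddings
		default:
			time.Sleep(20 * time.Millisecond) // one epoch of pretend work
		}
	}
	return nil
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())

	finished := make(chan bool)

	// run training without blocking, so a reporter can poll progress in parallel
	go func() {
		if err := train(ctx, 50); err != nil {
			fmt.Println("training stopped:", err)
		}
		finished <- true
	}()

	// simulate a reset arriving while training is still running
	go func() {
		time.Sleep(120 * time.Millisecond)
		cancel() // what a session reset ultimately does to the training context
	}()

	// a minimal "reporter": tick until training finishes or is cancelled
	ticker := time.NewTicker(50 * time.Millisecond)
	defer ticker.Stop()

	for {
		select {
		case <-finished:
			fmt.Println("done")
			return
		case <-ticker.C:
			fmt.Println("...still training...")
		}
	}
}
```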
33 changes: 26 additions & 7 deletions vectorquerylda.go
@@ -90,10 +90,11 @@ func LDASearch(c echo.Context, srch SearchStruct) error {

var vs SearchStruct
if srch.ID != "ldamodelbot()" {
- WSInfo.UpdateRemain <- WSSIKVi{srch.WSID, 1}
- WSInfo.UpdateSummMsg <- WSSIKVs{srch.WSID, LDAMSG}
- WSInfo.UpdateVProgMsg <- WSSIKVs{srch.WSID, fmt.Sprintf(ESM1)}
vs = SessionIntoBulkSearch(c, Config.VectorMaxlines)
+ WSInfo.InsertInfo <- GenerateSrchInfo(&vs)
+ WSInfo.UpdateRemain <- WSSIKVi{vs.WSID, 1}
+ WSInfo.UpdateSummMsg <- WSSIKVs{vs.WSID, LDAMSG}
+ WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf(ESM1)}
} else {
vs = srch
}
@@ -108,7 +109,7 @@ func LDASearch(c echo.Context, srch SearchStruct) error {
stops := StringMapKeysIntoSlice(getstopset())
vectoriser := nlp.NewCountVectoriser(stops...)

- WSInfo.UpdateVProgMsg <- WSSIKVs{srch.WSID, fmt.Sprintf(ESM2)}
+ WSInfo.UpdateVProgMsg <- WSSIKVs{vs.WSID, fmt.Sprintf(ESM2)}

// consider building TESTITERATIONS models and making a table for each
var dot mat.Matrix
@@ -120,7 +121,7 @@ func LDASearch(c echo.Context, srch SearchStruct) error {
return JSONresponse(c, SearchOutputJSON{})
}

- docsOverTopics, topicsOverWords := ldamodel(ntopics, corpus, vectoriser)
+ docsOverTopics, topicsOverWords := ldamodel(ntopics, corpus, vectoriser, &vs)
tables = append(tables, ldatopicsummary(ntopics, topicsOverWords, vectoriser, docsOverTopics))
tables = append(tables, ldatopsentences(ntopics, bags, corpus, docsOverTopics))
dot = docsOverTopics
@@ -131,7 +132,7 @@ func LDASearch(c echo.Context, srch SearchStruct) error {

var img string
if se.LDAgraph || srch.ID == "ldamodelbot()" {
- WSInfo.UpdateVProgMsg <- WSSIKVs{srch.ID, fmt.Sprintf(ESM3)}
+ WSInfo.UpdateVProgMsg <- WSSIKVs{vs.ID, fmt.Sprintf(ESM3)}
img = ldaplot(se.LDA2D, ntopics, incl, se.VecTextPrep, dot, bags)
}

@@ -234,6 +235,11 @@ func ldapreptext(bagger string, vs *SearchStruct) []BagWithLocus {
}

slicedwords := StringMapKeysIntoSlice(allwords)
// catching resets
if Config.SelfTest == 0 && !Config.VectorBot && !AllSessions.IsInVault(vs.User) {
return []BagWithLocus{}
}

morphmapdbm := arraytogetrequiredmorphobjects(slicedwords) // map[string]DbMorphology
morphmapstrslc := buildmorphmapstrslc(slicedwords, morphmapdbm)

@@ -252,6 +258,11 @@ func ldapreptext(bagger string, vs *SearchStruct) []BagWithLocus {
thebags = ldawinnerbagging(thebags, winnermap)
}

// catching resets
if Config.SelfTest == 0 && !Config.VectorBot && !AllSessions.IsInVault(vs.User) {
return []BagWithLocus{}
}

return thebags
}

@@ -302,13 +313,21 @@ func ldamontecarlobagging(thebags []BagWithLocus, montecarlo map[string]hwguesse
}

// ldamodel - build the lda model for the corpus
- func ldamodel(topics int, corpus []string, vectoriser *nlp.CountVectoriser) (mat.Matrix, mat.Matrix) {
+ func ldamodel(topics int, corpus []string, vectoriser *nlp.CountVectoriser, s *SearchStruct) (mat.Matrix, mat.Matrix) {
const (
FAIL = "Failed to model topics for documents"
)

enablecancellation := func(l *nlp.LatentDirichletAllocation) {
InsertNewContextIntoSS(s)
l.Ctx = s.Context
WSInfo.InsertInfo <- GenerateSrchInfo(s)
}

cfg := ldavecconfig()
lda := nlp.NewLatentDirichletAllocation(topics)
enablecancellation(lda)

lda.Processes = cfg.Goroutines
lda.Iterations = cfg.LDAIterations
lda.TransformationPasses = cfg.LDAXformPasses
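The same idea applied to LDA: the model carries the search's context (lda.Ctx = s.Context) and the fitting loop can stop between passes, leaving LDASearch() to return an empty result. Below is a stand-alone sketch; miniLDA is an illustrative stand-in, not the e-gun/nlp LatentDirichletAllocation API:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// miniLDA is a stand-in for an LDA model with an added Ctx field:
// the fitting loop consults the context between iterations and gives up early.
type miniLDA struct {
	Ctx        context.Context
	Iterations int
	Topics     int
}

// fit pretends to run sampling passes; on cancellation it returns nil,
// which the caller treats as "no topic matrix, hand back an empty result".
func (l *miniLDA) fit(docs []string) [][]float64 {
	for i := 0; i < l.Iterations; i++ {
		select {
		case <-l.Ctx.Done():
			return nil // cancelled mid-training
		default:
			time.Sleep(10 * time.Millisecond) // one pass of pretend work
		}
	}
	out := make([][]float64, len(docs))
	for i := range out {
		out[i] = make([]float64, l.Topics)
	}
	return out
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())

	lda := &miniLDA{Ctx: ctx, Iterations: 100, Topics: 8}

	go func() {
		time.Sleep(50 * time.Millisecond)
		cancel() // the reset path: the session's CancelFnc fires
	}()

	if m := lda.fit([]string{"doc one", "doc two"}); m == nil {
		fmt.Println("lda cancelled; returning an empty result set")
		return
	}
	fmt.Println("lda finished")
}
```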
