From c9742285a7eca108c7c89f53475d26e305ecb962 Mon Sep 17 00:00:00 2001 From: hougang liu Date: Tue, 28 May 2019 17:08:56 +0800 Subject: [PATCH] Fix issue of hyperband suggestion service cannot move on --- .../v1alpha1/studyjob/studyjob_controller.go | 25 ++++++++----------- pkg/suggestion/v1alpha1/hyperband_service.go | 7 ++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pkg/controller/v1alpha1/studyjob/studyjob_controller.go b/pkg/controller/v1alpha1/studyjob/studyjob_controller.go index eb7b3159841..dbf1dfef992 100644 --- a/pkg/controller/v1alpha1/studyjob/studyjob_controller.go +++ b/pkg/controller/v1alpha1/studyjob/studyjob_controller.go @@ -542,21 +542,6 @@ func (r *ReconcileStudyJobController) checkStatus(instance *katibv1alpha1.StudyJ } func (r *ReconcileStudyJobController) getAndRunSuggestion(instance *katibv1alpha1.StudyJob, c katibapi.ManagerClient, ns string) (bool, error) { - //Check Suggestion Count - sps, err := getSuggestionParam(c, instance.Status.SuggestionParameterID) - if err != nil { - return false, err - } - for i := range sps { - if sps[i].Name == "SuggestionCount" { - count, _ := strconv.Atoi(sps[i].Value) - if count >= instance.Status.SuggestionCount+1 { - //Suggestion count mismatched. May be duplicate suggestion request - return false, nil - } - sps[i].Value = strconv.Itoa(instance.Status.SuggestionCount + 1) - } - } //GetSuggestion getSuggestReply, err := getSuggestion( c, @@ -604,7 +589,17 @@ func (r *ReconcileStudyJobController) getAndRunSuggestion(instance *katibv1alpha }, ) } + //Update Suggestion Count + sps, err := getSuggestionParam(c, instance.Status.SuggestionParameterID) + if err != nil { + return false, err + } + for i := range sps { + if sps[i].Name == "SuggestionCount" { + sps[i].Value = strconv.Itoa(instance.Status.SuggestionCount + 1) + } + } sspr := &katibapi.SetSuggestionParametersRequest{ StudyId: instance.Status.StudyID, SuggestionAlgorithm: instance.Spec.SuggestionSpec.SuggestionAlgorithm, diff --git a/pkg/suggestion/v1alpha1/hyperband_service.go b/pkg/suggestion/v1alpha1/hyperband_service.go index 9a21a4daa15..11544d589e6 100644 --- a/pkg/suggestion/v1alpha1/hyperband_service.go +++ b/pkg/suggestion/v1alpha1/hyperband_service.go @@ -365,6 +365,9 @@ func (h *HyperBandSuggestService) evalWorkers(ctx context.Context, c api.Manager if ml.WorkerStatus != api.State_COMPLETED { return nil, nil } + if len(ml.MetricsLogs) == 0 { + return nil, nil + } v, _ := strconv.ParseFloat(ml.MetricsLogs[0].Values[len(ml.MetricsLogs[0].Values)-1].Value, 64) vs += v } @@ -441,6 +444,10 @@ func (h *HyperBandSuggestService) GetSuggestions(ctx context.Context, in *api.Ge hbparam.evaluatingTrials = tids h.shLoopParamUpdate(in.StudyId, hbparam) err = h.saveSuggestionParameters(ctx, c, in.StudyId, in.SuggestionAlgorithm, in.ParamId, hbparam) + if err != nil { + klog.Fatalf("saveSuggestionParameters failed: %v", err) + return &api.GetSuggestionsReply{}, err + } return &api.GetSuggestionsReply{ Trials: ts, }, nil