Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TestReconcileBatchJob #2350

Merged
merged 10 commits into from
Jun 14, 2024
Merged
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
272 changes: 145 additions & 127 deletions pkg/controller.v1beta1/trial/trial_controller_test.go
Copy link
Member

@tenzen-y tenzen-y Jun 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH, we should reimplement these tests as ginkgo BDD style, but that is out of scope of this PR.
So, I leave such refactoring in the future.

Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"testing"
"time"

"github.com/golang/mock/gomock"
"github.com/onsi/gomega"
"github.com/prometheus/client_golang/prometheus"
"github.com/spf13/viper"
Expand All @@ -43,7 +42,6 @@ import (
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
trialutil "github.com/kubeflow/katib/pkg/controller.v1beta1/trial/util"
"github.com/kubeflow/katib/pkg/controller.v1beta1/util"
managerclientmock "github.com/kubeflow/katib/pkg/mock/v1beta1/trial/managerclient"
)

const (
Expand All @@ -58,7 +56,7 @@ var trialKey = types.NamespacedName{Name: trialName, Namespace: namespace}
var batchJobKey = types.NamespacedName{Name: batchJobName, Namespace: namespace}

func init() {
logf.SetLogger(zap.New())
logf.SetLogger(zap.New(zap.UseDevMode(true)))
}

func TestAdd(t *testing.T) {
Expand Down Expand Up @@ -89,9 +87,7 @@ func TestAdd(t *testing.T) {
func TestReconcileBatchJob(t *testing.T) {
g := gomega.NewGomegaWithT(t)

mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()
mockManagerClient := managerclientmock.NewMockManagerClient(mockCtrl)
mockMC := &mockManagerClient{}

// Setup the Manager and Controller. Wrap the Controller Reconcile function so it writes each request to a
// channel when it is finished.
Expand All @@ -102,7 +98,7 @@ func TestReconcileBatchJob(t *testing.T) {
r := &ReconcileTrial{
Client: mgr.GetClient(),
scheme: mgr.GetScheme(),
ManagerClient: mockManagerClient,
ManagerClient: mockMC,
recorder: mgr.GetEventRecorderFor(ControllerName),
collector: trialutil.NewTrialsCollector(mgr.GetCache(), prometheus.NewRegistry()),
}
Expand Down Expand Up @@ -179,137 +175,146 @@ func TestReconcileBatchJob(t *testing.T) {
},
}

mockManagerClient.EXPECT().GetTrialObservationLog(gomock.Any()).Return(observationLogAvailable, nil).Times(1)
mockManagerClient.EXPECT().GetTrialObservationLog(gomock.Any()).Return(observationLogUnavailable, nil).MinTimes(1)
mockManagerClient.EXPECT().DeleteTrialObservationLog(gomock.Any()).Return(nil, nil).AnyTimes()
t.Run(`Trial run with "Failed" BatchJob.`, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
mockMC.msg = observationLogUnavailable
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mockMC.msg = observationLogUnavailable
mockManagerClient.EXPECT().GetTrialObservationLog(gomock.Any()).Return(observationLogAvailable, nil).AnyTimes()

Instead of this original mock client, can we reuse the autogenerated mock clients like this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need WithOverridableExpectations here, but github.com/golang/mock/mock does not support this feature.
Or "Each test should create a new Controller".
In my opinion, gomock is not flexible enough, and it complicates a simple thing for this scenario.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Finally, I found another way.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@forsaken628 I opened the issue to replace gomock with go.uber.com/gomock here: #2356

As a separate PR, could you work on the issue?

trial := newFakeTrialBatchJob()
batchJob := &batchv1.Job{}

// Test 1 - Trial run with "Failed" BatchJob.
trial := newFakeTrialBatchJob()
batchJob := &batchv1.Job{}
// Create the Trial
g.Expect(c.Create(ctx, trial)).NotTo(gomega.HaveOccurred())

// Create the Trial
g.Expect(c.Create(ctx, trial)).NotTo(gomega.HaveOccurred())
// Expect that BatchJob with appropriate name is created
g.Eventually(func() error {
return c.Get(ctx, batchJobKey, batchJob)
}, timeout).Should(gomega.Succeed())

// Expect that BatchJob with appropriate name is created
g.Eventually(func() error {
return c.Get(ctx, batchJobKey, batchJob)
}, timeout).Should(gomega.Succeed())
// Expect that Trial status is running
g.Eventually(func() bool {
if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsRunning()
}, timeout).Should(gomega.BeTrue())

// Manually update BatchJob status to failed
// Expect that Trial status is failed
g.Eventually(func() bool {
if err = c.Get(ctx, batchJobKey, batchJob); err != nil {
return false
}
batchJob.Status = batchv1.JobStatus{
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailed,
Status: corev1.ConditionTrue,
Message: "BatchJob failed test message",
Reason: "BatchJob failed test reason",
},
},
}
if err = c.Status().Update(ctx, batchJob); err != nil {
return false
}

// Expect that Trial status is running
g.Eventually(func() bool {
if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsRunning()
}, timeout).Should(gomega.BeTrue())

// Manually update BatchJob status to failed
// Expect that Trial status is failed
g.Eventually(func() bool {
if err = c.Get(ctx, batchJobKey, batchJob); err != nil {
return false
}
if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsFailed()
}, timeout).Should(gomega.BeTrue())

// Delete the Trial
g.Expect(c.Delete(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial is deleted
// BatchJob can't be deleted because GC doesn't work in envtest and BatchJob stuck in termination phase.
// Ref: https://book.kubebuilder.io/reference/testing/envtest.html#testing-considerations.
g.Eventually(func() bool {
return errors.IsNotFound(c.Get(ctx, trialKey, &trialsv1beta1.Trial{}))
}, timeout).Should(gomega.BeTrue())
})

t.Run(`Trail with "Complete" BatchJob and Available metrics.`, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
mockMC.msg = observationLogAvailable
forsaken628 marked this conversation as resolved.
Show resolved Hide resolved
batchJob := &batchv1.Job{}
batchJobCompleteMessage := "BatchJob completed test message"
batchJobCompleteReason := "BatchJob completed test reason"
// Update BatchJob status to Complete.
g.Expect(c.Get(ctx, batchJobKey, batchJob)).NotTo(gomega.HaveOccurred())
batchJob.Status = batchv1.JobStatus{
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobFailed,
Type: batchv1.JobComplete,
Status: corev1.ConditionTrue,
Message: "BatchJob failed test message",
Reason: "BatchJob failed test reason",
Message: batchJobCompleteMessage,
Reason: batchJobCompleteReason,
},
},
}
if err = c.Status().Update(ctx, batchJob); err != nil {
return false
}

if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsFailed()
}, timeout).Should(gomega.BeTrue())

// Delete the Trial
g.Expect(c.Delete(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial is deleted
// BatchJob can't be deleted because GC doesn't work in envtest and BatchJob stuck in termination phase.
// Ref: https://book.kubebuilder.io/reference/testing/envtest.html#testing-considerations.
g.Eventually(func() bool {
return errors.IsNotFound(c.Get(ctx, trialKey, &trialsv1beta1.Trial{}))
}, timeout).Should(gomega.BeTrue())

// Test 2 - Trail with "Complete" BatchJob and Available metrics.
batchJobCompleteMessage := "BatchJob completed test message"
batchJobCompleteReason := "BatchJob completed test reason"
// Update BatchJob status to Complete.
g.Expect(c.Get(ctx, batchJobKey, batchJob)).NotTo(gomega.HaveOccurred())
batchJob.Status = batchv1.JobStatus{
Conditions: []batchv1.JobCondition{
{
Type: batchv1.JobComplete,
Status: corev1.ConditionTrue,
Message: batchJobCompleteMessage,
Reason: batchJobCompleteReason,
},
},
}
g.Expect(c.Status().Update(ctx, batchJob)).NotTo(gomega.HaveOccurred())

// Create the Trial
trial = newFakeTrialBatchJob()
g.Expect(c.Create(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial status is succeeded and metrics are properly populated
// Metrics available because GetTrialObservationLog returns values
g.Eventually(func() bool {
if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsSucceeded() &&
len(trial.Status.Observation.Metrics) > 0 &&
trial.Status.Observation.Metrics[0].Min == "0.11" &&
trial.Status.Observation.Metrics[0].Max == "0.99" &&
trial.Status.Observation.Metrics[0].Latest == "0.11"
}, timeout).Should(gomega.BeTrue())

// Delete the Trial
g.Expect(c.Delete(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial is deleted
g.Eventually(func() bool {
return errors.IsNotFound(c.Get(ctx, trialKey, &trialsv1beta1.Trial{}))
}, timeout).Should(gomega.BeTrue())

// Test 3 - Trail with "Complete" BatchJob and Unavailable metrics.
// Create the Trial
trial = newFakeTrialBatchJob()
g.Expect(c.Create(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial status is succeeded with "false" status and "metrics unavailable" reason.
// Metrics unavailable because GetTrialObservationLog returns "unavailable".
g.Eventually(func() bool {
if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsMetricsUnavailable() &&
len(trial.Status.Observation.Metrics) > 0 &&
trial.Status.Observation.Metrics[0].Min == consts.UnavailableMetricValue &&
trial.Status.Observation.Metrics[0].Max == consts.UnavailableMetricValue &&
trial.Status.Observation.Metrics[0].Latest == consts.UnavailableMetricValue
}, timeout).Should(gomega.BeTrue())

// Delete the Trial
g.Expect(c.Delete(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial is deleted
g.Eventually(func() bool {
return errors.IsNotFound(c.Get(ctx, trialKey, &trialsv1beta1.Trial{}))
}, timeout).Should(gomega.BeTrue())

// Test 4 - Update status for empty Trial
g.Expect(r.updateStatus(&trialsv1beta1.Trial{})).To(gomega.HaveOccurred())

g.Expect(c.Status().Update(ctx, batchJob)).NotTo(gomega.HaveOccurred())

// Create the Trial
trial := newFakeTrialBatchJob()
g.Expect(c.Create(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial status is succeeded and metrics are properly populated
// Metrics available because GetTrialObservationLog returns values
start := time.Now()
g.Eventually(func() bool {
if err = c.Get(ctx, trialKey, trial); err != nil {
t.Log(time.Since(start), err)
return false
}
return trial.IsSucceeded() &&
len(trial.Status.Observation.Metrics) > 0 &&
trial.Status.Observation.Metrics[0].Min == "0.11" &&
trial.Status.Observation.Metrics[0].Max == "0.99" &&
trial.Status.Observation.Metrics[0].Latest == "0.11"
}, timeout).Should(gomega.BeTrue())

// Delete the Trial
g.Expect(c.Delete(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial is deleted
g.Eventually(func() bool {
return errors.IsNotFound(c.Get(ctx, trialKey, &trialsv1beta1.Trial{}))
}, timeout).Should(gomega.BeTrue())
})

t.Run(`Trail with "Complete" BatchJob and Unavailable metrics.`, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
mockMC.msg = observationLogUnavailable
forsaken628 marked this conversation as resolved.
Show resolved Hide resolved
// Create the Trial
trial := newFakeTrialBatchJob()
g.Expect(c.Create(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial status is succeeded with "false" status and "metrics unavailable" reason.
// Metrics unavailable because GetTrialObservationLog returns "unavailable".
g.Eventually(func() bool {
if err = c.Get(ctx, trialKey, trial); err != nil {
return false
}
return trial.IsMetricsUnavailable() &&
len(trial.Status.Observation.Metrics) > 0 &&
trial.Status.Observation.Metrics[0].Min == consts.UnavailableMetricValue &&
trial.Status.Observation.Metrics[0].Max == consts.UnavailableMetricValue &&
trial.Status.Observation.Metrics[0].Latest == consts.UnavailableMetricValue
}, timeout).Should(gomega.BeTrue())

// Delete the Trial
g.Expect(c.Delete(ctx, trial)).NotTo(gomega.HaveOccurred())

// Expect that Trial is deleted
g.Eventually(func() bool {
return errors.IsNotFound(c.Get(ctx, trialKey, &trialsv1beta1.Trial{}))
}, timeout).Should(gomega.BeTrue())
})

t.Run("Update status for empty Trial", func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
g.Expect(r.updateStatus(&trialsv1beta1.Trial{})).To(gomega.HaveOccurred())
})
}

func TestGetObjectiveMetricValue(t *testing.T) {
Expand Down Expand Up @@ -428,3 +433,16 @@ func newFakeTrialBatchJob() *trialsv1beta1.Trial {
},
}
}

type mockManagerClient struct {
msg *api_pb.GetObservationLogReply
}

func (c *mockManagerClient) GetTrialObservationLog(instance *trialsv1beta1.Trial) (*api_pb.GetObservationLogReply, error) {
return c.msg, nil
}

func (c *mockManagerClient) DeleteTrialObservationLog(instance *trialsv1beta1.Trial) (*api_pb.DeleteObservationLogReply, error) {
c.msg = nil
return &api_pb.DeleteObservationLogReply{}, nil
}
Loading