From a63b132d4279e2ad70dafba794865712c9650069 Mon Sep 17 00:00:00 2001 From: Mark Mandel Date: Tue, 2 Oct 2018 09:26:24 -0700 Subject: [PATCH] Prioritise Allocation from Nodes with Allocated/Ready GameServers One of the first parts for Node autoscaling (#368) - make sure we essentially bin pack our allocated game servers. This change makes allocation first prioritise allocation from `Nodes` that already have the most `Allocated` `GameServers`, and then in the case of a tie, to the `Nodes` that have the most `Ready` `GameServers`. This sets us up for the next part, such that when we scale down a Fleet, it removes `GameServers` from `Nodes` that have the least `GameServers` on them. --- README.md | 4 + docs/create_fleetautoscaler.md | 2 + docs/fleet_spec.md | 6 ++ docs/fleetautoscaler_spec.md | 44 +++++++++ docs/scheduling_autoscaling.md | 53 +++++++++++ examples/fleet.yaml | 7 ++ pkg/apis/stable/v1alpha1/fleet.go | 24 +++++ pkg/apis/stable/v1alpha1/fleet_test.go | 2 + pkg/fleetallocation/controller.go | 34 +++---- pkg/fleetallocation/controller_test.go | 95 +++++++++++++++++++ pkg/fleetallocation/find.go | 88 +++++++++++++++++ pkg/fleetallocation/find_test.go | 125 +++++++++++++++++++++++++ test/e2e/fleet_test.go | 41 ++++---- 13 files changed, 492 insertions(+), 33 deletions(-) create mode 100644 docs/fleetautoscaler_spec.md create mode 100644 docs/scheduling_autoscaling.md create mode 100644 pkg/fleetallocation/find.go create mode 100644 pkg/fleetallocation/find_test.go diff --git a/README.md b/README.md index 01ec90caf1..abfd072c08 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Documentation and usage guides on how to develop and host dedicated game servers ### Reference - [Game Server Specification](./docs/gameserver_spec.md) - [Fleet Specification](./docs/fleet_spec.md) +- [Fleet Autoscaler Specification](./docs/fleetautoscaler_spec.md) ### Examples - [Full GameServer Configuration](./examples/gameserver.yaml) @@ -67,6 +68,9 @@ Documentation and usage 
guides on how to develop and host dedicated game servers - [CPP Simple](./examples/cpp-simple) (C++) - C++ example that starts up, stays healthy and then shuts down after 60 seconds. - [Xonotic](./examples/xonotic) - Wraps the SDK around the open source FPS game [Xonotic](http://www.xonotic.org) and hosts it on Agones. +### Advanced +- [Scheduling and Autoscaling](./docs/scheduling_autoscaling.md) + ## Get involved - [Slack](https://join.slack.com/t/agones/shared_invite/enQtMzE5NTE0NzkyOTk1LWQ2ZmY1Mjc4ZDQ4NDJhOGYxYTY2NTY0NjUwNjliYzVhMWFjYjMxM2RlMjg3NGU0M2E0YTYzNDIxNDMyZGNjMjU) diff --git a/docs/create_fleetautoscaler.md b/docs/create_fleetautoscaler.md index 5472446b7e..22577d53c6 100644 --- a/docs/create_fleetautoscaler.md +++ b/docs/create_fleetautoscaler.md @@ -251,4 +251,6 @@ simple-udp-mzhrl-zg9rq Ready 10.30.64.99 [map[name:default port:7745]] ## Next Steps +Read the advanced [Scheduling and Autoscaling](scheduling_autoscaling.md) guide, for more details on autoscaling. + If you want to use your own GameServer container make sure you have properly integrated the [Agones SDK](../sdks/). \ No newline at end of file diff --git a/docs/fleet_spec.md b/docs/fleet_spec.md index 042e017dcf..53909e18af 100644 --- a/docs/fleet_spec.md +++ b/docs/fleet_spec.md @@ -15,6 +15,7 @@ metadata: name: fleet-example spec: replicas: 2 + scheduling: Packed strategy: type: RollingUpdate rollingUpdate: @@ -53,6 +54,11 @@ This is a very common pattern in the Kubernetes ecosystem. The `spec` field is the actual `Fleet` specification and it is composed as follow: - `replicas` is the number of `GameServers` to keep Ready or Allocated in this Fleet +- `scheduling`(⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️) defines how GameServers are organised across the cluster. Currently only affects Allocation, but will expand + in future releases. 
Options include: + "Packed" (default) is aimed at dynamic Kubernetes clusters, such as cloud providers, wherein we want to bin pack + resources. "Distributed" is aimed at static Kubernetes clusters, wherein we want to distribute resources across the entire + cluster. See [Scheduling and Autoscaling](scheduling_autoscaling.md) for more details. - `strategy` is the `GameServer` replacement strategy for when the `GameServer` template is edited. - `type` is replacement strategy for when the GameServer template is changed. Default option is "RollingUpdate", but "Recreate" is also available. - `RollingUpdate` will increment by `maxSurge` value on each iteration, while decrementing by `maxUnavailable` on each iteration, until all GameServers have been switched from one version to another. diff --git a/docs/fleetautoscaler_spec.md b/docs/fleetautoscaler_spec.md new file mode 100644 index 0000000000..5a1ecbc290 --- /dev/null +++ b/docs/fleetautoscaler_spec.md @@ -0,0 +1,44 @@ +# Fleet Autoscaler Specification + +⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️ + +A `FleetAutoscaler`'s job is to automatically scale up and down a `Fleet` in response to demand. + +A full `FleetAutoscaler` specification is available below and in the +[example folder](../examples/fleetautoscaler.yaml) for reference : + +```yaml +apiVersion: "stable.agones.dev/v1alpha1" +kind: FleetAutoscaler +metadata: + name: fleet-autoscaler-example +spec: + + fleetName: fleet-example + policy: + type: Buffer + buffer: + bufferSize: 5 + minReplicas: 10 + maxReplicas: 20 +``` + +Since Agones defines a new +[Custom Resources Definition (CRD)](https://kubernetes.io/docs/concepts/api-extension/custom-resources/) +we can define a new resource using the kind `FleetAutoscaler` with the custom group `stable.agones.dev` and API +version `v1alpha1`. 
+ +The `spec` field is the actual `FleetAutoscaler` specification and it is composed as follows: + +- `fleetName` is the name of the fleet to attach to and control. Must be an existing `Fleet` in the same namespace + as this `FleetAutoscaler`. +- `policy` is the autoscaling policy + - `type` is the type of the policy. For now, only "Buffer" is available + - `buffer` parameters of the buffer policy + - `bufferSize` is the size of a buffer of "ready" game server instances + The FleetAutoscaler will scale the fleet up and down trying to maintain this buffer, + as instances are being allocated or terminated + it can be specified either in absolute (e.g. 5) or percentage format (e.g. 5%) + - `minReplicas` is the minimum fleet size to be set by this FleetAutoscaler. + if not specified, the minimum fleet size will be bufferSize + - `maxReplicas` is the maximum fleet size that can be set by this FleetAutoscaler. Required. \ No newline at end of file diff --git a/docs/scheduling_autoscaling.md b/docs/scheduling_autoscaling.md new file mode 100644 index 0000000000..f28bf60caa --- /dev/null +++ b/docs/scheduling_autoscaling.md @@ -0,0 +1,53 @@ +# Scheduling and Autoscaling + +⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️ + +> Autoscaling is currently ongoing work within Agones. The work you see here is just the beginning. + +Scheduling and autoscaling tend to go hand in hand, as where in the cluster `GameServers` are provisioned +tends to impact how to autoscale fleets up and down (or if you would even want to). + +## Fleet Autoscaling + +Fleet autoscaling is currently the only type of autoscaling that exists in Agones. It is also only available as a simple +buffer autoscaling strategy. Have a look at the [Create a Fleet Autoscaler](create_fleetautoscaler.md) quickstart, +and the [Fleet Autoscaler Specification](fleetautoscaler_spec.md) for details. 
+ +Node scaling, and more sophisticated fleet autoscaling will be coming in future releases ([design](https://github.com/GoogleCloudPlatform/agones/issues/368)) + +## Fleet Allocation Scheduling + +There are two scheduling strategies for fleets - each designed for different types of Kubernetes environments. + +### Packed + +This is the *default* Fleet scheduling strategy. It is designed for dynamic Kubernetes environments, wherein you wish +to scale up and down as load increases or decreases, such as in a Cloud environment where you are paying +for the infrastructure you use. + +It attempts to _pack_ as much as possible into the smallest set of nodes, to make +scaling infrastructure down as easy as possible. + +Currently, Allocation scheduling is the only aspect this strategy affects, but in future releases it will +also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as well. + +#### Allocation Scheduling + +Under the "Packed" strategy, allocation will prioritise allocating `GameServers` to `Nodes` that +already have allocated `GameServers` running on them. + +### Distributed + +This Fleet scheduling strategy is designed for static Kubernetes environments, such as when you are running Kubernetes +on bare metal, and the cluster size rarely changes, if at all. + +This attempts to distribute the load across the entire cluster as much as possible, to take advantage of the static +size of the cluster. + +Currently, the only thing the scheduling strategy affects is Allocation scheduling, but in future releases it will +also affect `GameServer` `Pod` scheduling, and `Fleet` scale down scheduling as well. + +#### Allocation Scheduling + +Under the "Distributed" strategy, allocation will prioritise allocating `GameServers` to nodes that have the least +number of allocated `GameServers` on them. 
diff --git a/examples/fleet.yaml b/examples/fleet.yaml index ebcb2e71c9..7ef1eec9c5 100644 --- a/examples/fleet.yaml +++ b/examples/fleet.yaml @@ -27,6 +27,13 @@ metadata: spec: # the number of GameServers to keep Ready or Allocated in this Fleet replicas: 2 + # defines how GameServers are organised across the cluster. Currently only affects Allocation, but will expand + # in future releases. Options include: + # "Packed" (default) is aimed at dynamic Kubernetes clusters, such as cloud providers, wherein we want to bin pack + # resources + # "Distributed" is aimed at static Kubernetes clusters, wherein we want to distribute resources across the entire + # cluster + scheduling: Packed # a GameServer template - see: # https://github.com/GoogleCloudPlatform/agones/blob/master/docs/gameserver_spec.md for all the options strategy: diff --git a/pkg/apis/stable/v1alpha1/fleet.go b/pkg/apis/stable/v1alpha1/fleet.go index a74c05d520..8c6fe5193d 100644 --- a/pkg/apis/stable/v1alpha1/fleet.go +++ b/pkg/apis/stable/v1alpha1/fleet.go @@ -22,11 +22,29 @@ import ( ) const ( + // Packed scheduling strategy will prioritise allocating GameServers + // on Nodes with the most Allocated, and then Ready GameServers + // to bin pack as many Allocated GameServers on a single node. + // This is most useful for dynamic Kubernetes clusters - such as on Cloud Providers. + // In future versions, this will also impact Fleet scale down, and Pod Scheduling. + // TODO: example and document + Packed SchedulingStrategy = "Packed" + + // Distributed scheduling strategy will prioritise allocating GameServers + // on Nodes with the least Allocated, and then Ready GameServers + // to distribute Allocated GameServers across many nodes. + // This is most useful for statically sized Kubernetes clusters - such as on physical hardware. + // In future versions, this will also impact Fleet scale down, and Pod Scheduling. 
+ // TODO: example, and document + Distributed SchedulingStrategy = "Distributed" + // FleetGameServerSetLabel is the label that the name of the Fleet // is set to on the GameServerSet the Fleet controls FleetGameServerSetLabel = stable.GroupName + "/fleet" ) +type SchedulingStrategy string + // +genclient // +genclient:noStatus // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -56,6 +74,8 @@ type FleetSpec struct { Replicas int32 `json:"replicas"` // Deployment strategy Strategy appsv1.DeploymentStrategy `json:"strategy"` + // Scheduling strategy. Defaults to "Packed". + Scheduling SchedulingStrategy `json:"scheduling"` // Template the GameServer template to apply for this Fleet Template GameServerTemplateSpec `json:"template"` } @@ -105,6 +125,10 @@ func (f *Fleet) ApplyDefaults() { f.Spec.Strategy.Type = appsv1.RollingUpdateDeploymentStrategyType } + if f.Spec.Scheduling == "" { + f.Spec.Scheduling = Packed + } + if f.Spec.Strategy.Type == appsv1.RollingUpdateDeploymentStrategyType { if f.Spec.Strategy.RollingUpdate == nil { f.Spec.Strategy.RollingUpdate = &appsv1.RollingUpdateDeployment{} diff --git a/pkg/apis/stable/v1alpha1/fleet_test.go b/pkg/apis/stable/v1alpha1/fleet_test.go index 46e13eb3a0..f2581c854b 100644 --- a/pkg/apis/stable/v1alpha1/fleet_test.go +++ b/pkg/apis/stable/v1alpha1/fleet_test.go @@ -60,11 +60,13 @@ func TestFleetApplyDefaults(t *testing.T) { // gate assert.EqualValues(t, "", f.Spec.Strategy.Type) + assert.EqualValues(t, "", f.Spec.Scheduling) f.ApplyDefaults() assert.Equal(t, appsv1.RollingUpdateDeploymentStrategyType, f.Spec.Strategy.Type) assert.Equal(t, "25%", f.Spec.Strategy.RollingUpdate.MaxUnavailable.String()) assert.Equal(t, "25%", f.Spec.Strategy.RollingUpdate.MaxSurge.String()) + assert.Equal(t, Packed, f.Spec.Scheduling) } func TestFleetUpperBoundReplicas(t *testing.T) { diff --git a/pkg/fleetallocation/controller.go b/pkg/fleetallocation/controller.go index 2666011b4f..3edbcd6a48 100644 --- 
a/pkg/fleetallocation/controller.go +++ b/pkg/fleetallocation/controller.go @@ -20,7 +20,7 @@ import ( "sync" "agones.dev/agones/pkg/apis/stable" - stablev1alpha1 "agones.dev/agones/pkg/apis/stable/v1alpha1" + "agones.dev/agones/pkg/apis/stable/v1alpha1" "agones.dev/agones/pkg/client/clientset/versioned" getterv1alpha1 "agones.dev/agones/pkg/client/clientset/versioned/typed/stable/v1alpha1" "agones.dev/agones/pkg/client/informers/externalversions" @@ -95,7 +95,7 @@ func NewController( eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) c.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "fleetallocation-controller"}) - kind := stablev1alpha1.Kind("FleetAllocation") + kind := v1alpha1.Kind("FleetAllocation") wh.AddHandler("/mutate", kind, admv1beta1.Create, c.creationMutationHandler) wh.AddHandler("/validate", kind, admv1beta1.Create, c.creationValidationHandler) wh.AddHandler("/validate", kind, admv1beta1.Update, c.mutationValidationHandler) @@ -120,7 +120,7 @@ func (c *Controller) Run(workers int, stop <-chan struct{}) error { func (c *Controller) creationMutationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", review).Info("creationMutationHandler") obj := review.Request.Object - fa := &stablev1alpha1.FleetAllocation{} + fa := &v1alpha1.FleetAllocation{} err := json.Unmarshal(obj.Raw, fa) if err != nil { @@ -157,10 +157,10 @@ func (c *Controller) creationMutationHandler(review admv1beta1.AdmissionReview) } // When a GameServer is deleted, the FleetAllocation should go with it - ref := metav1.NewControllerRef(gs, stablev1alpha1.SchemeGroupVersion.WithKind("GameServer")) + ref := metav1.NewControllerRef(gs, v1alpha1.SchemeGroupVersion.WithKind("GameServer")) fa.ObjectMeta.OwnerReferences = append(fa.ObjectMeta.OwnerReferences, *ref) - fa.Status = stablev1alpha1.FleetAllocationStatus{GameServer: gs} + fa.Status = 
v1alpha1.FleetAllocationStatus{GameServer: gs} newFA, err := json.Marshal(fa) if err != nil { @@ -191,7 +191,7 @@ func (c *Controller) creationMutationHandler(review admv1beta1.AdmissionReview) func (c *Controller) creationValidationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", review).Info("creationValidationHandler") obj := review.Request.Object - fa := &stablev1alpha1.FleetAllocation{} + fa := &v1alpha1.FleetAllocation{} if err := json.Unmarshal(obj.Raw, fa); err != nil { return review, errors.Wrapf(err, "error unmarshalling original FleetAllocation json: %s", obj.Raw) } @@ -225,8 +225,8 @@ func (c *Controller) creationValidationHandler(review admv1beta1.AdmissionReview func (c *Controller) mutationValidationHandler(review admv1beta1.AdmissionReview) (admv1beta1.AdmissionReview, error) { c.logger.WithField("review", review).Info("mutationValidationHandler") - newFA := &stablev1alpha1.FleetAllocation{} - oldFA := &stablev1alpha1.FleetAllocation{} + newFA := &v1alpha1.FleetAllocation{} + oldFA := &v1alpha1.FleetAllocation{} if err := json.Unmarshal(review.Request.Object.Raw, newFA); err != nil { return review, errors.Wrapf(err, "error unmarshalling new FleetAllocation json: %s", review.Request.Object.Raw) @@ -256,8 +256,8 @@ func (c *Controller) mutationValidationHandler(review admv1beta1.AdmissionReview } // allocate allocated a GameServer from a given Fleet -func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.FleetAllocationMeta) (*stablev1alpha1.GameServer, error) { - var allocation *stablev1alpha1.GameServer +func (c *Controller) allocate(f *v1alpha1.Fleet, fam *v1alpha1.FleetAllocationMeta) (*v1alpha1.GameServer, error) { + var allocation *v1alpha1.GameServer // can only allocate one at a time, as we don't want two separate processes // trying to allocate the same GameServer to different clients c.allocationMutex.Lock() @@ -272,11 +272,11 @@ func (c *Controller) allocate(f 
*stablev1alpha1.Fleet, fam *stablev1alpha1.Fleet return allocation, err } - for _, gs := range gsList { - if gs.Status.State == stablev1alpha1.Ready && gs.ObjectMeta.DeletionTimestamp.IsZero() { - allocation = gs - break - } + switch f.Spec.Scheduling { + case v1alpha1.Packed: + allocation = findReadyGameServerForAllocation(gsList, packedComparator) + case v1alpha1.Distributed: + allocation = findReadyGameServerForAllocation(gsList, distributedComparator) } if allocation == nil { @@ -284,7 +284,7 @@ func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.Fleet } gsCopy := allocation.DeepCopy() - gsCopy.Status.State = stablev1alpha1.Allocated + gsCopy.Status.State = v1alpha1.Allocated if fam != nil { c.patchMetadata(gsCopy, fam) @@ -300,7 +300,7 @@ func (c *Controller) allocate(f *stablev1alpha1.Fleet, fam *stablev1alpha1.Fleet } // patch the labels and annotations of an allocated GameServer with metadata from a FleetAllocation -func (c *Controller) patchMetadata(gs *stablev1alpha1.GameServer, fam *stablev1alpha1.FleetAllocationMeta) { +func (c *Controller) patchMetadata(gs *v1alpha1.GameServer, fam *v1alpha1.FleetAllocationMeta) { // patch ObjectMeta labels if fam.Labels != nil { if gs.ObjectMeta.Labels == nil { diff --git a/pkg/fleetallocation/controller_test.go b/pkg/fleetallocation/controller_test.go index 51e2a37e37..ea4c518ac9 100644 --- a/pkg/fleetallocation/controller_test.go +++ b/pkg/fleetallocation/controller_test.go @@ -141,6 +141,8 @@ func TestControllerMutationValidationHandler(t *testing.T) { } func TestControllerAllocate(t *testing.T) { + t.Parallel() + f, gsSet, gsList := defaultFixtures(4) c, m := newFakeController() n := metav1.Now() @@ -210,6 +212,98 @@ func TestControllerAllocate(t *testing.T) { assert.False(t, updated) } +func TestControllerAllocatePriority(t *testing.T) { + t.Parallel() + + n1 := "node1" + n2 := "node2" + + run := func(t *testing.T, name string, test func(t *testing.T, c *Controller, fleet *v1alpha1.Fleet)) 
{ + f, gsSet, gsList := defaultFixtures(4) + c, m := newFakeController() + + gsList[0].Status.NodeName = n1 + gsList[1].Status.NodeName = n2 + gsList[2].Status.NodeName = n1 + gsList[3].Status.NodeName = n1 + + m.AgonesClient.AddReactor("list", "fleets", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &v1alpha1.FleetList{Items: []v1alpha1.Fleet{*f}}, nil + }) + m.AgonesClient.AddReactor("list", "gameserversets", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &v1alpha1.GameServerSetList{Items: []v1alpha1.GameServerSet{*gsSet}}, nil + }) + m.AgonesClient.AddReactor("list", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &v1alpha1.GameServerList{Items: gsList}, nil + }) + + gsWatch := watch.NewFake() + m.AgonesClient.AddWatchReactor("gameservers", k8stesting.DefaultWatchReactor(gsWatch, nil)) + m.AgonesClient.AddReactor("update", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { + ua := action.(k8stesting.UpdateAction) + gs := ua.GetObject().(*v1alpha1.GameServer) + gsWatch.Modify(gs) + return true, gs, nil + }) + + _, cancel := agtesting.StartInformers(m) + defer cancel() + + t.Run(name, func(t *testing.T) { + test(t, c, f) + }) + } + + run(t, "packed", func(t *testing.T, c *Controller, f *v1alpha1.Fleet) { + // priority should be node1, then node2 + gs, err := c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n2, gs.Status.NodeName) + + // should have none left + _, err = c.allocate(f, nil) + assert.NotNil(t, err) + }) + + run(t, "distributed", func(t *testing.T, c *Controller, f *v1alpha1.Fleet) { + // make a copy, to avoid the race check + f 
= f.DeepCopy() + f.Spec.Scheduling = v1alpha1.Distributed + // should go node2, then node1 + gs, err := c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n2, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + gs, err = c.allocate(f, nil) + assert.Nil(t, err) + assert.Equal(t, n1, gs.Status.NodeName) + + // should have none left + _, err = c.allocate(f, nil) + assert.NotNil(t, err) + }) +} + func TestControllerAllocateMutex(t *testing.T) { t.Parallel() @@ -270,6 +364,7 @@ func defaultFixtures(gsLen int) (*v1alpha1.Fleet, *v1alpha1.GameServerSet, []v1a Template: v1alpha1.GameServerTemplateSpec{}, }, } + f.ApplyDefaults() gsSet := f.GameServerSet() gsSet.ObjectMeta.Name = "gsSet1" var gsList []v1alpha1.GameServer diff --git a/pkg/fleetallocation/find.go b/pkg/fleetallocation/find.go new file mode 100644 index 0000000000..ba1a8501af --- /dev/null +++ b/pkg/fleetallocation/find.go @@ -0,0 +1,88 @@ +// Copyright 2018 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fleetallocation + +import ( + "agones.dev/agones/pkg/apis/stable/v1alpha1" +) + +// nodeCount is just a convenience data structure for +// keeping relevant GameServer counts about Nodes +type nodeCount struct { + ready int64 + allocated int64 +} + +// findReadyGameServerForAllocation is a O(n) implementation to find a GameServer with priority +// defined in the comparator function. +func findReadyGameServerForAllocation(gsList []*v1alpha1.GameServer, comparator func(bestCount, currentCount *nodeCount) bool) *v1alpha1.GameServer { + counts := map[string]*nodeCount{} + // track potential gameservers, one for each node + allocatableGameServers := map[string]*v1alpha1.GameServer{} + + // count up the number of allocated and ready game servers that exist + // also, since we're already looping through, track one Ready GameServer + // per node, so we can use that as a short list to allocate from + for _, gs := range gsList { + if gs.DeletionTimestamp.IsZero() && + (gs.Status.State == v1alpha1.Allocated || gs.Status.State == v1alpha1.Ready) { + _, ok := counts[gs.Status.NodeName] + if !ok { + counts[gs.Status.NodeName] = &nodeCount{} + } + + if gs.Status.State == v1alpha1.Allocated { + counts[gs.Status.NodeName].allocated++ + } else if gs.Status.State == v1alpha1.Ready { + counts[gs.Status.NodeName].ready++ + allocatableGameServers[gs.Status.NodeName] = gs + } + } + } + + // track the best node count + var bestCount *nodeCount + // the current GameServer from the node with the most GameServers (allocated, ready) + var bestGS *v1alpha1.GameServer + + for nodeName, count := range counts { + // count.ready > 0: no reason to check if we don't have ready GameServers on this node + // bestGS == nil: if there is no best GameServer, then this node & GameServer is the always the best + if count.ready > 0 && (bestGS == nil || comparator(bestCount, count)) { + bestCount = count + bestGS = allocatableGameServers[nodeName] + } + } + + return bestGS +} + +// packedComparator 
prioritises Nodes with GameServers that are allocated, and then Nodes with the most +// Ready GameServers -- this will bin pack allocated game servers together. +func packedComparator(bestCount, currentCount *nodeCount) bool { + if currentCount.allocated == bestCount.allocated && currentCount.ready > bestCount.ready { + return true + } else if currentCount.allocated > bestCount.allocated { + return true + } + + return false +} + +// distributedComparator is the inverse of the packed comparator, +// looking to distribute allocated gameservers on as many nodes as possible. +func distributedComparator(bestCount, currentCount *nodeCount) bool { + return !packedComparator(bestCount, currentCount) +} diff --git a/pkg/fleetallocation/find_test.go b/pkg/fleetallocation/find_test.go new file mode 100644 index 0000000000..ce80877b34 --- /dev/null +++ b/pkg/fleetallocation/find_test.go @@ -0,0 +1,125 @@ +// Copyright 2018 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fleetallocation + +import ( + "testing" + + "agones.dev/agones/pkg/apis/stable/v1alpha1" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestFindPackedReadyGameServer(t *testing.T) { + t.Parallel() + + t.Run("test one", func(t *testing.T) { + n := metav1.Now() + + gsList := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "gs6", DeletionTimestamp: &n}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Error}}, + } + + gs := findReadyGameServerForAllocation(gsList, packedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + // mock that the first game server is allocated + gsList[1].Status.State = v1alpha1.Allocated + gs = findReadyGameServerForAllocation(gsList, packedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + gsList[2].Status.State = v1alpha1.Allocated + gs = findReadyGameServerForAllocation(gsList, packedComparator) + assert.Nil(t, gs) + }) + + t.Run("allocation trap", func(t *testing.T) { + gsList := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: 
v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Allocated}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs6"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs7"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs8"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + } + + gs := findReadyGameServerForAllocation(gsList, packedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + }) +} + +func TestFindDistributedReadyGameServer(t *testing.T) { + t.Parallel() + + n := metav1.Now() + gsList := []*v1alpha1.GameServer{ + {ObjectMeta: metav1.ObjectMeta{Name: "gs6", DeletionTimestamp: &n}, Status: v1alpha1.GameServerStatus{NodeName: "node3", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs1"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs2"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs3"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs4"}, Status: v1alpha1.GameServerStatus{NodeName: "node1", State: v1alpha1.Error}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs5"}, Status: v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs6"}, Status: 
v1alpha1.GameServerStatus{NodeName: "node2", State: v1alpha1.Ready}}, + {ObjectMeta: metav1.ObjectMeta{Name: "gs7"}, Status: v1alpha1.GameServerStatus{NodeName: "node3", State: v1alpha1.Ready}}, + } + + gs := findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node3", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[7].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[5].Status.State = v1alpha1.Allocated + assert.Equal(t, "node2", gsList[5].Status.NodeName) + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[1].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node2", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[6].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[2].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Equal(t, "node1", gs.Status.NodeName) + assert.Equal(t, v1alpha1.Ready, gs.Status.State) + + gsList[3].Status.State = v1alpha1.Allocated + + gs = findReadyGameServerForAllocation(gsList, distributedComparator) + assert.Nil(t, gs) +} diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index f655756108..9480bcfaae 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -39,25 +39,34 @@ const ( func TestCreateFleetAndAllocate(t *testing.T) { t.Parallel() - fleets := framework.AgonesClient.StableV1alpha1().Fleets(defaultNs) - flt, err := 
fleets.Create(defaultFleet()) - if assert.Nil(t, err) { - defer fleets.Delete(flt.ObjectMeta.Name, nil) // nolint:errcheck - } + fixtures := []v1alpha1.SchedulingStrategy{v1alpha1.Packed, v1alpha1.Distributed} + + for _, strategy := range fixtures { + t.Run(string(strategy), func(t *testing.T) { + fleets := framework.AgonesClient.StableV1alpha1().Fleets(defaultNs) + fleet := defaultFleet() + fleet.Spec.Scheduling = strategy + flt, err := fleets.Create(fleet) + if assert.Nil(t, err) { + defer fleets.Delete(flt.ObjectMeta.Name, nil) // nolint:errcheck + } - err = framework.WaitForFleetCondition(flt, e2e.FleetReadyCount(flt.Spec.Replicas)) - assert.Nil(t, err, "fleet not ready") + err = framework.WaitForFleetCondition(flt, e2e.FleetReadyCount(flt.Spec.Replicas)) + assert.Nil(t, err, "fleet not ready") - fa := &v1alpha1.FleetAllocation{ - ObjectMeta: metav1.ObjectMeta{GenerateName: "allocatioon-", Namespace: defaultNs}, - Spec: v1alpha1.FleetAllocationSpec{ - FleetName: flt.ObjectMeta.Name, - }, - } + fa := &v1alpha1.FleetAllocation{ + ObjectMeta: metav1.ObjectMeta{GenerateName: "allocatioon-", Namespace: defaultNs}, + Spec: v1alpha1.FleetAllocationSpec{ + FleetName: flt.ObjectMeta.Name, + }, + } - fa, err = framework.AgonesClient.StableV1alpha1().FleetAllocations(defaultNs).Create(fa) - assert.Nil(t, err) - assert.Equal(t, v1alpha1.Allocated, fa.Status.GameServer.Status.State) + fa, err = framework.AgonesClient.StableV1alpha1().FleetAllocations(defaultNs).Create(fa) + assert.Nil(t, err) + assert.Equal(t, v1alpha1.Allocated, fa.Status.GameServer.Status.State) + }) + + } } func TestScaleFleetUpAndDownWithAllocation(t *testing.T) {