Skip to content

Commit

Permalink
Specify CPU Request for the SDK Server Sidecar
Browse files Browse the repository at this point in the history
This provides the mechanism (and defaults) for being able to set
both the CPU request, and CPU limits for the SDK Server `GameServer` sidecar.

I've only set the Request level, as it seems that the major issue is not
CPU usage, but actually how the scheduler allots space for the sidecar
(by default 100/0.1 vCPU is alloted to each container.

After discussion, the CPU request has been set to 30m, but is also
configurable via the helm chart.

I've not set a CPU limit, as I found when setting a low (<= 20m) CPU limit
on the sidecar it mostly stopped working. But if people want to experiment with
this, it is also configurable via the Helm chart.

Closes #344
  • Loading branch information
markmandel committed Oct 19, 2018
1 parent 1923a41 commit c6f714a
Show file tree
Hide file tree
Showing 12 changed files with 162 additions and 60 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ Documentation and usage guides on how to develop and host dedicated game servers

### Advanced
- [Scheduling and Autoscaling](./docs/scheduling_autoscaling.md)
- [Limiting CPU/Memory](./docs/limiting_resources.md)

## Get involved

Expand Down
94 changes: 53 additions & 41 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,23 @@ import (
"github.com/spf13/pflag"
"github.com/spf13/viper"
extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)

const (
sidecarFlag = "sidecar"
pullSidecarFlag = "always-pull-sidecar"
minPortFlag = "min-port"
maxPortFlag = "max-port"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
workers = 2
defaultResync = 30 * time.Second
sidecarImageFlag = "sidecar-image"
sidecarCPURequestFlag = "sidecar-cpu-request"
sidecarCPULimitFlag = "sidecar-cpu-limit"
pullSidecarFlag = "always-pull-sidecar"
minPortFlag = "min-port"
maxPortFlag = "max-port"
certFileFlag = "cert-file"
keyFileFlag = "key-file"
workers = 2
defaultResync = 30 * time.Second
)

var (
Expand All @@ -63,6 +66,9 @@ var (
// main starts the operator for the gameserver CRD
func main() {
ctlConf := parseEnvFlags()
logger.WithField("version", pkg.Version).
WithField("ctlConf", ctlConf).Info("starting gameServer operator...")

if err := ctlConf.validate(); err != nil {
logger.WithError(err).Fatal("Could not create controller from environment or flags")
}
Expand All @@ -88,14 +94,15 @@ func main() {
}

health := healthcheck.NewHandler()
wh := webhooks.NewWebHook(ctlConf.certFile, ctlConf.keyFile)
wh := webhooks.NewWebHook(ctlConf.CertFile, ctlConf.KeyFile)
agonesInformerFactory := externalversions.NewSharedInformerFactory(agonesClient, defaultResync)
kubeInformationFactory := informers.NewSharedInformerFactory(kubeClient, defaultResync)

allocationMutex := &sync.Mutex{}

gsController := gameservers.NewController(wh, health, allocationMutex,
ctlConf.minPort, ctlConf.maxPort, ctlConf.sidecarImage, ctlConf.alwaysPullSidecar,
ctlConf.MinPort, ctlConf.MaxPort, ctlConf.SidecarImage, ctlConf.AlwaysPullSidecar,
ctlConf.SidecarCPURequest, ctlConf.SidecarCPULimit,
kubeClient, kubeInformationFactory, extClient, agonesClient, agonesInformerFactory)
gsSetController := gameserversets.NewController(wh, health, allocationMutex,
kubeClient, extClient, agonesClient, agonesInformerFactory)
Expand Down Expand Up @@ -132,12 +139,16 @@ func parseEnvFlags() config {
}

base := filepath.Dir(exec)
viper.SetDefault(sidecarFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version)
viper.SetDefault(sidecarImageFlag, "gcr.io/agones-images/agones-sdk:"+pkg.Version)
viper.SetDefault(sidecarCPURequestFlag, "0")
viper.SetDefault(sidecarCPULimitFlag, "0")
viper.SetDefault(pullSidecarFlag, false)
viper.SetDefault(certFileFlag, filepath.Join(base, "certs/server.crt"))
viper.SetDefault(keyFileFlag, filepath.Join(base, "certs/server.key"))

pflag.String(sidecarFlag, viper.GetString(sidecarFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarImageFlag, viper.GetString(sidecarImageFlag), "Flag to overwrite the GameServer sidecar image that is used. Can also use SIDECAR env variable")
pflag.String(sidecarCPULimitFlag, viper.GetString(sidecarCPULimitFlag), "Flag to overwrite the GameServer sidecar container's cpu limit. Can also use SIDECAR_CPU_LIMIT env variable")
pflag.String(sidecarCPURequestFlag, viper.GetString(sidecarCPURequestFlag), "Flag to overwrite the GameServer sidecar container's cpu request. Can also use SIDECAR_CPU_REQUEST env variable")
pflag.Bool(pullSidecarFlag, viper.GetBool(pullSidecarFlag), "For development purposes, set the sidecar image to have a ImagePullPolicy of Always. Can also use ALWAYS_PULL_SIDECAR env variable")
pflag.Int32(minPortFlag, 0, "Required. The minimum port that that a GameServer can be allocated to. Can also use MIN_PORT env variable.")
pflag.Int32(maxPortFlag, 0, "Required. The maximum port that that a GameServer can be allocated to. Can also use MAX_PORT env variable")
Expand All @@ -146,55 +157,56 @@ func parseEnvFlags() config {
pflag.Parse()

viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_"))
runtime.Must(viper.BindEnv(sidecarFlag))
runtime.Must(viper.BindEnv(sidecarImageFlag))
runtime.Must(viper.BindEnv(sidecarCPULimitFlag))
runtime.Must(viper.BindEnv(sidecarCPURequestFlag))
runtime.Must(viper.BindEnv(pullSidecarFlag))
runtime.Must(viper.BindEnv(minPortFlag))
runtime.Must(viper.BindEnv(maxPortFlag))
runtime.Must(viper.BindEnv(keyFileFlag))
runtime.Must(viper.BindEnv(certFileFlag))
runtime.Must(viper.BindPFlags(pflag.CommandLine))

minPort := int32(viper.GetInt64(minPortFlag))
maxPort := int32(viper.GetInt64(maxPortFlag))
sidecarImage := viper.GetString(sidecarFlag)
alwaysPullSidecar := viper.GetBool(pullSidecarFlag)
keyFile := viper.GetString(keyFileFlag)
certFile := viper.GetString(certFileFlag)

logger.WithField(sidecarFlag, sidecarImage).
WithField("minPort", minPort).
WithField("maxPort", maxPort).
WithField(keyFileFlag, keyFile).
WithField(certFileFlag, certFile).
WithField("alwaysPullSidecarImage", alwaysPullSidecar).
WithField("Version", pkg.Version).Info("starting gameServer operator...")
request, err := resource.ParseQuantity(viper.GetString(sidecarCPURequestFlag))
if err != nil {
logger.WithError(err).Fatalf("could not parse %s", sidecarCPURequestFlag)
}

limit, err := resource.ParseQuantity(viper.GetString(sidecarCPULimitFlag))
if err != nil {
logger.WithError(err).Fatalf("could not parse %s", sidecarCPULimitFlag)
}

return config{
minPort: minPort,
maxPort: maxPort,
sidecarImage: sidecarImage,
alwaysPullSidecar: alwaysPullSidecar,
keyFile: keyFile,
certFile: certFile,
MinPort: int32(viper.GetInt64(minPortFlag)),
MaxPort: int32(viper.GetInt64(maxPortFlag)),
SidecarImage: viper.GetString(sidecarImageFlag),
SidecarCPURequest: request,
SidecarCPULimit: limit,
AlwaysPullSidecar: viper.GetBool(pullSidecarFlag),
KeyFile: viper.GetString(keyFileFlag),
CertFile: viper.GetString(certFileFlag),
}
}

// config stores all required configuration to create a game server controller.
type config struct {
minPort int32
maxPort int32
sidecarImage string
alwaysPullSidecar bool
keyFile string
certFile string
MinPort int32
MaxPort int32
SidecarImage string
SidecarCPURequest resource.Quantity
SidecarCPULimit resource.Quantity
AlwaysPullSidecar bool
KeyFile string
CertFile string
}

// validate ensures the ctlConfig data is valid.
func (c config) validate() error {
if c.minPort <= 0 || c.maxPort <= 0 {
if c.MinPort <= 0 || c.MaxPort <= 0 {
return errors.New("min Port and Max Port values are required")
}
if c.maxPort < c.minPort {
if c.MaxPort < c.MinPort {
return errors.New("max Port cannot be set less that the Min Port")
}
return nil
Expand Down
57 changes: 57 additions & 0 deletions docs/limiting_resources.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Limiting CPU & Memory

Kubernetes natively has inbuilt capabilities for requesting and limiting both CPU and Memory usage of running containers.

As a short description:

- CPU `Requests` are limits that are applied when there is CPU congestion, and as such can burst above their set limits.
- CPU `Limits` are hard limits on how much CPU time the particular container gets access to.

This is useful for game servers, not just as a mechanism to distribute compute resources evenly, but also as a way
to advice the Kubernetes scheduler how many game server processes it is able to fit into a given node in the cluster.

It's worth reading the [Managing Compute Resources for Containers](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/)
Kubernetes documentation for more details on "requests" and "limits" to both CPU and Memory, and how to configure them.

## GameServers

Since the `GameServer` specification provides a full [`PodSpecTemplate`](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.10/#podtemplatespec-v1-core),
we can take advantage of both resource limits and requests in our `GameServer` configurations.

For example, to set a CPU limit on our `GameServer` configuration of 250m/0.25 of a cpu,
we could do so as followed:

```yaml
apiVersion: "stable.agones.dev/v1alpha1"
kind: GameServer
metadata:
name: "simple-udp"
spec:
ports:
- name: default
portPolicy: "dynamic"
containerPort: 7654
template:
spec:
containers:
- name: simple-udp
image: gcr.io/agones-images/udp-server:0.4
resources:
limit:
cpu: "250m" #this is our limit here
```
If you do not set a limit or request, the default is set my Kubernetes at a 100m CPU request.
## SDK GameServer sidecar
⚠️⚠️⚠️ **This is currently a development feature and has not been released** ⚠️⚠️⚠️
You may also want to tweak the CPU request or limits on the SDK `GameServer` sidecar process that spins up alongside
each game server container.

You can do this through the [Helm configuration](../install/helm/README.md#configuration) when installing Agones.

By default, this is set to having a CPU request value of 30m, with no hard CPU limit. This ensures that the sidecar always has enough CPU
to function, but it is configurable in case a lower, or higher value is required on your clusters, or if you desire
hard limit.
4 changes: 4 additions & 0 deletions install/helm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ The following tables lists the configurable parameters of the Agones chart and t
| `agones.image.controller.pullPolicy` | Image pull policy for the controller | `IfNotPresent` |
| `agones.image.controller.pullSecret` | Image pull secret for the controller | `` |
| `agones.image.sdk.name` | Image name for the sdk | `agones-sdk` |
| `agones.image.sdk.cpuRequest` | (⚠️ Development feature ⚠️) the [cpu request][constraints] for sdk server container | `30m` |
| `agones.image.sdk.cpuLimit` | (⚠️ Development feature ⚠️) the [cpu limit][constraints] for the sdk server container | `0` (none) |
| `agones.image.sdk.alwaysPull` | Tells if the sdk image should always be pulled | `false` |
| `agones.controller.healthCheck.http.port` | Port to use for liveness probe service | `8080` |
| `agones.controller.healthCheck.initialDelaySeconds` | Initial delay before performing the first probe (in seconds) | `3` |
Expand All @@ -104,6 +106,8 @@ The following tables lists the configurable parameters of the Agones chart and t
| `gameservers.minPort` | Minimum port to use for dynamic port allocation | `7000` |
| `gameservers.maxPort` | Maximum port to use for dynamic port allocation | `8000` |

[constraints]: https://kubernetes.io/docs/tasks/administer-cluster/manage-resources/cpu-constraint-namespace/

Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. For example,

```bash
Expand Down
12 changes: 8 additions & 4 deletions install/helm/agones/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,20 @@ spec:
image: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.controller.name}}:{{ .Values.agones.image.tag }}"
imagePullPolicy: {{ .Values.agones.image.controller.pullPolicy }}
env:
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: {{ .Values.agones.image.sdk.alwaysPull | quote }}
# minimum port that can be exposed to GameServer traffic
- name: MIN_PORT
value: {{ .Values.gameservers.minPort | quote }}
value: {{ .Values.gameservers.minPort | quote }}
# maximum port that can be exposed to GameServer traffic
- name: MAX_PORT
value: {{ .Values.gameservers.maxPort | quote }}
- name: SIDECAR # overwrite the GameServer sidecar image that is used
- name: SIDECAR_IMAGE # overwrite the GameServer sidecar image that is used
value: "{{ .Values.agones.image.registry }}/{{ .Values.agones.image.sdk.name}}:{{ .Values.agones.image.tag }}"
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: {{ .Values.agones.image.sdk.alwaysPull | quote }}
- name: SIDECAR_CPU_REQUEST
value: {{ .Values.agones.image.sdk.cpuRequest | quote }}
- name: SIDECAR_CPU_LIMIT
value: {{ .Values.agones.image.sdk.cpuLimit | quote }}
livenessProbe:
httpGet:
path: /live
Expand Down
2 changes: 2 additions & 0 deletions install/helm/agones/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ agones:
pullPolicy: IfNotPresent
sdk:
name: agones-sdk
cpuRequest: 30m
cpuLimit: 0
alwaysPull: false

gameservers:
Expand Down
12 changes: 8 additions & 4 deletions install/yaml/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -859,16 +859,20 @@ spec:
image: "gcr.io/agones-images/agones-controller:0.6.0-rc"
imagePullPolicy: IfNotPresent
env:
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: "false"
# minimum port that can be exposed to GameServer traffic
- name: MIN_PORT
value: "7000"
value: "7000"
# maximum port that can be exposed to GameServer traffic
- name: MAX_PORT
value: "8000"
- name: SIDECAR # overwrite the GameServer sidecar image that is used
- name: SIDECAR_IMAGE # overwrite the GameServer sidecar image that is used
value: "gcr.io/agones-images/agones-sdk:0.6.0-rc"
- name: ALWAYS_PULL_SIDECAR # set the sidecar imagePullPolicy to Always
value: "false"
- name: SIDECAR_CPU_REQUEST
value: "30m"
- name: SIDECAR_CPU_LIMIT
value: "0"
livenessProbe:
httpGet:
path: /live
Expand Down
22 changes: 20 additions & 2 deletions pkg/gameservers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1beta1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/intstr"
Expand All @@ -59,6 +60,8 @@ type Controller struct {
logger *logrus.Entry
sidecarImage string
alwaysPullSidecarImage bool
sidecarCPURequest resource.Quantity
sidecarCPULimit resource.Quantity
crdGetter v1beta1.CustomResourceDefinitionInterface
podGetter typedcorev1.PodsGetter
podLister corelisterv1.PodLister
Expand All @@ -71,8 +74,9 @@ type Controller struct {
healthController *HealthController
workerqueue *workerqueue.WorkerQueue
allocationMutex *sync.Mutex
stop <-chan struct{}
recorder record.EventRecorder
stop <-chan struct {
}
recorder record.EventRecorder
}

// NewController returns a new gameserver crd controller
Expand All @@ -83,6 +87,8 @@ func NewController(
minPort, maxPort int32,
sidecarImage string,
alwaysPullSidecarImage bool,
sidecarCPURequest resource.Quantity,
sidecarCPULimit resource.Quantity,
kubeClient kubernetes.Interface,
kubeInformerFactory informers.SharedInformerFactory,
extClient extclientset.Interface,
Expand All @@ -95,6 +101,8 @@ func NewController(

c := &Controller{
sidecarImage: sidecarImage,
sidecarCPULimit: sidecarCPULimit,
sidecarCPURequest: sidecarCPURequest,
alwaysPullSidecarImage: alwaysPullSidecarImage,
allocationMutex: allocationMutex,
crdGetter: extClient.ApiextensionsV1beta1().CustomResourceDefinitions(),
Expand Down Expand Up @@ -463,6 +471,7 @@ func (c *Controller) sidecar(gs *v1alpha1.GameServer) corev1.Container {
},
},
},
Resources: corev1.ResourceRequirements{},
LivenessProbe: &corev1.Probe{
Handler: corev1.Handler{
HTTPGet: &corev1.HTTPGetAction{
Expand All @@ -474,6 +483,15 @@ func (c *Controller) sidecar(gs *v1alpha1.GameServer) corev1.Container {
PeriodSeconds: 3,
},
}

if !c.sidecarCPURequest.IsZero() {
sidecar.Resources.Requests = corev1.ResourceList{corev1.ResourceCPU: c.sidecarCPURequest}
}

if !c.sidecarCPULimit.IsZero() {
sidecar.Resources.Limits = corev1.ResourceList{corev1.ResourceCPU: c.sidecarCPULimit}
}

if c.alwaysPullSidecarImage {
sidecar.ImagePullPolicy = corev1.PullAlways
}
Expand Down
Loading

0 comments on commit c6f714a

Please sign in to comment.