Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

INSIGHTS-157 - PDB <> HPA check #1057

Merged
merged 23 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/checks/reliability.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ key | default | description
`topologySpreadConstraint` | `warning` | Fails when there is no topology spread constraint on the pod
`hpaMaxAvailability` | `warning` | Fails when `maxAvailable` lesser or equal than `minAvailable` (if defined) for a HorizontalPodAutoscaler
`hpaMinAvailability` | `warning` | Fails when `minAvailable` (if defined) lesser or equal to one for a HorizontalPodAutoscaler
`pdbMinAvailableGreaterThanHPAMinReplicas` | `warning` | Fails when PDB `minAvailable` is greater than HPA `minReplicas`

## Background

Expand Down
1 change: 1 addition & 0 deletions pkg/config/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ var (
"rolebindingClusterAdminRole",
"hpaMaxAvailability",
"hpaMinAvailability",
"pdbMinAvailableGreaterThanHPAMinReplicas",
}

// BuiltInChecks contains the checks that come pre-installed w/ Polaris
Expand Down
12 changes: 4 additions & 8 deletions pkg/config/checks/missingPodDisruptionBudget.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,23 @@ controllers:
schema:
"$schema": http://json-schema.org/draft-07/schema#
type: object
required: [spec]
properties:
spec:
type: object
required: [template]
properties:
template:
type: object
required: [metadata]
properties:
metadata:
type: object
required: [labels]
properties:
labels:
type: object
minProperties: 1
required:
- labels
required:
- metadata
required:
- template
required:
- spec
additionalSchemaStrings:
policy/PodDisruptionBudget: |
type: object
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
successMessage: PDB and HPA are correctly configured
failureMessage: PDB minAvailable is greater than HPA minReplicas
category: Reliability
target: Controller
controllers:
include:
- Deployment
1 change: 1 addition & 0 deletions pkg/config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ checks:
topologySpreadConstraint: warning
hpaMaxAvailability: warning
hpaMinAvailability: warning
pdbMinAvailableGreaterThanHPAMinReplicas: warning

# efficiency
cpuRequestsMissing: warning
Expand Down
1 change: 1 addition & 0 deletions pkg/config/examples/config-full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ checks:
metadataAndInstanceMismatched: warning
hpaMaxAvailability: warning
hpaMinAvailability: warning
pdbMinAvailableGreaterThanHPAMinReplicas: warning

# efficiency
cpuRequestsMissing: warning
Expand Down
10 changes: 5 additions & 5 deletions pkg/kube/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func resolveControllerFromPod(ctx context.Context, podResource kubeAPICoreV1.Pod
err = cacheAllObjectsOfKind(ctx, firstOwner.APIVersion, firstOwner.Kind, dynamicClient, restMapper, objectCache)
}
if err != nil {
logrus.Warnf("Error caching objects of Kind %s %v", firstOwner.Kind, err)
logrus.Warnf("error caching objects of Kind %s %v", firstOwner.Kind, err)
break
}
abstractObject, ok = objectCache[key]
Expand All @@ -193,7 +193,7 @@ func resolveControllerFromPod(ctx context.Context, podResource kubeAPICoreV1.Pod

objMeta, err := meta.Accessor(&abstractObject)
if err != nil {
logrus.Warnf("Error retrieving parent metadata %s of API %s and Kind %s because of error: %v ", firstOwner.Name, firstOwner.APIVersion, firstOwner.Kind, err)
logrus.Warnf("error retrieving parent metadata %s of API %s and Kind %s because of error: %v ", firstOwner.Name, firstOwner.APIVersion, firstOwner.Kind, err)
return GenericResource{}, err
}
podSpec := GetPodSpec(abstractObject.Object)
Expand Down Expand Up @@ -221,7 +221,7 @@ func cacheSingleObject(ctx context.Context, apiVersion, kind, namespace, name st
logrus.Debugf("Caching a single %s", kind)
object, err := getObject(ctx, namespace, kind, apiVersion, name, dynamicClient, restMapper)
if err != nil {
logrus.Warnf("Error retrieving object %s/%s/%s/%s because of error: %v", kind, apiVersion, namespace, name, err)
logrus.Warnf("error retrieving object %s/%s/%s/%s because of error: %v", kind, apiVersion, namespace, name, err)
return err
}
key := fmt.Sprintf("%s/%s/%s", object.GetKind(), object.GetNamespace(), object.GetName())
Expand All @@ -235,13 +235,13 @@ func cacheAllObjectsOfKind(ctx context.Context, apiVersion, kind string, dynamic
fqKind := schema.FromAPIVersionAndKind(apiVersion, kind)
mapping, err := restMapper.RESTMapping(fqKind.GroupKind(), fqKind.Version)
if err != nil {
logrus.Warnf("Error retrieving mapping of API %s and Kind %s because of error: %v", apiVersion, kind, err)
logrus.Warnf("error retrieving mapping of API %s and Kind %s because of error: %v", apiVersion, kind, err)
return err
}

objects, err := dynamicClient.Resource(mapping.Resource).Namespace("").List(ctx, kubeAPIMetaV1.ListOptions{})
if err != nil {
logrus.Warnf("Error retrieving parent object API %s and Kind %s because of error: %v", mapping.Resource.Version, mapping.Resource.Resource, err)
logrus.Warnf("error retrieving parent object API %s and Kind %s because of error: %v", mapping.Resource.Version, mapping.Resource.Resource, err)
return err
}
for idx, object := range objects.Items {
Expand Down
6 changes: 3 additions & 3 deletions pkg/kube/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ func CreateResourceProviderFromPath(directory string) (*ResourceProvider, error)
}
err = resources.addResourcesFromYaml(string(contents))
if err != nil {
logrus.Warnf("Skipping %s: cannot add resource from YAML: %v", path, err)
logrus.Warnf("skipping %s: cannot add resource from YAML: %v", path, err)
}
return nil
}
Expand Down Expand Up @@ -340,7 +340,7 @@ func CreateResourceProviderFromAPI(ctx context.Context, kube kubernetes.Interfac
groupKind := parseGroupKind(maybeTransformKindIntoGroupKind(string(kind)))
mapping, err := restMapper.RESTMapping(groupKind)
if err != nil {
logrus.Warnf("Error retrieving mapping of Kind %s because of error: %v", kind, err)
logrus.Warnf("error retrieving mapping of Kind %s because of error: %v", kind, err)
return nil, err
}
if c.Namespace != "" && mapping.Scope.Name() != meta.RESTScopeNameNamespace {
Expand All @@ -351,7 +351,7 @@ func CreateResourceProviderFromAPI(ctx context.Context, kube kubernetes.Interfac
logrus.Info("Loading " + kind)
objects, err := dynamic.Resource(mapping.Resource).Namespace(c.Namespace).List(ctx, metav1.ListOptions{})
if err != nil {
logrus.Warnf("Error retrieving parent object API %s and Kind %s because of error: %v", mapping.Resource.Version, mapping.Resource.Resource, err)
logrus.Warnf("error retrieving parent object API %s and Kind %s because of error: %v", mapping.Resource.Version, mapping.Resource.Resource, err)
return nil, err
}
for _, obj := range objects.Items {
Expand Down
19 changes: 19 additions & 0 deletions pkg/validator/custom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package validator

import (
"sync"

"github.com/qri-io/jsonschema"
)

type validatorFunction func(test schemaTestCase) (bool, []jsonschema.ValError, error)

var validatorMapper = map[string]validatorFunction{}
var lock = &sync.Mutex{}

func registerCustomChecks(name string, check validatorFunction) {
lock.Lock()
defer lock.Unlock()

validatorMapper[name] = check
}
150 changes: 150 additions & 0 deletions pkg/validator/pdb_hpa_validator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package validator

import (
"fmt"
"strconv"
"strings"

"github.com/fairwindsops/polaris/pkg/kube"
"github.com/qri-io/jsonschema"
"github.com/sirupsen/logrus"
appsv1 "k8s.io/api/apps/v1"
autoscalingv1 "k8s.io/api/autoscaling/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/intstr"
)

func init() {
registerCustomChecks("pdbMinAvailableGreaterThanHPAMinReplicas", pdbMinAvailableGreaterThanHPAMinReplicas)
}

func pdbMinAvailableGreaterThanHPAMinReplicas(test schemaTestCase) (bool, []jsonschema.ValError, error) {
if test.ResourceProvider == nil {
logrus.Debug("ResourceProvider is nil")
return true, nil, nil
}

deployment := &appsv1.Deployment{}
err := runtime.DefaultUnstructuredConverter.FromUnstructured(test.Resource.Resource.Object, deployment)
if err != nil {
logrus.Warnf("error converting unstructured to Deployment: %v", err)
return true, nil, nil
}

attachedPDB, err := hasPDBAttached(*deployment, test.ResourceProvider.Resources["policy/PodDisruptionBudget"])
if err != nil {
logrus.Warnf("error getting PodDisruptionBudget: %v", err)
return true, nil, nil
}

attachedHPA, err := hasHPAAttached(*deployment, test.ResourceProvider.Resources["autoscaling/HorizontalPodAutoscaler"])
if err != nil {
logrus.Warnf("error getting HorizontalPodAutoscaler: %v", err)
return true, nil, nil
}

if attachedPDB != nil && attachedHPA != nil {
logrus.Debugf("both PDB and HPA are attached to deployment %s", deployment.Name)

pdbMinAvailable, isPercent, err := getIntOrPercentValueSafely(attachedPDB.Spec.MinAvailable)
if err != nil {
logrus.Warnf("error getting getIntOrPercentValueSafely: %v", err)
return true, nil, nil
}

if isPercent {
// if the value is a percentage, we need to calculate the actual value
if deployment.Spec.Replicas == nil {
vitorvezani marked this conversation as resolved.
Show resolved Hide resolved
logrus.Debug("deployment.Spec.Replicas is nil")
return true, nil, nil
}

pdbMinAvailable, err = intstr.GetScaledValueFromIntOrPercent(attachedPDB.Spec.MinAvailable, int(*deployment.Spec.Replicas), true)
if err != nil {
logrus.Warnf("error getting minAvailable value from PodDisruptionBudget: %v", err)
return true, nil, nil
}
}

if attachedHPA.Spec.MinReplicas != nil && pdbMinAvailable > int(*attachedHPA.Spec.MinReplicas) {
vitorvezani marked this conversation as resolved.
Show resolved Hide resolved
return false, []jsonschema.ValError{
{
PropertyPath: "spec.minAvailable",
InvalidValue: pdbMinAvailable,
Message: fmt.Sprintf("The minAvailable value in the PodDisruptionBudget(%s) is %d, which is greater than the minReplicas value in the HorizontalPodAutoscaler(%s) (%d)", attachedPDB.Name, pdbMinAvailable, attachedHPA.Name, *attachedHPA.Spec.MinReplicas),
},
}, nil
}
}

return true, nil, nil
}

func hasPDBAttached(deployment appsv1.Deployment, pdbs []kube.GenericResource) (*policyv1.PodDisruptionBudget, error) {
for _, generic := range pdbs {
pdb := &policyv1.PodDisruptionBudget{}
err := runtime.DefaultUnstructuredConverter.FromUnstructured(generic.Resource.Object, pdb)
if err != nil {
return nil, fmt.Errorf("error converting unstructured to PodDisruptionBudget: %v", err)
}

if pdb.Spec.Selector == nil {
logrus.Debug("pdb.Spec.Selector is nil")
continue
}

if matchesPDBForDeployment(deployment.Spec.Template.Labels, pdb.Spec.Selector.MatchLabels) {
return pdb, nil
}
}
return nil, nil
}

// matchesPDBForDeployment checks if the labels of the deployment match the labels of the PDB
func matchesPDBForDeployment(deploymentLabels, pdbLabels map[string]string) bool {
for key, value := range pdbLabels {
if deploymentLabels[key] == value {
return true
}
}
return false
}

func hasHPAAttached(deployment appsv1.Deployment, hpas []kube.GenericResource) (*autoscalingv1.HorizontalPodAutoscaler, error) {
for _, generic := range hpas {
hpa := &autoscalingv1.HorizontalPodAutoscaler{}
err := runtime.DefaultUnstructuredConverter.FromUnstructured(generic.Resource.Object, hpa)
if err != nil {
return nil, fmt.Errorf("error converting unstructured to HorizontalPodAutoscaler: %v", err)
}

if hpa.Spec.ScaleTargetRef.Kind == "Deployment" && hpa.Spec.ScaleTargetRef.Name == deployment.Name {
return hpa, nil
}
}
return nil, nil
}

// getIntOrPercentValueSafely is a safer version of getIntOrPercentValue based on private function intstr.getIntOrPercentValueSafely
func getIntOrPercentValueSafely(intOrStr *intstr.IntOrString) (int, bool, error) {
switch intOrStr.Type {
case intstr.Int:
return intOrStr.IntValue(), false, nil
case intstr.String:
isPercent := false
s := intOrStr.StrVal
if strings.HasSuffix(s, "%") {
isPercent = true
s = strings.TrimSuffix(intOrStr.StrVal, "%")
} else {
return 0, false, fmt.Errorf("invalid type: string is not a percentage")
}
v, err := strconv.Atoi(s)
if err != nil {
return 0, false, fmt.Errorf("invalid value %q: %v", intOrStr.StrVal, err)
}
return int(v), isPercent, nil
}
return 0, false, fmt.Errorf("invalid type: neither int nor percentage")
}
4 changes: 3 additions & 1 deletion pkg/validator/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ func applySchemaCheck(conf *config.Configuration, checkID string, test schemaTes
passes, issues, err = check.CheckContainer(test.Container)
} else if check.Validator.SchemaURI != "" {
passes, issues, err = check.CheckObject(test.Resource.Resource.Object)
} else if validatorMapper[checkID] != nil {
passes, issues, err = validatorMapper[checkID](test)
} else {
passes, issues, err = true, []jsonschema.ValError{}, nil
}
Expand All @@ -380,7 +382,7 @@ func applySchemaCheck(conf *config.Configuration, checkID string, test schemaTes
break
}
if test.ResourceProvider == nil {
logrus.Warnf("No ResourceProvider available, check %s will not work in this context (e.g. admission control)", checkID)
logrus.Warnf("no ResourceProvider available, check %s will not work in this context (e.g. admission control)", checkID)
break
}
resources := test.ResourceProvider.Resources[groupkind]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: zookeeper
spec:
replicas: 10
vitorvezani marked this conversation as resolved.
Show resolved Hide resolved
template:
metadata:
labels:
app.kubernetes.io/name: zookeeper
foo: bar
spec:
containers:
- name: zookeeper
image: zookeeper
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: zookeeper-pdb
spec:
minAvailable: 150% # 1.5 * 10 = 15
selector:
matchLabels:
app.kubernetes.io/name: zookeeper
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: zookeeper-hpa
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: zookeeper
minReplicas: 10
maxReplicas: 15
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 50
Loading
Loading