Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: recover from previously failed attempt #291

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions charts/aws-pca-issuer/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ spec:
{{- if .Values.disableApprovedCheck }}
- -disable-approved-check
{{- end }}
{{- if .Values.maxRetryDuration }}
- -max-retry-duration={{ .Values.maxRetryDuration }}
{{- end }}
{{- if .Values.disableClientSideRateLimiting }}
- -disable-client-side-rate-limiting
{{- end }}
Expand Down
11 changes: 7 additions & 4 deletions charts/aws-pca-issuer/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ disableApprovedCheck: false
# Disables Kubernetes client-side rate limiting (only use if API Priority & Fairness is enabled on the cluster).
disableClientSideRateLimiting: false

# Maxium duration to retry fullfilling a CertificateRequest
#maxRetryDuration: 180s

# Optional secrets used for pulling the container image
#
# For example:
Expand Down Expand Up @@ -55,12 +58,12 @@ service:
# Annotations to add to the issuer Pod
podAnnotations: {}

# Pod security context
# Pod security context
# +docs:property
podSecurityContext:
runAsUser: 65532

# Container security context
# Container security context
# +docs:property
securityContext:
allowPrivilegeEscalation: false
Expand Down Expand Up @@ -129,7 +132,7 @@ volumeMounts: []
# Configures a disruption budget for the deployment.
#
# Expects input structure similar to https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#poddisruptionbudgetspec-v1-policy
# WITHOUT the pod selector, which is handled by the chart.
# WITHOUT the pod selector, which is handled by the chart.
# Per https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#poddisruptionbudgetspec-v1-policy, `maxUnavailable` is mutually
# exclusive with `minAvailable`, you cannot set both.
#
Expand Down Expand Up @@ -177,7 +180,7 @@ approverRole:
# +docs:section=Monitoring

serviceMonitor:
# Create Prometheus ServiceMonitor
# Create Prometheus ServiceMonitor
create: false
# Annotations to add to the Prometheus ServiceMonitor
annotations: {}
Expand Down
16 changes: 16 additions & 0 deletions e2e/k8_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ func waitForIssuerReady(ctx context.Context, client *clientV1beta1.Client, name
return true, nil
}
}

fmt.Println("Issuer not ready yet - Current conditions:")
for _, condition := range issuer.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}
return false, nil
})
}
Expand All @@ -52,6 +57,11 @@ func waitForClusterIssuerReady(ctx context.Context, client *clientV1beta1.Client
}
}

fmt.Println("ClusterIssuer not ready yet - Current conditions:")
for _, condition := range issuer.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}

return false, nil
})
}
Expand All @@ -71,6 +81,12 @@ func waitForCertificateReady(ctx context.Context, client *cmclientv1.Certmanager
return true, nil
}
}

fmt.Println("Certifiate not ready yet - Current conditions:")
for _, condition := range certificate.Status.Conditions {
fmt.Printf("Type: %s, Status: %s, Reason: %s, Message: %s\n", condition.Type, condition.Status, condition.Reason, condition.Message)
}

return false, nil
})
}
5 changes: 5 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package main
import (
"flag"
"os"
"time"

certmanager "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"

Expand Down Expand Up @@ -58,6 +59,7 @@ func main() {
var enableLeaderElection bool
var probeAddr string
var disableApprovedCheck bool
var maxRetryDuration time.Duration
var disableClientSideRateLimiting bool

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
Expand All @@ -67,6 +69,7 @@ func main() {
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&disableApprovedCheck, "disable-approved-check", false,
"Disables waiting for CertificateRequests to have an approved condition before signing.")
flag.DurationVar(&maxRetryDuration, "max-retry-duration", 180, "Maximum duration to retry failed CertificateRequests. Set to 0 to disable.")
flag.BoolVar(&disableClientSideRateLimiting, "disable-client-side-rate-limiting", false,
"Disables Kubernetes client-side rate limiting (only use if API Priority & Fairness is enabled on the cluster).")

Expand Down Expand Up @@ -135,7 +138,9 @@ func main() {
Recorder: mgr.GetEventRecorderFor("awspcaissuer-controller"),

Clock: clock.RealClock{},
RequeueItter: controllers.NewRequeueItter(),
CheckApprovedCondition: !disableApprovedCheck,
MaxRetryDuration: maxRetryDuration,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CertificateRequest")
os.Exit(1)
Expand Down
3 changes: 3 additions & 0 deletions pkg/api/v1beta1/awspcaissuer_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ type AWSPCAIssuerStatus struct {
// ConditionTypeReady is the default condition type for the CRs
const ConditionTypeReady = "Ready"

// ConditionTypeIssuing is the condition type for the CRs when they are issuing a certificate
const ConditionTypeIssuing = "Issuing"

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status

Expand Down
89 changes: 70 additions & 19 deletions pkg/controllers/certificaterequest_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"context"
"errors"
"fmt"
"math/rand"
"time"

acmpcatypes "github.com/aws/aws-sdk-go-v2/service/acmpca/types"
"github.com/cert-manager/aws-privateca-issuer/pkg/aws"
Expand All @@ -42,6 +44,21 @@ import (
cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
)

type RequeueItter interface {
RequeueAfter() time.Duration
}

type requeueItter struct {
}

func (r *requeueItter) RequeueAfter() time.Duration {
return 1*time.Minute + time.Duration(rand.Intn(60))*time.Second
}

func NewRequeueItter() RequeueItter {
return &requeueItter{}
}

// CertificateRequestReconciler reconciles a AWSPCAIssuer object
type CertificateRequestReconciler struct {
client.Client
Expand All @@ -50,7 +67,9 @@ type CertificateRequestReconciler struct {
Recorder record.EventRecorder

Clock clock.Clock
RequeueItter RequeueItter
CheckApprovedCondition bool
MaxRetryDuration time.Duration
}

// +kubebuilder:rbac:groups=cert-manager.io,resources=certificaterequests,verbs=get;list;watch;update
Expand Down Expand Up @@ -118,7 +137,7 @@ func (r *CertificateRequestReconciler) Reconcile(ctx context.Context, req ctrl.R
}

message := "The CertificateRequest was denied by an approval controller"
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonDenied, message)
return ctrl.Result{}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonDenied, message)
}

if r.CheckApprovedCondition {
Expand All @@ -142,53 +161,67 @@ func (r *CertificateRequestReconciler) Reconcile(ctx context.Context, req ctrl.R
issuerName.Namespace = ""
}

retry, requeue, err := r.HandleSignRequest(ctx, log, issuerName, cr)

if err != nil {
now := r.Clock.Now()
creationTime := cr.GetCreationTimestamp()
pastMaxRetryDuration := now.After(creationTime.Add(r.MaxRetryDuration))

if pastMaxRetryDuration || !retry {
return ctrl.Result{Requeue: requeue}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "Permanent error signing certificate: %s", err.Error())
}

return ctrl.Result{
RequeueAfter: r.RequeueItter.RequeueAfter(),
}, r.setTemporaryStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "Temporary error signing certificate, retry again: %s", err.Error())
}

return ctrl.Result{Requeue: requeue}, r.setPermanentStatus(ctx, cr, cmmeta.ConditionTrue, cmapi.CertificateRequestReasonIssued, "Certificate issued")
}

func (r *CertificateRequestReconciler) HandleSignRequest(ctx context.Context, log logr.Logger, issuerName types.NamespacedName, cr *cmapi.CertificateRequest) (retry bool, requeue bool, error) {
iss, err := util.GetIssuer(ctx, r.Client, issuerName)
if err != nil {
log.Error(err, "failed to retrieve Issuer resource")
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "issuer could not be found")
return ctrl.Result{}, err
return true, false, fmt.Errorf("failed to retrieve Issuer resource: %w", err)
}

if !isReady(iss) {
err := fmt.Errorf("issuer %s is not ready", iss.GetName())
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "issuer is not ready")
return ctrl.Result{}, err
return true, false, fmt.Errorf("issuer %s is not ready", iss.GetName())
}

provisioner, ok := aws.GetProvisioner(issuerName)
if !ok {
err := fmt.Errorf("provisioner for %s not found", issuerName)
log.Error(err, "failed to retrieve provisioner")
_ = r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to retrieve provisioner")
return ctrl.Result{}, err
return true, false, fmt.Errorf("provisioner for %s not found (name: %s, namespace: %s)", issuerName, issuerName.Name, issuerName.Namespace)
}

certArn, exists := cr.ObjectMeta.GetAnnotations()["aws-privateca-issuer/certificate-arn"]
if !exists {
err := provisioner.Sign(ctx, cr, log)
if err != nil {
log.Error(err, "failed to request certificate from PCA")
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to request certificate from PCA: "+err.Error())
return false, fmt.Errorf("failed to sign certificat from PCA: %w", err)
}

return ctrl.Result{Requeue: true}, r.Client.Update(ctx, cr)
return true, true, nil
}

pem, ca, err := provisioner.Get(ctx, cr, certArn, log)
if err != nil {
var errorType *acmpcatypes.RequestInProgressException
if errors.As(err, &errorType) {
log.Info("certificate is still issuing")
return ctrl.Result{Requeue: true}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonPending, "waiting for certificate to be issued")
return false, false,fmt.Errorf("waiting for certificate to be issued: %w", err)
}

log.Error(err, "failed to issue certificate from PCA")
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionFalse, cmapi.CertificateRequestReasonFailed, "failed to issue certificate from PCA: "+err.Error())
return false,true, fmt.Errorf("failed to issue certificate from PCA: %w", err)
}

cr.Status.Certificate = pem
cr.Status.CA = ca
return ctrl.Result{}, r.setStatus(ctx, cr, cmmeta.ConditionTrue, cmapi.CertificateRequestReasonIssued, "certificate issued")

return true, false, nil
}

// SetupWithManager sets up the controller with the Manager.
Expand All @@ -207,14 +240,32 @@ func isReady(issuer api.GenericIssuer) bool {
return false
}

func (r *CertificateRequestReconciler) setStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
func (r *CertificateRequestReconciler) setStatusInternal(ctx context.Context, cr *cmapi.CertificateRequest, permanent bool, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
completeMessage := fmt.Sprintf(message, args...)
cmutil.SetCertificateRequestCondition(cr, "Ready", status, reason, completeMessage)

eventType := core.EventTypeNormal
if status == cmmeta.ConditionFalse {
eventType = core.EventTypeWarning
}
r.Recorder.Event(cr, eventType, reason, completeMessage)
return r.Client.Status().Update(ctx, cr)

cmutil.SetCertificateRequestCondition(cr, api.ConditionTypeIssuing, status, reason, completeMessage)
if permanent {
cmutil.SetCertificateRequestCondition(cr, api.ConditionTypeReady, status, reason, completeMessage)
}
r.Client.Status().Update(ctx, cr)

if reason == cmapi.CertificateRequestReasonFailed {
return fmt.Errorf(completeMessage)
}

return nil
}

func (r *CertificateRequestReconciler) setPermanentStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
return r.setStatusInternal(ctx, cr, true, status, reason, message, args...)
}

func (r *CertificateRequestReconciler) setTemporaryStatus(ctx context.Context, cr *cmapi.CertificateRequest, status cmmeta.ConditionStatus, reason, message string, args ...interface{}) error {
return r.setStatusInternal(ctx, cr, false, status, reason, message, args...)
}
Loading