Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(kuma-cp): reissue admin tls cert on dp address change #5222

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions pkg/xds/sync/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,16 @@ func DefaultDataplaneWatchdogFactory(
)

deps := DataplaneWatchdogDependencies{
dataplaneProxyBuilder: dataplaneProxyBuilder,
dataplaneReconciler: dataplaneReconciler,
ingressProxyBuilder: ingressProxyBuilder,
ingressReconciler: ingressReconciler,
egressProxyBuilder: egressProxyBuilder,
egressReconciler: egressReconciler,
envoyCpCtx: envoyCpCtx,
meshCache: rt.MeshCache(),
metadataTracker: metadataTracker,
resManager: rt.ReadOnlyResourceManager(),
DataplaneProxyBuilder: dataplaneProxyBuilder,
DataplaneReconciler: dataplaneReconciler,
IngressProxyBuilder: ingressProxyBuilder,
IngressReconciler: ingressReconciler,
EgressProxyBuilder: egressProxyBuilder,
EgressReconciler: egressReconciler,
EnvoyCpCtx: envoyCpCtx,
MeshCache: rt.MeshCache(),
MetadataTracker: metadataTracker,
ResManager: rt.ReadOnlyResourceManager(),
}
return NewDataplaneWatchdogFactory(
xdsMetrics,
Expand Down
60 changes: 31 additions & 29 deletions pkg/xds/sync/dataplane_watchdog.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ import (
)

type DataplaneWatchdogDependencies struct {
dataplaneProxyBuilder *DataplaneProxyBuilder
dataplaneReconciler SnapshotReconciler
ingressProxyBuilder *IngressProxyBuilder
ingressReconciler SnapshotReconciler
egressProxyBuilder *EgressProxyBuilder
egressReconciler SnapshotReconciler
envoyCpCtx *xds_context.ControlPlaneContext
meshCache *mesh.Cache
metadataTracker DataplaneMetadataTracker
resManager core_manager.ReadOnlyResourceManager
DataplaneProxyBuilder *DataplaneProxyBuilder
DataplaneReconciler SnapshotReconciler
IngressProxyBuilder *IngressProxyBuilder
IngressReconciler SnapshotReconciler
EgressProxyBuilder *EgressProxyBuilder
EgressReconciler SnapshotReconciler
EnvoyCpCtx *xds_context.ControlPlaneContext
MeshCache *mesh.Cache
MetadataTracker DataplaneMetadataTracker
ResManager core_manager.ReadOnlyResourceManager
}

type DataplaneWatchdog struct {
Expand All @@ -40,6 +40,7 @@ type DataplaneWatchdog struct {
dpType mesh_proto.ProxyType
proxyTypeSettled bool
envoyAdminMTLS *core_xds.ServerSideMTLSCerts
dpAddress string
}

func NewDataplaneWatchdog(deps DataplaneWatchdogDependencies, dpKey core_model.ResourceKey) *DataplaneWatchdog {
Expand All @@ -52,7 +53,7 @@ func NewDataplaneWatchdog(deps DataplaneWatchdogDependencies, dpKey core_model.R
}

func (d *DataplaneWatchdog) Sync(ctx context.Context) error {
metadata := d.metadataTracker.Metadata(d.key)
metadata := d.MetadataTracker.Metadata(d.key)
if metadata == nil {
return errors.New("metadata cannot be nil")
}
Expand All @@ -77,12 +78,12 @@ func (d *DataplaneWatchdog) Cleanup() error {
proxyID := core_xds.FromResourceKey(d.key)
switch d.dpType {
case mesh_proto.DataplaneProxyType:
d.envoyCpCtx.Secrets.Cleanup(d.key)
return d.dataplaneReconciler.Clear(&proxyID)
d.EnvoyCpCtx.Secrets.Cleanup(d.key)
return d.DataplaneReconciler.Clear(&proxyID)
case mesh_proto.IngressProxyType:
return d.ingressReconciler.Clear(&proxyID)
return d.IngressReconciler.Clear(&proxyID)
case mesh_proto.EgressProxyType:
return d.egressReconciler.Clear(&proxyID)
return d.EgressReconciler.Clear(&proxyID)
default:
return nil
}
Expand All @@ -91,12 +92,12 @@ func (d *DataplaneWatchdog) Cleanup() error {
// syncDataplane syncs state of the Dataplane.
// It uses Mesh Hash to decide if we need to regenerate configuration or not.
func (d *DataplaneWatchdog) syncDataplane(ctx context.Context) error {
meshCtx, err := d.meshCache.GetMeshContext(ctx, syncLog, d.key.Mesh)
meshCtx, err := d.MeshCache.GetMeshContext(ctx, syncLog, d.key.Mesh)
if err != nil {
return err
}

certInfo := d.envoyCpCtx.Secrets.Info(d.key)
certInfo := d.EnvoyCpCtx.Secrets.Info(d.key)
syncForCert := certInfo != nil && certInfo.ExpiringSoon() // check if we need to regenerate config because identity cert is expiring soon.
syncForConfig := meshCtx.Hash != d.lastHash // check if we need to regenerate config because Kuma policies has changed.
if !syncForCert && !syncForConfig {
Expand All @@ -110,10 +111,10 @@ func (d *DataplaneWatchdog) syncDataplane(ctx context.Context) error {
}

envoyCtx := &xds_context.Context{
ControlPlane: d.envoyCpCtx,
ControlPlane: d.EnvoyCpCtx,
Mesh: meshCtx,
}
proxy, err := d.dataplaneProxyBuilder.Build(ctx, d.key, meshCtx)
proxy, err := d.DataplaneProxyBuilder.Build(ctx, d.key, meshCtx)
if err != nil {
return err
}
Expand All @@ -123,9 +124,9 @@ func (d *DataplaneWatchdog) syncDataplane(ctx context.Context) error {
}
proxy.EnvoyAdminMTLSCerts = envoyAdminMTLS
if !envoyCtx.Mesh.Resource.MTLSEnabled() {
d.envoyCpCtx.Secrets.Cleanup(d.key) // we need to cleanup secrets if mtls is disabled
d.EnvoyCpCtx.Secrets.Cleanup(d.key) // we need to cleanup secrets if mtls is disabled
}
if err := d.dataplaneReconciler.Reconcile(*envoyCtx, proxy); err != nil {
if err := d.DataplaneReconciler.Reconcile(*envoyCtx, proxy); err != nil {
return err
}
d.lastHash = meshCtx.Hash
Expand All @@ -135,10 +136,10 @@ func (d *DataplaneWatchdog) syncDataplane(ctx context.Context) error {
// syncIngress synces state of Ingress Dataplane. Notice that it does not use Mesh Hash yet because Ingress supports many Meshes.
func (d *DataplaneWatchdog) syncIngress(ctx context.Context) error {
envoyCtx := &xds_context.Context{
ControlPlane: d.envoyCpCtx,
ControlPlane: d.EnvoyCpCtx,
Mesh: xds_context.MeshContext{}, // ZoneIngress does not have a mesh!
}
proxy, err := d.ingressProxyBuilder.Build(ctx, d.key)
proxy, err := d.IngressProxyBuilder.Build(ctx, d.key)
if err != nil {
return err
}
Expand All @@ -147,18 +148,18 @@ func (d *DataplaneWatchdog) syncIngress(ctx context.Context) error {
return errors.Wrap(err, "could not get Envoy Admin mTLS certs")
}
proxy.EnvoyAdminMTLSCerts = envoyAdminMTLS
return d.ingressReconciler.Reconcile(*envoyCtx, proxy)
return d.IngressReconciler.Reconcile(*envoyCtx, proxy)
}

// syncEgress syncs state of Egress Dataplane. Notice that it does not use
// Mesh Hash yet because Egress supports many Meshes.
func (d *DataplaneWatchdog) syncEgress(ctx context.Context) error {
envoyCtx := &xds_context.Context{
ControlPlane: d.envoyCpCtx,
ControlPlane: d.EnvoyCpCtx,
Mesh: xds_context.MeshContext{}, // ZoneEgress does not have a mesh!
}

proxy, err := d.egressProxyBuilder.Build(ctx, d.key)
proxy, err := d.EgressProxyBuilder.Build(ctx, d.key)
if err != nil {
return err
}
Expand All @@ -168,12 +169,12 @@ func (d *DataplaneWatchdog) syncEgress(ctx context.Context) error {
}
proxy.EnvoyAdminMTLSCerts = envoyAdminMTLS

return d.egressReconciler.Reconcile(*envoyCtx, proxy)
return d.EgressReconciler.Reconcile(*envoyCtx, proxy)
}

func (d *DataplaneWatchdog) getEnvoyAdminMTLS(ctx context.Context, address string) (core_xds.ServerSideMTLSCerts, error) {
if d.envoyAdminMTLS == nil {
ca, err := envoy_admin_tls.LoadCA(ctx, d.resManager)
if d.envoyAdminMTLS == nil || d.dpAddress != address {
ca, err := envoy_admin_tls.LoadCA(ctx, d.ResManager)
if err != nil {
return core_xds.ServerSideMTLSCerts{}, errors.Wrap(err, "could not load the CA")
}
Expand All @@ -195,6 +196,7 @@ func (d *DataplaneWatchdog) getEnvoyAdminMTLS(ctx context.Context, address strin
// 2) have a stable certs = stable Envoy config
// This means that if we want to change Envoy Admin CA, we need to restart all CP instances.
jakubdyszkiewicz marked this conversation as resolved.
Show resolved Hide resolved
d.envoyAdminMTLS = &envoyAdminMTLS
d.dpAddress = address
}
return *d.envoyAdminMTLS, nil
}
175 changes: 175 additions & 0 deletions pkg/xds/sync/dataplane_watchdog_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package sync_test

import (
"context"
"net"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"k8s.io/client-go/util/cert"

mesh_proto "github.com/kumahq/kuma/api/mesh/v1alpha1"
config_manager "github.com/kumahq/kuma/pkg/core/config/manager"
core_mesh "github.com/kumahq/kuma/pkg/core/resources/apis/mesh"
"github.com/kumahq/kuma/pkg/core/resources/manager"
core_model "github.com/kumahq/kuma/pkg/core/resources/model"
core_xds "github.com/kumahq/kuma/pkg/core/xds"
"github.com/kumahq/kuma/pkg/dns/vips"
envoy_admin_tls "github.com/kumahq/kuma/pkg/envoy/admin/tls"
"github.com/kumahq/kuma/pkg/metrics"
"github.com/kumahq/kuma/pkg/plugins/resources/memory"
"github.com/kumahq/kuma/pkg/test/resources/samples"
"github.com/kumahq/kuma/pkg/xds/cache/mesh"
xds_context "github.com/kumahq/kuma/pkg/xds/context"
"github.com/kumahq/kuma/pkg/xds/envoy"
"github.com/kumahq/kuma/pkg/xds/secrets"
"github.com/kumahq/kuma/pkg/xds/server"
"github.com/kumahq/kuma/pkg/xds/sync"
)

type staticMetadataTracker struct {
metadata *core_xds.DataplaneMetadata
}

var _ sync.DataplaneMetadataTracker = &staticMetadataTracker{}

func (s *staticMetadataTracker) Metadata(dpKey core_model.ResourceKey) *core_xds.DataplaneMetadata {
return s.metadata
}

type staticSnapshotReconciler struct {
proxy *core_xds.Proxy
}

func (s *staticSnapshotReconciler) Reconcile(ctx xds_context.Context, proxy *core_xds.Proxy) error {
s.proxy = proxy
return nil
}

func (s *staticSnapshotReconciler) Clear(proxyId *core_xds.ProxyId) error {
return nil
}

var _ sync.SnapshotReconciler = &staticSnapshotReconciler{}

var _ = Describe("Dataplane Watchdog", func() {
const zone = ""
const cacheExpirationTime = time.Millisecond

var resManager manager.ResourceManager
var snapshotReconciler *staticSnapshotReconciler
var metadataTracker *staticMetadataTracker
var deps sync.DataplaneWatchdogDependencies

BeforeEach(func() {
snapshotReconciler = &staticSnapshotReconciler{}
metadataTracker = &staticMetadataTracker{}

store := memory.NewStore()
resManager = manager.NewResourceManager(store)
meshContextBuilder := xds_context.NewMeshContextBuilder(
resManager,
server.MeshResourceTypes(server.HashMeshExcludedResources),
net.LookupIP,
zone,
vips.NewPersistence(resManager, config_manager.NewConfigManager(store)),
".mesh",
80,
)
newMetrics, err := metrics.NewMetrics(zone)
Expect(err).ToNot(HaveOccurred())
cache, err := mesh.NewCache(cacheExpirationTime, meshContextBuilder, newMetrics)
Expect(err).ToNot(HaveOccurred())

secrets, err := secrets.NewSecrets(nil, nil, newMetrics) // nil is ok for now, because we don't use it
Expect(err).ToNot(HaveOccurred())

deps = sync.DataplaneWatchdogDependencies{
DataplaneProxyBuilder: &sync.DataplaneProxyBuilder{
MetadataTracker: metadataTracker,
APIVersion: envoy.APIV3,
Zone: zone,
},
DataplaneReconciler: snapshotReconciler,
EnvoyCpCtx: &xds_context.ControlPlaneContext{
Secrets: secrets,
Zone: zone,
},
MeshCache: cache,
MetadataTracker: metadataTracker,
ResManager: resManager,
}

pair, err := envoy_admin_tls.GenerateCA()
Expect(err).ToNot(HaveOccurred())
Expect(envoy_admin_tls.CreateCA(context.Background(), *pair, resManager)).To(Succeed())
})

Context("Dataplane", func() {
var resKey core_model.ResourceKey
var watchdog *sync.DataplaneWatchdog
var ctx context.Context

BeforeEach(func() {
Expect(samples.MeshDefaultBuilder().Create(resManager))
Expect(samples.DataplaneBackendBuilder().Create(resManager)).To(Succeed())
resKey = core_model.MetaToResourceKey(samples.DataplaneBackend().GetMeta())

metadataTracker.metadata = &core_xds.DataplaneMetadata{
ProxyType: mesh_proto.DataplaneProxyType,
}
watchdog = sync.NewDataplaneWatchdog(deps, resKey)
ctx = context.Background()
})

It("should reissue admin tls certificate when address has changed", func() {
// when
err := watchdog.Sync(ctx)

// then
Expect(err).ToNot(HaveOccurred())

certs, err := cert.ParseCertsPEM(snapshotReconciler.proxy.EnvoyAdminMTLSCerts.ServerPair.CertPEM)
Expect(err).ToNot(HaveOccurred())
Expect(certs[0].Subject.CommonName).To(Equal(samples.DataplaneBackend().Spec.Networking.Address))

// when address has changed
newAddress := "192.168.1.100"
err = manager.Upsert(ctx, resManager, resKey, core_mesh.NewDataplaneResource(), func(resource core_model.Resource) error {
resource.(*core_mesh.DataplaneResource).Spec.Networking.Address = newAddress
return nil
})
Expect(err).ToNot(HaveOccurred())

// and
time.Sleep(cacheExpirationTime)
err = watchdog.Sync(ctx)

// then cert is reissued with a new address
Expect(err).ToNot(HaveOccurred())

certs, err = cert.ParseCertsPEM(snapshotReconciler.proxy.EnvoyAdminMTLSCerts.ServerPair.CertPEM)
Expect(err).ToNot(HaveOccurred())
Expect(certs[0].Subject.CommonName).To(Equal(newAddress))
})

It("should not reconcile if mesh hash is the same", func() {
// when
err := watchdog.Sync(ctx)

// then
Expect(err).ToNot(HaveOccurred())
Expect(snapshotReconciler.proxy).ToNot(BeNil())

// when
snapshotReconciler.proxy = nil // set to nil so we can check if it was not called again
time.Sleep(cacheExpirationTime)
err = watchdog.Sync(ctx)

// then
Expect(err).ToNot(HaveOccurred())
Expect(snapshotReconciler.proxy).To(BeNil())
})
})
})