Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

connect: persist intermediate CAs on leader change #4379

Merged
merged 4 commits into from
Jul 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions agent/consul/fsm/snapshot_oss.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func init() {
registerRestorer(structs.AutopilotRequestType, restoreAutopilot)
registerRestorer(structs.IntentionRequestType, restoreIntention)
registerRestorer(structs.ConnectCARequestType, restoreConnectCA)
registerRestorer(structs.ConnectCAProviderStateType, restoreConnectCAProviderState)
}

func persistOSS(s *snapshot, sink raft.SnapshotSink, encoder *codec.Encoder) error {
Expand Down Expand Up @@ -52,6 +53,9 @@ func persistOSS(s *snapshot, sink raft.SnapshotSink, encoder *codec.Encoder) err
if err := s.persistConnectCA(sink, encoder); err != nil {
return err
}
if err := s.persistConnectCAProviderState(sink, encoder); err != nil {
return err
}
return nil
}

Expand Down Expand Up @@ -284,6 +288,24 @@ func (s *snapshot) persistConnectCA(sink raft.SnapshotSink,
return nil
}

func (s *snapshot) persistConnectCAProviderState(sink raft.SnapshotSink,
encoder *codec.Encoder) error {
state, err := s.state.CAProviderState()
if err != nil {
return err
}

for _, r := range state {
if _, err := sink.Write([]byte{byte(structs.ConnectCAProviderStateType)}); err != nil {
return err
}
if err := encoder.Encode(r); err != nil {
return err
}
}
return nil
}

func (s *snapshot) persistIntentions(sink raft.SnapshotSink,
encoder *codec.Encoder) error {
ixns, err := s.state.Intentions()
Expand Down Expand Up @@ -430,3 +452,14 @@ func restoreConnectCA(header *snapshotHeader, restore *state.Restore, decoder *c
}
return nil
}

func restoreConnectCAProviderState(header *snapshotHeader, restore *state.Restore, decoder *codec.Decoder) error {
var req structs.CAConsulProviderState
if err := decoder.Decode(&req); err != nil {
return err
}
if err := restore.CAProviderState(&req); err != nil {
return err
}
return nil
}
14 changes: 14 additions & 0 deletions agent/consul/fsm/snapshot_oss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,14 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
assert.Nil(err)
assert.True(ok)

ok, err = fsm.state.CASetProviderState(16, &structs.CAConsulProviderState{
ID: "asdf",
PrivateKey: "foo",
RootCert: "bar",
})
assert.Nil(err)
assert.True(ok)

// Snapshot
snap, err := fsm.Snapshot()
if err != nil {
Expand Down Expand Up @@ -296,6 +304,12 @@ func TestFSM_SnapshotRestore_OSS(t *testing.T) {
assert.Nil(err)
assert.Len(roots, 2)

// Verify provider state is restored.
_, state, err := fsm2.state.CAProviderState("asdf")
assert.Nil(err)
assert.Equal("foo", state.PrivateKey)
assert.Equal("bar", state.RootCert)

// Snapshot
snap, err = fsm2.Snapshot()
if err != nil {
Expand Down
25 changes: 12 additions & 13 deletions agent/consul/leader.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,15 +448,9 @@ func (s *Server) initializeCA() error {
return err
}

// TODO(banks): in the case that we've just gained leadership in an already
// configured cluster. We really need to fetch RootCA from state to provide it
// in setCAProvider. This matters because if the current active root has
// intermediates, parsing the rootCA from only the root cert PEM above will
// not include them and so leafs we sign will not bundle the intermediates.

s.setCAProvider(provider, rootCA)

// Check if the CA root is already initialized and exit if it is.
// Check if the CA root is already initialized and exit if it is,
// adding on any existing intermediate certs since they aren't directly
// tied to the provider.
// Every change to the CA after this initial bootstrapping should
// be done through the rotation process.
state := s.fsm.State()
Expand All @@ -465,12 +459,15 @@ func (s *Server) initializeCA() error {
return err
}
if activeRoot != nil {
// This state shouldn't be possible to get into because we update the root and
// CA config in the same FSM operation.
if activeRoot.ID != rootCA.ID {
// TODO(banks): this seems like a pretty catastrophic state to get into.
// Shouldn't we do something stronger than warn and continue signing with
// a key that's not the active CA according to the state?
s.logger.Printf("[WARN] connect: CA root %q is not the active root (%q)", rootCA.ID, activeRoot.ID)
return fmt.Errorf("stored CA root %q is not the active root (%s)", rootCA.ID, activeRoot.ID)
}

rootCA.IntermediateCerts = activeRoot.IntermediateCerts
s.setCAProvider(provider, rootCA)

return nil
}

Expand All @@ -494,6 +491,8 @@ func (s *Server) initializeCA() error {
return respErr
}

s.setCAProvider(provider, rootCA)

s.logger.Printf("[INFO] connect: initialized CA with provider %q", conf.Provider)

return nil
Expand Down
81 changes: 81 additions & 0 deletions agent/consul/leader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package consul

import (
"os"
"reflect"
"testing"
"time"

Expand Down Expand Up @@ -1064,3 +1065,83 @@ func TestLeader_CARootPruning(t *testing.T) {
require.True(roots[0].Active)
require.NotEqual(roots[0].ID, oldRoot.ID)
}

func TestLeader_PersistIntermediateCAs(t *testing.T) {
t.Parallel()

require := require.New(t)
dir1, s1 := testServer(t)
defer os.RemoveAll(dir1)
defer s1.Shutdown()
codec := rpcClient(t, s1)
defer codec.Close()

dir2, s2 := testServerDCBootstrap(t, "dc1", false)
defer os.RemoveAll(dir2)
defer s2.Shutdown()

dir3, s3 := testServerDCBootstrap(t, "dc1", false)
defer os.RemoveAll(dir3)
defer s3.Shutdown()

joinLAN(t, s2, s1)
joinLAN(t, s3, s1)

testrpc.WaitForLeader(t, s1.RPC, "dc1")

// Get the current root
rootReq := &structs.DCSpecificRequest{
Datacenter: "dc1",
}
var rootList structs.IndexedCARoots
require.Nil(msgpackrpc.CallWithCodec(codec, "ConnectCA.Roots", rootReq, &rootList))
require.Len(rootList.Roots, 1)

// Update the provider config to use a new private key, which should
// cause a rotation.
_, newKey, err := connect.GeneratePrivateKey()
require.NoError(err)
newConfig := &structs.CAConfiguration{
Provider: "consul",
Config: map[string]interface{}{
"PrivateKey": newKey,
"RootCert": "",
"RotationPeriod": 90 * 24 * time.Hour,
},
}
{
args := &structs.CARequest{
Datacenter: "dc1",
Config: newConfig,
}
var reply interface{}

require.NoError(msgpackrpc.CallWithCodec(codec, "ConnectCA.ConfigurationSet", args, &reply))
}

// Get the active root before leader change.
_, root := s1.getCAProvider()
require.Len(root.IntermediateCerts, 1)

// Force a leader change and make sure the root CA values are preserved.
s1.Leave()
s1.Shutdown()

retry.Run(t, func(r *retry.R) {
var leader *Server
for _, s := range []*Server{s2, s3} {
if s.IsLeader() {
leader = s
break
}
}
if leader == nil {
r.Fatal("no leader")
}

_, newLeaderRoot := leader.getCAProvider()
if !reflect.DeepEqual(newLeaderRoot, root) {
r.Fatalf("got %v, want %v", newLeaderRoot, root)
}
})
}
29 changes: 15 additions & 14 deletions agent/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,21 @@ type RaftIndex struct {
// These are serialized between Consul servers and stored in Consul snapshots,
// so entries must only ever be added.
const (
RegisterRequestType MessageType = 0
DeregisterRequestType = 1
KVSRequestType = 2
SessionRequestType = 3
ACLRequestType = 4
TombstoneRequestType = 5
CoordinateBatchUpdateType = 6
PreparedQueryRequestType = 7
TxnRequestType = 8
AutopilotRequestType = 9
AreaRequestType = 10
ACLBootstrapRequestType = 11 // FSM snapshots only.
IntentionRequestType = 12
ConnectCARequestType = 13
RegisterRequestType MessageType = 0
DeregisterRequestType = 1
KVSRequestType = 2
SessionRequestType = 3
ACLRequestType = 4
TombstoneRequestType = 5
CoordinateBatchUpdateType = 6
PreparedQueryRequestType = 7
TxnRequestType = 8
AutopilotRequestType = 9
AreaRequestType = 10
ACLBootstrapRequestType = 11 // FSM snapshots only.
IntentionRequestType = 12
ConnectCARequestType = 13
ConnectCAProviderStateType = 14
)

const (
Expand Down