Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bugs related to secret management and improve documentation #1358

Merged
merged 11 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ GOBIN=$(shell go env GOBIN)
endif

# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
CRD_OPTIONS ?= "crd"
CRD_OPTIONS ?= "crd:crdVersions=v1beta1"

BUILD_ID ?= $(shell git rev-parse --short HEAD)
timestamp := $(shell /bin/date "+%Y%m%d-%H%M%S")
Expand Down Expand Up @@ -55,9 +55,17 @@ generate-test-certs:
.PHONY: test-integration-controllers
test-integration-controllers: generate fmt vet manifests
TEST_RESOURCE_PREFIX=$(TEST_RESOURCE_PREFIX) TEST_USE_EXISTING_CLUSTER=false REQUEUE_AFTER=20 \
go test -v -tags "$(BUILD_TAGS)" -coverprofile=reports/integration-controllers-coverage-output.txt -coverpkg=./... -covermode count -parallel 4 -timeout 45m ./controllers/...
#2>&1 | tee reports/integration-controllers-output.txt
#go-junit-report < reports/integration-controllers-output.txt > reports/integration-controllers-report.xml
go test -v -tags "$(BUILD_TAGS)" -coverprofile=reports/integration-controllers-coverage-output.txt -coverpkg=./... -covermode count -parallel 4 -timeout 45m \
./controllers/... \
./pkg/secrets/...
# TODO: Note that the above test (secrets/keyvault) is not an integration-controller test... but it's not a unit test either, and unfortunately the test-integration-managers target isn't run in CI either.

# Run a subset of tests with v1 secret naming enabled to ensure no regression in the old secret naming
.PHONY: test-v1-secret-naming
test-v1-secret-naming: generate fmt vet manifests
TEST_RESOURCE_PREFIX=$(TEST_RESOURCE_PREFIX) TEST_USE_EXISTING_CLUSTER=false REQUEUE_AFTER=20 AZURE_SECRET_NAMING_VERSION=1 \
go test -v -run "^.*_SecretNamedCorrectly$$" -tags "$(BUILD_TAGS)" -coverprofile=reports/v1-secret-naming-coverage-output.txt -coverpkg=./... -covermode count -parallel 4 -timeout 10m \
./controllers/...

# Run Resource Manager tests against the configured cluster
.PHONY: test-integration-managers
Expand All @@ -73,10 +81,8 @@ test-integration-managers: generate fmt vet manifests
./pkg/resourcemanager/psql/firewallrule/... \
./pkg/resourcemanager/appinsights/... \
./pkg/resourcemanager/vnet/...
#2>&1 | tee reports/integration-managers-output.txt
#go-junit-report < reports/integration-managers-output.txt > reports/integration-managers-report.xml

# Run all available tests. Note that Controllers are not unit-testable.
# Run all available unit tests.
.PHONY: test-unit
test-unit: generate fmt vet manifests
TEST_USE_EXISTING_CLUSTER=false REQUEUE_AFTER=20 \
Expand All @@ -85,8 +91,6 @@ test-unit: generate fmt vet manifests
./pkg/resourcemanager/azuresql/azuresqlfailovergroup
# The below folders are commented out because the tests in them fail...
# ./api/... \
# ./pkg/secrets/... \
#2>&1 | tee testlogs.txt

# Merge all the available test coverage results and publish a single report
.PHONY: test-process-coverage
Expand Down Expand Up @@ -223,12 +227,9 @@ generate: manifests
# download controller-gen if necessary
.PHONY: controller-gen
controller-gen:
ifeq (, $(shell which controller-gen))
go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.2.5
CONTROLLER_GEN=$(shell go env GOPATH)/bin/controller-gen
else
CONTROLLER_GEN=$(shell which controller-gen)
endif
go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.4.0
CONTROLLER_GEN=$(shell go env GOPATH)/bin/controller-gen


.PHONY: install-bindata
install-bindata:
Expand Down Expand Up @@ -316,13 +317,12 @@ install-aad-pod-identity:
kubectl apply -f https://raw.githubusercontent.com/Azure/aad-pod-identity/master/deploy/infra/deployment-rbac.yaml

.PHONY: install-test-dependencies
install-test-dependencies:
install-test-dependencies: controller-gen
go get github.com/jstemmer/go-junit-report \
&& go get github.com/axw/gocov/gocov \
&& go get github.com/AlekSi/gocov-xml \
&& go get github.com/wadey/gocovmerge \
&& go get k8s.io/code-generator/cmd/conversion-gen@v0.18.2 \
&& go get sigs.k8s.io/controller-tools/cmd/controller-gen@v0.4.0 \
&& go get sigs.k8s.io/kind@v0.9.0 \
&& go get sigs.k8s.io/kustomize/kustomize/v3@v3.8.6

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ Ready to quickly deploy the latest version of Azure Service Operator on your Kub
--set azureClientSecret=$AZURE_CLIENT_SECRET
```

If you would like to install an older version you can list the avialable versions:
If you would like to install an older version you can list the available versions:
```sh
helm search repo aso --versions
```
Expand Down
2 changes: 1 addition & 1 deletion api/v1alpha1/azuresqluser_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ type AzureSQLUserSpec struct {
AdminSecretKeyVault string `json:"adminSecretKeyVault,omitempty"`
Username string `json:"username,omitempty"`
KeyVaultToStoreSecrets string `json:"keyVaultToStoreSecrets,omitempty"`
KeyVaultSecretPrefix string `json:"keyVaultSecretPrefix,omitempty"`
KeyVaultSecretPrefix string `json:"keyVaultSecretPrefix,omitempty"` // TODO: Remove this in a future version?
KeyVaultSecretFormats []string `json:"keyVaultSecretFormats,omitempty"`
}

Expand Down
3 changes: 2 additions & 1 deletion api/v1alpha1/storageaccount_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ type StorageAccountSpec struct {
// StorageAccountSku the SKU of the storage account.
type StorageAccountSku struct {
// Name - The SKU name. Required for account creation; optional for update.
// Possible values include: 'StandardLRS', 'StandardGRS', 'StandardRAGRS', 'StandardZRS', 'PremiumLRS', 'PremiumZRS', 'StandardGZRS', 'StandardRAGZRS'
// Possible values include: 'Standard_LRS', 'Standard_GRS', 'Standard_RAGRS', 'Standard_ZRS', 'Premium_LRS', 'Premium_ZRS', 'Standard_GZRS', 'Standard_RAGZRS'.
// For the full list of allowed options, see: https://docs.microsoft.com/en-us/rest/api/storagerp/storageaccounts/create#skuname
Name StorageAccountSkuName `json:"name,omitempty"`
}

Expand Down
20 changes: 20 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,26 @@ steps:
BUILD_ID: $(Build.BuildId)
workingDirectory: '$(System.DefaultWorkingDirectory)'

# TODO: There is no way to run steps in parallel in Azure pipelines but ideally this step would run in parallel
# TODO: with the above testing step to reduce overall runtime
- script: |
set -e
export PATH=$PATH:$(go env GOPATH)/bin:$(go env GOPATH)/kubebuilder/bin
export KUBEBUILDER_ASSETS=$(go env GOPATH)/kubebuilder/bin
make test-v1-secret-naming
displayName: Run legacy v1 secret naming tests
condition: eq(variables['check_changes.SOURCE_CODE_CHANGED'], 'true')
continueOnError: 'false'
env:
GO111MODULE: on
AZURE_SUBSCRIPTION_ID: $(AZURE_SUBSCRIPTION_ID)
AZURE_TENANT_ID: $(AZURE_TENANT_ID)
AZURE_CLIENT_ID: $(AZURE_CLIENT_ID)
AZURE_CLIENT_SECRET: $(AZURE_CLIENT_SECRET)
REQUEUE_AFTER: $(REQUEUE_AFTER)
BUILD_ID: $(Build.BuildId)
workingDirectory: '$(System.DefaultWorkingDirectory)'

- script: |
set -e
export PATH=$PATH:$(go env GOPATH)/bin
Expand Down
5 changes: 4 additions & 1 deletion charts/azure-service-operator/templates/secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ data:
{{- end }}
{{- if .Values.azureOperatorKeyvault }}
AZURE_OPERATOR_KEYVAULT: {{ .Values.azureOperatorKeyvault | b64enc | quote }}
{{- end }}
{{- end }}
{{- if .Values.azureSecretNamingVersion }}
AZURE_SECRET_NAMING_VERSION: {{ .Values.azureSecretNamingVersion | b64enc | quote }}
{{- end }}
3 changes: 3 additions & 0 deletions charts/azure-service-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ azureClientSecret: ""
# azureUseMI determines if ASO will use a Managed Identity to authenticate.
azureUseMI: False

# azureSecretNamingVersion allows choosing the algorithm used to derive secret names. Version 2 is recommended.
azureSecretNamingVersion: "2"

# image defines the container image the ASO pod should run
# Note: This should use the latest released tag number explicitly. If
# it's ':latest' and someone deploys the chart after a new version has
Expand Down
6 changes: 6 additions & 0 deletions config/default/manager_image_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ spec:
key: AZURE_CLOUD_ENV
name: azureoperatorsettings
optional: true
- name: AZURE_SECRET_NAMING_VERSION
valueFrom:
secretKeyRef:
name: azureoperatorsettings
key: AZURE_SECRET_NAMING_VERSION
optional: true
matthchr marked this conversation as resolved.
Show resolved Hide resolved
# Used along with aad-pod-identity integration, but set always
# because it doesn't hurt
- name: POD_NAMESPACE
Expand Down
9 changes: 5 additions & 4 deletions controllers/appinsights_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ import (
"context"
"testing"

azurev1alpha1 "github.com/Azure/azure-service-operator/api/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

azurev1alpha1 "github.com/Azure/azure-service-operator/api/v1alpha1"
)

func TestAppInsightsController(t *testing.T) {
Expand All @@ -23,7 +24,7 @@ func TestAppInsightsController(t *testing.T) {
appInsightsName := GenerateTestResourceName("appinsights")

// Create an instance of Azure AppInsights
appInsightsInstance := &azurev1alpha1.AppInsights{
instance := &azurev1alpha1.AppInsights{
ObjectMeta: metav1.ObjectMeta{
Name: appInsightsName,
Namespace: "default",
Expand All @@ -36,7 +37,7 @@ func TestAppInsightsController(t *testing.T) {
},
}

EnsureInstance(ctx, t, tc, appInsightsInstance)
EnsureInstance(ctx, t, tc, instance)

EnsureDelete(ctx, t, tc, appInsightsInstance)
EnsureDelete(ctx, t, tc, instance)
}
23 changes: 12 additions & 11 deletions controllers/async_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import (
)

const (
finalizerName string = "azure.microsoft.com/finalizer"
requeDuration time.Duration = time.Second * 20
successMsg string = "successfully provisioned"
finalizerName string = "azure.microsoft.com/finalizer"
requeueDuration time.Duration = time.Second * 20
matthchr marked this conversation as resolved.
Show resolved Hide resolved
successMsg string = "successfully provisioned"
)

// AsyncReconciler is a generic reconciler for Azure resources.
Expand Down Expand Up @@ -72,20 +72,21 @@ func (r *AsyncReconciler) Reconcile(req ctrl.Request, obj runtime.Object) (resul
var keyvaultSecretClient secrets.SecretClient

// Determine if we need to check KeyVault for secrets
KeyVaultName := keyvaultsecretlib.GetKeyVaultName(obj)
keyVaultName := keyvaultsecretlib.GetKeyVaultName(obj)

if len(KeyVaultName) != 0 {
if len(keyVaultName) != 0 {
// Instantiate the KeyVault Secret Client
keyvaultSecretClient = keyvaultsecretlib.New(KeyVaultName, config.GlobalCredentials())
keyvaultSecretClient = keyvaultsecretlib.New(keyVaultName, config.GlobalCredentials(), config.SecretNamingVersion())

r.Telemetry.LogInfoByInstance("status", "ensuring vault", req.String())

// TODO: It's really awkward that we do this so often?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow yeah, that method seems really heavyweight! Is this to handle the case where the vault isn't accessible yet and the object is being created for the first time, so that the individual resource managers don't have to do the work of tearing down the resource they created if the secret can't be stored?

Although I don't think that really holds, does it? There's still a possibility of the secret write failing even if we've looked-before-leaping. I checked a number of resources' Ensure methods and they almost all write the secret first (having generated keys/passwords/etc) to avoid that scenario, or in the case of redis the secret content is queryable (not very secret) after creation, so it just requeues if it has a problem writing it. So possibly we don't need to do the check here?

This could be one of those "you're only allowed to remove this if you can tell me why it's there" situations though - maybe there's something I'm missing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think this is more about error code handling and setting the right message on the resource if we can't access the KeyVault, although doing a pre-check so that resources don't need to might be part of it as well.

Obviously as you point out, no amount of looking can guarantee your leap succeeds so pre-checking feels sorta pointless, and an alternative where we have an error handling library that can help us determine error causes presumably would mean that rather than doing this check once we can just farm it out to the error checker in all the places we interact with KV at which point there's not much value in doing this it seems to me as well? Especially since we need error handling in those other places already so it's not like we're forcing some there when previously we had none...

I've filed #1369 to track this as I don't think we want to bloat this PR further by me doing it here.

if !keyvaultsecretlib.IsKeyVaultAccessible(keyvaultSecretClient) {
r.Telemetry.LogInfoByInstance("requeuing", "awaiting vault verification", req.String())

// update the status of the resource in kubernetes
status.Message = "Waiting for secretclient keyvault to be available"
return ctrl.Result{RequeueAfter: requeDuration}, r.Status().Update(ctx, obj)
return ctrl.Result{RequeueAfter: requeueDuration}, r.Status().Update(ctx, obj)
}
}

Expand Down Expand Up @@ -118,7 +119,7 @@ func (r *AsyncReconciler) Reconcile(req ctrl.Request, obj runtime.Object) (resul
}
} else {
if HasFinalizer(res, finalizerName) {
if len(KeyVaultName) != 0 { //KeyVault was specified in Spec, so use that for secrets
if len(keyVaultName) != 0 { // keyVault was specified in Spec, so use that for secrets
configOptions = append(configOptions, resourcemanager.WithSecretClient(keyvaultSecretClient))
}
found, deleteErr := r.AzureClient.Delete(ctx, obj, configOptions...)
Expand All @@ -134,7 +135,7 @@ func (r *AsyncReconciler) Reconcile(req ctrl.Request, obj runtime.Object) (resul
return ctrl.Result{}, r.Update(ctx, obj)
}
r.Telemetry.LogInfoByInstance("requeuing", "deletion unfinished", req.String())
return ctrl.Result{RequeueAfter: requeDuration}, r.Status().Update(ctx, obj)
return ctrl.Result{RequeueAfter: requeueDuration}, r.Status().Update(ctx, obj)
}
return ctrl.Result{}, nil
}
Expand All @@ -158,7 +159,7 @@ func (r *AsyncReconciler) Reconcile(req ctrl.Request, obj runtime.Object) (resul

r.Telemetry.LogInfoByInstance("status", "reconciling object", req.String())

if len(KeyVaultName) != 0 { //KeyVault was specified in Spec, so use that for secrets
if len(keyVaultName) != 0 { //KeyVault was specified in Spec, so use that for secrets
configOptions = append(configOptions, resourcemanager.WithSecretClient(keyvaultSecretClient))
}

Expand Down Expand Up @@ -191,7 +192,7 @@ func (r *AsyncReconciler) Reconcile(req ctrl.Request, obj runtime.Object) (resul
result = ctrl.Result{}
if !done {
r.Telemetry.LogInfoByInstance("status", "reconciling object not finished", req.String())
result.RequeueAfter = requeDuration
result.RequeueAfter = requeueDuration
} else {
r.Telemetry.LogInfoByInstance("reconciling", "success", req.String())

Expand Down
Loading