feat(controller): Use S3 for node offload + workflow archiving. POC #4582

Closed · wants to merge 6 commits
10 changes: 7 additions & 3 deletions .github/workflows/ci-build.yaml
@@ -45,8 +45,13 @@ jobs:
# kubelet is not included because it'd take ages to get it working methinks
test: [ "smoke", "test-e2e", "test-cli", "test-e2e-cron" ]
containerRuntimeExecutor: [ "docker", "k8sapi", "pns" ]
profile: ["minimal", "mysql"]
# ok, so we're only running `smoke` for all CREs,
exclude:
- test: smoke
profile: mysql
- test: test-e2e-cron
profile: mysql
- test: test-e2e
containerRuntimeExecutor: k8sapi
- test: test-e2e
@@ -91,16 +96,15 @@ jobs:
echo " token: xxxxxx" >> ~/.kube/config
- name: Start Argo
env:
GOPATH: /home/runner/go
PROFILE: mysql
GOPATH: /home/runner/go
run: |
echo '127.0.0.1 dex' | sudo tee -a /etc/hosts
echo '127.0.0.1 minio' | sudo tee -a /etc/hosts
echo '127.0.0.1 postgres' | sudo tee -a /etc/hosts
echo '127.0.0.1 mysql' | sudo tee -a /etc/hosts
mkdir -p /tmp/log/argo-e2e
git fetch --tags
KUBECONFIG=~/.kube/config make start PROFILE=$PROFILE E2E_EXECUTOR=${{matrix.containerRuntimeExecutor}} DEV_IMAGE=true STATIC_FILES=false 2>&1 > /tmp/log/argo-e2e/argo.log &
KUBECONFIG=~/.kube/config make -d start PROFILE=${{matrix.profile}} E2E_EXECUTOR=${{matrix.containerRuntimeExecutor}} DEV_IMAGE=true STATIC_FILES=false 2>&1 > /tmp/log/argo-e2e/argo.log &
- name: Install gotestsum
run: go install gotest.tools/gotestsum
- name: Wait for Argo Server to be ready
7 changes: 0 additions & 7 deletions Makefile
@@ -67,14 +67,7 @@ K3D := $(shell if [[ "`which kubectl`" != '' ]] && [[ "`kubect
LOG_LEVEL := debug
UPPERIO_DB_DEBUG := 0
NAMESPACED := true

ALWAYS_OFFLOAD_NODE_STATUS := false
ifeq ($(PROFILE),mysql)
ALWAYS_OFFLOAD_NODE_STATUS := true
endif
ifeq ($(PROFILE),postgres)
ALWAYS_OFFLOAD_NODE_STATUS := true
endif

override LDFLAGS += \
-X github.com/argoproj/argo.version=$(VERSION) \
39 changes: 38 additions & 1 deletion config/config.go
@@ -137,14 +137,15 @@ type PersistConfig struct {
NodeStatusOffload bool `json:"nodeStatusOffLoad,omitempty"`
// Archive workflows to persistence.
Archive bool `json:"archive,omitempty"`
// ArchivelabelSelector holds LabelSelector to determine workflow persistence.
// ArchiveLabelSelector holds LabelSelector to determine workflow persistence.
ArchiveLabelSelector *metav1.LabelSelector `json:"archiveLabelSelector,omitempty"`
// in days
ArchiveTTL TTL `json:"archiveTTL,omitempty"`
ClusterName string `json:"clusterName,omitempty"`
ConnectionPool *ConnectionPool `json:"connectionPool,omitempty"`
PostgreSQL *PostgreSQLConfig `json:"postgresql,omitempty"`
MySQL *MySQLConfig `json:"mysql,omitempty"`
S3 *S3Config `json:"s3,omitempty"`
}

func (c PersistConfig) GetArchiveLabelSelector() (labels.Selector, error) {
@@ -161,12 +162,44 @@ func (c PersistConfig) GetClusterName() string {
return "default"
}

func (c PersistConfig) GetNodeStatusOffloadConfig() interface{} {
if c.S3 != nil && c.S3.NodeStatusOffloads != nil {
return c.S3.NodeStatusOffloads
}
return c.SQLConfig()
}

func (c PersistConfig) GetArchiveConfig() interface{} {
sqlConfig := c.SQLConfig()
if sqlConfig != nil {
return sqlConfig
}
if c.S3 != nil && c.S3.Archive != nil {
return c.S3.Archive
}
return nil
}

func (c PersistConfig) SQLConfig() interface{} {
if c.MySQL != nil {
return c.MySQL
} else if c.PostgreSQL != nil {
return c.PostgreSQL
}
return nil
}

type ConnectionPool struct {
MaxIdleConns int `json:"maxIdleConns,omitempty"`
MaxOpenConns int `json:"maxOpenConns,omitempty"`
ConnMaxLifetime TTL `json:"connMaxLifetime,omitempty"`
}

type S3Config struct {
NodeStatusOffloads *S3ArtifactRepository `json:"nodeStatusOffLoad,omitempty"`
Archive *S3ArtifactRepository `json:"archive,omitempty"`
}

type PostgreSQLConfig struct {
Host string `json:"host"`
Port int `json:"port"`
Expand Down Expand Up @@ -200,6 +233,10 @@ type S3ArtifactRepository struct {
KeyPrefix string `json:"keyPrefix,omitempty"`
}

func (r S3ArtifactRepository) Secure() bool {
return r.Insecure == nil || !*r.Insecure
}

// OSSArtifactRepository defines the controller configuration for an OSS artifact repository
type OSSArtifactRepository struct {
wfv1.OSSBucket `json:",inline"`
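The three helpers added above encode the backend precedence: S3 wins for node status offloading, while SQL wins for archiving. A minimal sketch of how they resolve when both backends are configured (illustrative only; it uses just the types in this diff, with struct fields elided):

```go
package main

import (
	"fmt"

	"github.com/argoproj/argo/config"
)

func main() {
	cfg := config.PersistConfig{
		MySQL: &config.MySQLConfig{}, // an SQL store is configured...
		S3: &config.S3Config{ // ...and so is S3, for both uses
			NodeStatusOffloads: &config.S3ArtifactRepository{},
			Archive:            &config.S3ArtifactRepository{},
		},
	}
	// Node status offloading prefers S3 when it is configured.
	fmt.Printf("%T\n", cfg.GetNodeStatusOffloadConfig()) // *config.S3ArtifactRepository
	// Archiving prefers SQL; S3 is used only when no SQL store is configured.
	fmt.Printf("%T\n", cfg.GetArchiveConfig()) // *config.MySQLConfig
}
```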
4 changes: 2 additions & 2 deletions docs/offloading-large-workflows.md
@@ -6,13 +6,13 @@

Argo stores workflows as Kubernetes resources (i.e. within EtcD). This creates a limit to their size as resources must be under 1MB. Each resource includes the status of each node, which is stored in the `/status/nodes` field for the resource. This can be over 1MB. If this happens, we try and compress the node status and store it in `/status/compressedNodes`. If the status is still too large, we then try and store it in an SQL database.

To enable this feature, configure a Postgres or MySQL database under `persistence` in [your configuration](workflow-controller-configmap.yaml) and set `nodeStatusOffLoad: true`.
To enable this feature, configure S3, Postgres, or MySQL under `persistence` in [your configuration](workflow-controller-configmap.yaml) and set `nodeStatusOffLoad: true`.

## FAQ

#### Why aren't my workflows appearing in the database?

Offloading is expensive and often unneccessary, so we only offload when we need to. Your workflows aren't probably large enough.
Offloading is expensive and often unnecessary, so we only offload when we need to. Your workflows probably aren't large enough.


#### Error "Failed to submit workflow: etcdserver: request is too large."
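The fallback chain this document describes (inline, then compressed, then offloaded) can be sketched in Go roughly as follows. This is a simplified model under assumed names (`Offloader`, `persistNodes`, the 1MB constant), not the controller's actual code:

```go
package offload

import (
	"bytes"
	"compress/gzip"
	"encoding/json"
)

// maxObjectSize approximates the ~1MB etcd resource limit mentioned above.
const maxObjectSize = 1 << 20

// Offloader abstracts the persistence backend (S3, MySQL, or Postgres).
type Offloader interface {
	Offload(nodes map[string]string) (version string, err error)
}

// persistNodes sketches the fallback chain: keep the status inline if it fits,
// otherwise gzip it into /status/compressedNodes, otherwise offload it.
func persistNodes(nodes map[string]string, store Offloader) (compressed []byte, version string, err error) {
	raw, err := json.Marshal(nodes)
	if err != nil {
		return nil, "", err
	}
	if len(raw) <= maxObjectSize {
		return nil, "", nil // small enough to stay in /status/nodes
	}
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write(raw); err != nil {
		return nil, "", err
	}
	if err := zw.Close(); err != nil {
		return nil, "", err
	}
	if buf.Len() <= maxObjectSize {
		return buf.Bytes(), "", nil // fits in /status/compressedNodes
	}
	version, err = store.Offload(nodes) // last resort: the configured store
	return nil, version, err
}
```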
2 changes: 1 addition & 1 deletion docs/workflow-archive.md
@@ -6,4 +6,4 @@

For many uses, you may wish to keep workflows for a long time. Argo can save completed workflows to an SQL database.

To enable this feature, configure a Postgres or MySQL (>= 5.7.8) database under `persistence` in [your configuration](workflow-controller-configmap.yaml) and set `archive: true`.
To enable this feature, configure S3, Postgres, or MySQL (>= 5.7.8) under `persistence` in [your configuration](workflow-controller-configmap.yaml) and set `archive: true`.
25 changes: 24 additions & 1 deletion docs/workflow-controller-configmap.yaml
@@ -162,7 +162,9 @@ data:
nodeStatusOffLoad: false
# save completed workloads to the workflow archive
archive: false
# the number of days to keep archived workflows (the default is forever)
# The number of days to keep archived workflows (the default is forever).
# Archive TTL is ignored for S3, as it cannot be set after the bucket is created.
# Instead, set the retention policy when you create the bucket.
archiveTTL: 180d

# LabelSelector determines the workflow that matches with the matchlabels or matchrequirements, will be archived.
@@ -173,6 +175,27 @@

# Optional name of the cluster I'm running in. This must be unique for your cluster.
clusterName: default

# Storage: s3, mysql, or postgres may be configured.
#
# If multiple are configured, the following rules apply:
#
# 1. For node status offloading, S3 is the most preferred; for archiving, it is the least preferred.
# 2. MySQL is preferred to PostgreSQL.
#
# S3 is the most preferred store for node status offloading (and therefore implicitly recommended),
# as it can be expected to be faster and relies less on the querying capability of an SQL database.
#
# S3 is the least preferred store for workflow archiving. While it is easier to set up than an SQL
# database, S3 does not support (out of the box) index-backed (i.e. fast) queries for listing
# archived workflows. List operations will be slow when any significant number of workflows are archived.

# Unlike MySQL or Postgres, S3 enforces TTLs by setting them on the bucket.
# You MUST configure different buckets for the archive and offloads if you wish to TTL archived workflows.
s3:
nodeStatusOffload: # ... as artifactRepository.s3
archive: # ... as artifactRepository.s3

postgresql:
host: localhost
port: 5432
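As the comments above note, `archiveTTL` is ignored for S3, so retention must be configured on the bucket itself. For MinIO or other S3-compatible stores, one way to do this is a bucket lifecycle rule; a hedged sketch using minio-go v7 (the endpoint, credentials, and rule ID are placeholders):

```go
package main

import (
	"context"
	"log"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
	"github.com/minio/minio-go/v7/pkg/lifecycle"
)

func main() {
	client, err := minio.New("minio:9000", &minio.Options{
		Creds: credentials.NewStaticV4("admin", "password", ""),
	})
	if err != nil {
		log.Fatal(err)
	}
	// Expire archived workflows after 180 days, mirroring `archiveTTL: 180d`.
	cfg := lifecycle.NewConfiguration()
	cfg.Rules = []lifecycle.Rule{{
		ID:         "expire-archived-workflows",
		Status:     "Enabled",
		Expiration: lifecycle.Expiration{Days: lifecycle.ExpirationDays(180)},
	}}
	if err := client.SetBucketLifecycle(context.Background(), "my-workflow-archive", cfg); err != nil {
		log.Fatal(err)
	}
}
```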
1 change: 1 addition & 0 deletions go.mod
@@ -37,6 +37,7 @@ require (
github.com/klauspost/compress v1.9.7 // indirect
github.com/lib/pq v1.3.0 // indirect
github.com/mattn/goreman v0.3.5
github.com/minio/minio-go/v7 v7.0.2
github.com/mitchellh/go-ps v0.0.0-20190716172923-621e5597135b
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.0.0
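The new `minio-go` dependency implies offloads and archives are written as S3 objects. A sketch of what a single offload write might look like with this client; the function, key layout, and credentials here are assumptions for illustration, not code from this PR:

```go
package main

import (
	"bytes"
	"context"
	"fmt"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)

// putOffload writes one offloaded node-status document to S3-compatible storage.
func putOffload(ctx context.Context, endpoint, accessKey, secretKey, bucket, key string, secure bool, doc []byte) error {
	client, err := minio.New(endpoint, &minio.Options{
		Creds:  credentials.NewStaticV4(accessKey, secretKey, ""),
		Secure: secure, // cf. S3ArtifactRepository.Secure() added in this PR
	})
	if err != nil {
		return err
	}
	_, err = client.PutObject(ctx, bucket, key, bytes.NewReader(doc), int64(len(doc)),
		minio.PutObjectOptions{ContentType: "application/json"})
	return err
}

func main() {
	doc := []byte(`{"node-1":{"phase":"Succeeded"}}`)
	// Matches the quick-start MinIO setup: insecure endpoint, offload bucket.
	err := putOffload(context.Background(), "minio:9000", "admin", "password",
		"my-node-status-offloads", "default/my-wf-uid/nodes.json", false, doc)
	fmt.Println(err)
}
```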
2 changes: 1 addition & 1 deletion hack/crdgen.sh
@@ -1,5 +1,5 @@
#!/bin/bash
set -eu -o pipefail
set -eux -o pipefail

cd "$(dirname "$0")/.."

26 changes: 26 additions & 0 deletions manifests/quick-start-minimal.yaml
@@ -490,6 +490,30 @@ data:
enabled: true
path: /metrics
port: 9090
persistence: |
nodeStatusOffLoad: true
archive: true
s3:
archive:
bucket: my-workflow-archive
endpoint: minio:9000
insecure: true
accessKeySecret:
name: my-minio-cred
key: accesskey
secretKeySecret:
name: my-minio-cred
key: secretkey
nodeStatusOffload:
bucket: my-node-status-offloads
endpoint: minio:9000
insecure: true
accessKeySecret:
name: my-minio-cred
key: accesskey
secretKeySecret:
name: my-minio-cred
key: secretkey
kind: ConfigMap
metadata:
name: workflow-controller-configmap
@@ -670,6 +694,8 @@ spec:
- mkdir
- -p
- /data/my-bucket
- /data/my-node-status-offloads
- /data/my-workflow-archive
livenessProbe:
httpGet:
path: /minio/health/live
2 changes: 2 additions & 0 deletions manifests/quick-start-mysql.yaml
@@ -759,6 +759,8 @@ spec:
- mkdir
- -p
- /data/my-bucket
- /data/my-node-status-offloads
- /data/my-workflow-archive
livenessProbe:
httpGet:
path: /minio/health/live
2 changes: 2 additions & 0 deletions manifests/quick-start-postgres.yaml
@@ -751,6 +751,8 @@ spec:
- mkdir
- -p
- /data/my-bucket
- /data/my-node-status-offloads
- /data/my-workflow-archive
livenessProbe:
httpGet:
path: /minio/health/live
2 changes: 1 addition & 1 deletion manifests/quick-start/base/minio/minio-pod.yaml
@@ -19,7 +19,7 @@ spec:
lifecycle:
postStart:
exec:
command: [mkdir, -p, /data/my-bucket]
command: [mkdir, -p, /data/my-bucket, /data/my-node-status-offloads, /data/my-workflow-archive]
readinessProbe:
httpGet:
path: /minio/health/ready
@@ -17,6 +17,30 @@ data:
enabled: true
path: /metrics
port: 9090
persistence: |
nodeStatusOffLoad: true
archive: true
s3:
archive:
bucket: my-workflow-archive
endpoint: minio:9000
insecure: true
accessKeySecret:
name: my-minio-cred
key: accesskey
secretKeySecret:
name: my-minio-cred
key: secretkey
nodeStatusOffload:
bucket: my-node-status-offloads
endpoint: minio:9000
insecure: true
accessKeySecret:
name: my-minio-cred
key: accesskey
secretKeySecret:
name: my-minio-cred
key: secretkey
links: |
- name: Example Workflow Link
scope: workflow
@@ -1,4 +1,4 @@
package sqldb
package persist

import (
"fmt"