tensorchord · xieydd · Oct 30, 2023 · Oct 30, 2023 · Nov 3, 2023
diff --git a/agent/api/types/inference_deployment.go b/agent/api/types/inference_deployment.go
@@ -1,5 +1,9 @@
 package types
 
+import (
+	modelzetes "github.com/tensorchord/openmodelz/modelzetes/pkg/apis/modelzetes/v2alpha1"
+)
+
 // InferenceDeployment represents a request to create or update a Model.
 type InferenceDeployment struct {
 	Spec   InferenceDeploymentSpec   `json:"spec"`
@@ -14,7 +18,7 @@ type InferenceDeploymentSpec struct {
 	Namespace string `json:"namespace,omitempty"`
 
 	// Scaling is the scaling configuration for the inference.
-	Scaling *ScalingConfig `json:"scaling,omitempty"`
+	Scaling *modelzetes.ScalingConfig `json:"scaling,omitempty"`
 
 	// Framework is the inference framework.
 	Framework Framework `json:"framework,omitempty"`
@@ -48,6 +52,9 @@ type InferenceDeploymentSpec struct {
 
 	// Resources are the compute resource requirements.
 	Resources *ResourceRequirements `json:"resources,omitempty"`
+
+	// Volumes are the volumes to mount.
+	Volumes []modelzetes.VolumeConfig `json:"volumes,omitempty"`
 }
 
 // Framework is the inference framework. It is only used to set the default port
@@ -63,31 +70,6 @@ const (
 	FrameworkOther     Framework = "other"
 )
 
-type ScalingConfig struct {
-	// MinReplicas is the lower limit for the number of replicas to which the
-	// autoscaler can scale down. It defaults to 0.
-	MinReplicas *int32 `json:"min_replicas,omitempty"`
-	// MaxReplicas is the upper limit for the number of replicas to which the
-	// autoscaler can scale up. It cannot be less that minReplicas. It defaults
-	// to 1.
-	MaxReplicas *int32 `json:"max_replicas,omitempty"`
-	// TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica.
-	TargetLoad *int32 `json:"target_load,omitempty"`
-	// Type is the scaling type. It can be either "capacity" or "rps". Default is "capacity".
-	Type *ScalingType `json:"type,omitempty"`
-	// ZeroDuration is the duration (in seconds) of zero load before scaling down to zero. Default is 5 minutes.
-	ZeroDuration *int32 `json:"zero_duration,omitempty"`
-	// StartupDuration is the duration (in seconds) of startup time.
-	StartupDuration *int32 `json:"startup_duration,omitempty"`
-}
-
-type ScalingType string
-
-const (
-	ScalingTypeCapacity ScalingType = "capacity"
-	ScalingTypeRPS      ScalingType = "rps"
-)
-
 // ResourceRequirements describes the compute resource requirements.
 type ResourceRequirements struct {
 	// Limits describes the maximum amount of compute resources allowed.

diff --git a/agent/api/types/secret.go b/agent/api/types/secret.go
@@ -8,10 +8,15 @@ type Secret struct {
 	// Namespace if applicable for the secret
 	Namespace string `json:"namespace,omitempty"`
 
-	// Value is a string representing the string's value
-	Value string `json:"value,omitempty"`
+	// Data contains the secret data. Each key must consist of alphanumeric
+	// characters, '-', '_' or '.'. The serialized form of the secret data is a
+	// base64 encoded string, representing the arbitrary (possibly non-string)
+	// data value here. Described in https://tools.ietf.org/html/rfc4648#section-4
+	Data map[string][]byte `json:"data,omitempty"`
 
-	// RawValue can be used to provide binary data when
-	// Value is not set
-	RawValue []byte `json:"rawValue,omitempty"`
+	// stringData allows specifying non-binary secret data in string form.
+	// It is provided as a write-only input field for convenience.
+	// All keys and values are merged into the data field on write, overwriting any existing values.
+	// The stringData field is never output when reading from the API.
+	StringData map[string]string `json:"stringData,omitempty"`
 }
diff --git a/agent/client/const.go b/agent/client/const.go
@@ -24,6 +24,7 @@ const (
 	gatewayBuildControlPlanePath                      = "/system/build"
 	gatewayBuildInstanceControlPlanePath              = "/system/build/%s"
 	gatewayImageCacheControlPlanePath                 = "/system/image-cache"
+	gatewaySecretControlPlanePath                     = "/system/secrets"
 	modelzCloudClusterControlPlanePath                = "/api/v1/users/%s/clusters/%s"
 	modelzCloudClusterWithUserControlPlanePath        = "/api/v1/users/%s/clusters"
 	modelzCloudClusterAPIKeyControlPlanePath          = "/api/v1/users/%s/clusters/%s/api_keys"

diff --git a/agent/client/secret_create.go b/agent/client/secret_create.go
@@ -0,0 +1,16 @@
+package client
+
+import (
+	"context"
+	"net/url"
+
+	"github.com/tensorchord/openmodelz/agent/api/types"
+)
+
+// SecretCreate sends secret through cli.post to gatewaySecretControlPlanePath and returns wrapResponseError's result.
+func (cli *Client) SecretCreate(ctx context.Context, secret types.Secret) error {
+	resp, err := cli.post(ctx, gatewaySecretControlPlanePath, url.Values{}, secret, nil) // no query parameters
+	defer ensureReaderClosed(resp) // deferred before err is inspected; presumably tolerates a failed request — verify
+	resourceID := secret.Namespace + "/" + secret.Name // "<namespace>/<name>" label handed to the error wrapper
+	return wrapResponseError(err, resp, "secret", resourceID)
+}
diff --git a/agent/pkg/docs/docs.go b/agent/pkg/docs/docs.go
@@ -1166,6 +1166,40 @@ const docTemplate = `{
                 }
             }
         },
+        "/system/secrets": {
+            "post": {
+                "description": "Create the secret.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "secret"
+                ],
+                "summary": "Create the secret.",
+                "parameters": [
+                    {
+                        "description": "Secret",
+                        "name": "body",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/types.Secret"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/types.Secret"
+                        }
+                    }
+                }
+            }
+        },
         "/system/server/{name}/delete": {
             "delete": {
                 "description": "Delete a node.",
@@ -1472,14 +1506,21 @@ const docTemplate = `{
                 },
                 "scaling": {
                     "description": "Scaling is the scaling configuration for the inference.",
-                    "$ref": "#/definitions/types.ScalingConfig"
+                    "$ref": "#/definitions/v2alpha1.ScalingConfig"
                 },
                 "secrets": {
                     "description": "Secrets list of secrets to be made available to inference.",
                     "type": "array",
                     "items": {
                         "type": "string"
                     }
+                },
+                "volumes": {
+                    "description": "Volumes are the volumes to mount.",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/v2alpha1.VolumeConfig"
+                    }
                 }
             }
         },
@@ -1585,6 +1626,10 @@ const docTemplate = `{
                 "osImage": {
                     "description": "OS Image reported by the node from /etc/os-release (e.g. Debian GNU/Linux 7 (wheezy)).",
                     "type": "string"
+                },
+                "resourceType": {
+                    "description": "The Resource Type reported by the node",
+                    "type": "string"
                 }
             }
         },
@@ -1638,32 +1683,33 @@ const docTemplate = `{
                 }
             }
         },
-        "types.ScalingConfig": {
+        "types.Secret": {
             "type": "object",
             "properties": {
-                "max_replicas": {
-                    "description": "MaxReplicas is the upper limit for the number of replicas to which the\nautoscaler can scale up. It cannot be less that minReplicas. It defaults\nto 1.",
-                    "type": "integer"
-                },
-                "min_replicas": {
-                    "description": "MinReplicas is the lower limit for the number of replicas to which the\nautoscaler can scale down. It defaults to 0.",
-                    "type": "integer"
-                },
-                "startup_duration": {
-                    "description": "StartupDuration is the duration (in seconds) of startup time.",
-                    "type": "integer"
+                "data": {
+                    "description": "Data contains the secret data. Each key must consist of alphanumeric\ncharacters, '-', '_' or '.'. The serialized form of the secret data is a\nbase64 encoded string, representing the arbitrary (possibly non-string)\ndata value here. Described in https://tools.ietf.org/html/rfc4648#section-4",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "array",
+                        "items": {
+                            "type": "integer"
+                        }
+                    }
                 },
-                "target_load": {
-                    "description": "TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica.",
-                    "type": "integer"
+                "name": {
+                    "description": "Name of the secret",
+                    "type": "string"
                 },
-                "type": {
-                    "description": "Type is the scaling type. It can be either \"capacity\" or \"rps\". Default is \"capacity\".",
+                "namespace": {
+                    "description": "Namespace if applicable for the secret",
                     "type": "string"
                 },
-                "zero_duration": {
-                    "description": "ZeroDuration is the duration (in seconds) of zero load before scaling down to zero. Default is 5 minutes.",
-                    "type": "integer"
+                "stringData": {
+                    "description": "stringData allows specifying non-binary secret data in string form.\nIt is provided as a write-only input field for convenience.\nAll keys and values are merged into the data field on write, overwriting any existing values.\nThe stringData field is never output when reading from the API.",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 }
             }
         },
@@ -1737,6 +1783,71 @@ const docTemplate = `{
                     "type": "string"
                 }
             }
+        },
+        "v2alpha1.ScalingConfig": {
+            "type": "object",
+            "properties": {
+                "max_replicas": {
+                    "description": "MaxReplicas is the upper limit for the number of replicas to which the\nautoscaler can scale up. It cannot be less that minReplicas. It defaults\nto 1.",
+                    "type": "integer"
+                },
+                "min_replicas": {
+                    "description": "MinReplicas is the lower limit for the number of replicas to which the\nautoscaler can scale down. It defaults to 0.",
+                    "type": "integer"
+                },
+                "startup_duration": {
+                    "description": "StartupDuration is the duration of startup time.",
+                    "type": "integer"
+                },
+                "target_load": {
+                    "description": "TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica.",
+                    "type": "integer"
+                },
+                "type": {
+                    "description": "Type is the scaling type. It can be either \"capacity\" or \"rps\". Default is \"capacity\".",
+                    "type": "string"
+                },
+                "zero_duration": {
+                    "description": "ZeroDuration is the duration of zero load before scaling down to zero. Default is 5 minutes.",
+                    "type": "integer"
+                }
+            }
+        },
+        "v2alpha1.VolumeConfig": {
+            "type": "object",
+            "properties": {
+                "mount_option": {
+                    "description": "MountOption is the mount option.",
+                    "type": "string"
+                },
+                "mount_path": {
+                    "description": "MountPath is the path in pod to mount the volume.",
+                    "type": "string"
+                },
+                "name": {
+                    "description": "Name is the name of the volume.",
+                    "type": "string"
+                },
+                "node_name": {
+                    "description": "NodeNames are the name list of the node. It is only used for local volume.",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "secret_name": {
+                    "description": "SecretName is the name of the secret. It is only used for object storage volume.",
+                    "type": "string"
+                },
+                "sub_path": {
+                    "description": "SubPath is the sub path of the volume.",
+                    "type": "string"
+                },
+                "type": {
+                    "description": "Type of the volume.",
+                    "type": "string"
+                }
+            }
         }
     }
 }`

diff --git a/agent/pkg/k8s/convert_inference.go b/agent/pkg/k8s/convert_inference.go
@@ -31,15 +31,15 @@ func AsInferenceDeployment(inf *v2alpha1.Inference, item *appsv1.Deployment) *ty
 	}
 
 	if inf.Spec.Scaling != nil {
-		res.Spec.Scaling = &types.ScalingConfig{
+		res.Spec.Scaling = &v2alpha1.ScalingConfig{
 			MinReplicas:     inf.Spec.Scaling.MinReplicas,
 			MaxReplicas:     inf.Spec.Scaling.MaxReplicas,
 			TargetLoad:      inf.Spec.Scaling.TargetLoad,
 			ZeroDuration:    inf.Spec.Scaling.ZeroDuration,
 			StartupDuration: inf.Spec.Scaling.StartupDuration,
 		}
 		if inf.Spec.Scaling.Type != nil {
-			typ := types.ScalingType(*inf.Spec.Scaling.Type)
+			typ := v2alpha1.ScalingType(*inf.Spec.Scaling.Type)
 			res.Spec.Scaling.Type = &typ
 		}
 	}

diff --git a/agent/pkg/k8s/convert_inference_test.go b/agent/pkg/k8s/convert_inference_test.go
@@ -112,8 +112,8 @@ var _ = Describe("agent/pkg/k8s/convert_inference", func() {
 				}),
 				expect: Ptr(types.InferenceDeployment{
 					Spec: types.InferenceDeploymentSpec{
-						Scaling: Ptr(types.ScalingConfig{
-							Type: Ptr(types.ScalingTypeCapacity),
+						Scaling: Ptr(v2alpha1.ScalingConfig{
+							Type: Ptr(v2alpha1.ScalingTypeCapacity),
 						}),
 					},
 					Status: types.InferenceDeploymentStatus{