Skip to content
This repository has been archived by the owner on Jul 17, 2024. It is now read-only.

feat: Support volume #194

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 8 additions & 26 deletions agent/api/types/inference_deployment.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package types

import (
modelzetes "github.com/tensorchord/openmodelz/modelzetes/pkg/apis/modelzetes/v2alpha1"
)

// InferenceDeployment represents a request to create or update a Model.
type InferenceDeployment struct {
Spec InferenceDeploymentSpec `json:"spec"`
Expand All @@ -14,7 +18,7 @@ type InferenceDeploymentSpec struct {
Namespace string `json:"namespace,omitempty"`

// Scaling is the scaling configuration for the inference.
Scaling *ScalingConfig `json:"scaling,omitempty"`
Scaling *modelzetes.ScalingConfig `json:"scaling,omitempty"`

// Framework is the inference framework.
Framework Framework `json:"framework,omitempty"`
Expand Down Expand Up @@ -48,6 +52,9 @@ type InferenceDeploymentSpec struct {

// Resources are the compute resource requirements.
Resources *ResourceRequirements `json:"resources,omitempty"`

// Volumes are the volumes to mount.
Volumes []modelzetes.VolumeConfig `json:"volumes,omitempty"`
}

// Framework is the inference framework. It is only used to set the default port
Expand All @@ -63,31 +70,6 @@ const (
FrameworkOther Framework = "other"
)

// ScalingConfig is the autoscaling configuration for an inference. Every
// field is an optional pointer and is omitted from the JSON encoding when nil.
type ScalingConfig struct {
// MinReplicas is the lower limit for the number of replicas to which the
// autoscaler can scale down. It defaults to 0.
MinReplicas *int32 `json:"min_replicas,omitempty"`
// MaxReplicas is the upper limit for the number of replicas to which the
// autoscaler can scale up. It cannot be less than MinReplicas. It defaults
// to 1.
MaxReplicas *int32 `json:"max_replicas,omitempty"`
// TargetLoad is the target load. In capacity mode, it is the expected
// number of inflight requests per replica.
TargetLoad *int32 `json:"target_load,omitempty"`
// Type is the scaling type. It can be either "capacity" or "rps". Default is "capacity".
Type *ScalingType `json:"type,omitempty"`
// ZeroDuration is the duration (in seconds) of zero load before scaling
// down to zero. Default is 5 minutes.
ZeroDuration *int32 `json:"zero_duration,omitempty"`
// StartupDuration is the duration (in seconds) of startup time.
StartupDuration *int32 `json:"startup_duration,omitempty"`
}

// ScalingType is the string value carried by ScalingConfig.Type.
type ScalingType string

// Supported scaling types.
const (
// ScalingTypeCapacity selects "capacity" scaling, in which TargetLoad is the
// expected number of inflight requests per replica.
ScalingTypeCapacity ScalingType = "capacity"
// ScalingTypeRPS selects "rps" scaling.
// NOTE(review): presumably requests per second; confirm with the autoscaler.
ScalingTypeRPS ScalingType = "rps"
)

// ResourceRequirements describes the compute resource requirements.
type ResourceRequirements struct {
// Limits describes the maximum amount of compute resources allowed.
Expand Down
15 changes: 10 additions & 5 deletions agent/api/types/secret.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ type Secret struct {
// Namespace if applicable for the secret
Namespace string `json:"namespace,omitempty"`

// Value is a string representing the secret's value
Value string `json:"value,omitempty"`
// Data contains the secret data. Each key must consist of alphanumeric
// characters, '-', '_' or '.'. The serialized form of the secret data is a
// base64 encoded string, representing the arbitrary (possibly non-string)
// data value here. Described in https://tools.ietf.org/html/rfc4648#section-4
Data map[string][]byte `json:"data,omitempty"`

// RawValue can be used to provide binary data when
// Value is not set
RawValue []byte `json:"rawValue,omitempty"`
// stringData allows specifying non-binary secret data in string form.
// It is provided as a write-only input field for convenience.
// All keys and values are merged into the data field on write, overwriting any existing values.
// The stringData field is never output when reading from the API.
StringData map[string]string `json:"stringData,omitempty"`
}
1 change: 1 addition & 0 deletions agent/client/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const (
gatewayBuildControlPlanePath = "/system/build"
gatewayBuildInstanceControlPlanePath = "/system/build/%s"
gatewayImageCacheControlPlanePath = "/system/image-cache"
gatewaySecretControlPlanePath = "/system/secrets"
modelzCloudClusterControlPlanePath = "/api/v1/users/%s/clusters/%s"
modelzCloudClusterWithUserControlPlanePath = "/api/v1/users/%s/clusters"
modelzCloudClusterAPIKeyControlPlanePath = "/api/v1/users/%s/clusters/%s/api_keys"
Expand Down
16 changes: 16 additions & 0 deletions agent/client/secret_create.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package client

import (
"context"
"net/url"

"github.com/tensorchord/openmodelz/agent/api/types"
)

// SecretCreate POSTs the given secret to the gateway secret control-plane
// path with no query parameters and no extra headers argument.
//
// The returned error is whatever wrapResponseError produces from the request
// error and response, labelled with the object kind "secret" and the
// identifier "<namespace>/<name>".
func (cli *Client) SecretCreate(ctx context.Context, secret types.Secret) error {
	query := url.Values{}

	resp, err := cli.post(ctx, gatewaySecretControlPlanePath, query, secret, nil)
	// Deferred before the error is inspected so the response is released on
	// every path.
	// NOTE(review): assumes ensureReaderClosed tolerates the response of a
	// failed request; confirm against its definition.
	defer ensureReaderClosed(resp)

	id := secret.Namespace + "/" + secret.Name
	return wrapResponseError(err, resp, "secret", id)
}
153 changes: 132 additions & 21 deletions agent/pkg/docs/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1166,6 +1166,40 @@ const docTemplate = `{
}
}
},
"/system/secrets": {
"post": {
"description": "Create the secret.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"secret"
],
"summary": "Create the secret.",
"parameters": [
{
"description": "Secret",
"name": "body",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/types.Secret"
}
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"$ref": "#/definitions/types.Secret"
}
}
}
}
},
"/system/server/{name}/delete": {
"delete": {
"description": "Delete a node.",
Expand Down Expand Up @@ -1472,14 +1506,21 @@ const docTemplate = `{
},
"scaling": {
"description": "Scaling is the scaling configuration for the inference.",
"$ref": "#/definitions/types.ScalingConfig"
"$ref": "#/definitions/v2alpha1.ScalingConfig"
},
"secrets": {
"description": "Secrets list of secrets to be made available to inference.",
"type": "array",
"items": {
"type": "string"
}
},
"volumes": {
"description": "Volumes are the volumes to mount.",
"type": "array",
"items": {
"$ref": "#/definitions/v2alpha1.VolumeConfig"
}
}
}
},
Expand Down Expand Up @@ -1585,6 +1626,10 @@ const docTemplate = `{
"osImage": {
"description": "OS Image reported by the node from /etc/os-release (e.g. Debian GNU/Linux 7 (wheezy)).",
"type": "string"
},
"resourceType": {
"description": "The Resource Type reported by the node",
"type": "string"
}
}
},
Expand Down Expand Up @@ -1638,32 +1683,33 @@ const docTemplate = `{
}
}
},
"types.ScalingConfig": {
"types.Secret": {
"type": "object",
"properties": {
"max_replicas": {
"description": "MaxReplicas is the upper limit for the number of replicas to which the\nautoscaler can scale up. It cannot be less that minReplicas. It defaults\nto 1.",
"type": "integer"
},
"min_replicas": {
"description": "MinReplicas is the lower limit for the number of replicas to which the\nautoscaler can scale down. It defaults to 0.",
"type": "integer"
},
"startup_duration": {
"description": "StartupDuration is the duration (in seconds) of startup time.",
"type": "integer"
"data": {
"description": "Data contains the secret data. Each key must consist of alphanumeric\ncharacters, '-', '_' or '.'. The serialized form of the secret data is a\nbase64 encoded string, representing the arbitrary (possibly non-string)\ndata value here. Described in https://tools.ietf.org/html/rfc4648#section-4",
"type": "object",
"additionalProperties": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"target_load": {
"description": "TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica.",
"type": "integer"
"name": {
"description": "Name of the secret",
"type": "string"
},
"type": {
"description": "Type is the scaling type. It can be either \"capacity\" or \"rps\". Default is \"capacity\".",
"namespace": {
"description": "Namespace if applicable for the secret",
"type": "string"
},
"zero_duration": {
"description": "ZeroDuration is the duration (in seconds) of zero load before scaling down to zero. Default is 5 minutes.",
"type": "integer"
"stringData": {
"description": "stringData allows specifying non-binary secret data in string form.\nIt is provided as a write-only input field for convenience.\nAll keys and values are merged into the data field on write, overwriting any existing values.\nThe stringData field is never output when reading from the API.",
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
},
Expand Down Expand Up @@ -1737,6 +1783,71 @@ const docTemplate = `{
"type": "string"
}
}
},
"v2alpha1.ScalingConfig": {
"type": "object",
"properties": {
"max_replicas": {
"description": "MaxReplicas is the upper limit for the number of replicas to which the\nautoscaler can scale up. It cannot be less that minReplicas. It defaults\nto 1.",
"type": "integer"
},
"min_replicas": {
"description": "MinReplicas is the lower limit for the number of replicas to which the\nautoscaler can scale down. It defaults to 0.",
"type": "integer"
},
"startup_duration": {
"description": "StartupDuration is the duration of startup time.",
"type": "integer"
},
"target_load": {
"description": "TargetLoad is the target load. In capacity mode, it is the expected number of the inflight requests per replica.",
"type": "integer"
},
"type": {
"description": "Type is the scaling type. It can be either \"capacity\" or \"rps\". Default is \"capacity\".",
"type": "string"
},
"zero_duration": {
"description": "ZeroDuration is the duration of zero load before scaling down to zero. Default is 5 minutes.",
"type": "integer"
}
}
},
"v2alpha1.VolumeConfig": {
"type": "object",
"properties": {
"mount_option": {
"description": "MountOption is the mount option.",
"type": "string"
},
"mount_path": {
"description": "MountPath is the path in pod to mount the volume.",
"type": "string"
},
"name": {
"description": "Name is the name of the volume.",
"type": "string"
},
"node_name": {
"description": "NodeNames are the name list of the node. It is only used for local volume.",
"type": "array",
"items": {
"type": "string"
}
},
"secret_name": {
"description": "SecretName is the name of the secret. It is only used for object storage volume.",
"type": "string"
},
"sub_path": {
"description": "SubPath is the sub path of the volume.",
"type": "string"
},
"type": {
"description": "Type of the volume.",
"type": "string"
}
}
}
}
}`
Expand Down
4 changes: 2 additions & 2 deletions agent/pkg/k8s/convert_inference.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ func AsInferenceDeployment(inf *v2alpha1.Inference, item *appsv1.Deployment) *ty
}

if inf.Spec.Scaling != nil {
res.Spec.Scaling = &types.ScalingConfig{
res.Spec.Scaling = &v2alpha1.ScalingConfig{
MinReplicas: inf.Spec.Scaling.MinReplicas,
MaxReplicas: inf.Spec.Scaling.MaxReplicas,
TargetLoad: inf.Spec.Scaling.TargetLoad,
ZeroDuration: inf.Spec.Scaling.ZeroDuration,
StartupDuration: inf.Spec.Scaling.StartupDuration,
}
if inf.Spec.Scaling.Type != nil {
typ := types.ScalingType(*inf.Spec.Scaling.Type)
typ := v2alpha1.ScalingType(*inf.Spec.Scaling.Type)
res.Spec.Scaling.Type = &typ
}
}
Expand Down
4 changes: 2 additions & 2 deletions agent/pkg/k8s/convert_inference_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ var _ = Describe("agent/pkg/k8s/convert_inference", func() {
}),
expect: Ptr(types.InferenceDeployment{
Spec: types.InferenceDeploymentSpec{
Scaling: Ptr(types.ScalingConfig{
Type: Ptr(types.ScalingTypeCapacity),
Scaling: Ptr(v2alpha1.ScalingConfig{
Type: Ptr(v2alpha1.ScalingTypeCapacity),
}),
},
Status: types.InferenceDeploymentStatus{
Expand Down
Loading
Loading