CloudNativeAI · bergwolf · Sep 29, 2024 · Sep 25, 2024 · Sep 26, 2024 · Sep 26, 2024
diff --git a/docs/v2/modelfile.md b/docs/v2/modelfile.md
@@ -0,0 +1,95 @@
+# Introduction to Modelfile
+
+A Modelfile is a text file containing all commands, in order, needed to build a given model image. It automates the process of building model images.
+
+## Modelfile Instructions
+
+| **Instruction** | **Description** |
+| --- | --- |
+| CREATE | Create a new model image |
+| FROM | Specify the base model image to use |
+| NAME | Specify model name |
+| FAMILY | Specify model family |
+| ARCHITECTURE | Specify model architecture |
+| LICENSE | Specify the legal license under which the model is used |
+| CONFIG | Specify model configuration file |
+| WEIGHTS | Specify model weights file |
+| FORMAT | Specify model weights format |
+| TOKENIZER | Specify tokenizer configuration |
+
+## Modelfile Example
+
+```plain
+CREATE registry.cnai.com/sys/gemma-2b:latest
+
+# Model Information
+
+NAME         gemma-2b
+FAMILY       gemma
+ARCHITECTURE transformer
+FORMAT       safetensors
+
+# Model License
+
+LICENSE      examples/huggingface/gemma-2b/LICENSE
+
+# Model Configuration
+
+CONFIG       examples/huggingface/gemma-2b/config.json
+CONFIG       examples/huggingface/gemma-2b/generation_config.json
+
+# Model Tokenizer
+
+TOKENIZER    examples/huggingface/gemma-2b/tokenizer.json
+
+# Model Weights
+
+WEIGHTS      examples/huggingface/gemma-2b/model.safetensors.index.json
+WEIGHTS      examples/huggingface/gemma-2b/model-00001-of-00002.safetensors
+WEIGHTS      examples/huggingface/gemma-2b/model-00002-of-00002.safetensors
+
+```
+
+## Management tool
+
+We propose a model management tool, `mdctl`, which is a command-line tool for building, managing, and running AI models.
+
+### build
+
+We can use a Modelfile to build model images.
+
+```plain
+mdctl build -f ./Modelfile
+```
+
+### list
+
+We can list all the model images that have been pushed.
+
+```plain
+mdctl list
+```
+
+### push
+
+We can push the built model image to a model repository.
+
+```plain
+mdctl push <model-image>
+```
+
+### pull
+
+We can pull the model image from the model repository to local storage.
+
+```plain
+mdctl pull <model-image>
+```
+
+### unpack
+
+Once the model image has been pulled to local storage, we can use `mdctl unpack` to unpack it so that the model can be run.
+
+```plain
+mdctl unpack <model-image>
+```
diff --git a/docs/v2/tool.md b/docs/v2/tool.md
@@ -0,0 +1,51 @@
+# mdctl - Model Control Tool
+
+`mdctl` is a command-line tool for building, managing, and running AI models.
+
+## Installation
+
+To install `mdctl`, clone the repository and build the binary:
+
+```plain
+git clone https://github.com/CloudNativeAI/mdctl.git
+cd mdctl
+go build
+```
+
+## Usage
+
+To build a model, use the `build` command:
+
+```plain
+./mdctl build -f Modelfile
+```
+
+To list all models, use the `list` command:
+
+```plain
+./mdctl list
+```
+
+To push a model, use the `push` command. Before pushing, you need to set the model registry credentials:
+
+```plain
+export MODEL_REGISTRY_USER=<username>
+export MODEL_REGISTRY_PASSWORD=<password>
+export MODEL_REGISTRY_URL=<registry_url>
+```
+
+```plain
+./mdctl push <model>
+```
+
+To pull a model, use the `pull` command:
+
+```plain
+./mdctl pull <model>
+```
+
+To run a model, use the `unpack` command:
+
+```plain
+./mdctl unpack -n <model>
+```
diff --git a/go.mod b/go.mod
@@ -1,3 +1,16 @@
 module github.com/CloudNativeAI/model-spec
 
 go 1.22.4
+
+require (
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/klauspost/compress v1.17.10 // indirect
+	github.com/opencontainers/go-digest v1.0.0 // indirect
+	github.com/opencontainers/image-spec v1.1.0 // indirect
+	github.com/spf13/cobra v1.8.1 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	golang.org/x/sync v0.6.0 // indirect
+	golang.org/x/sys v0.25.0 // indirect
+	golang.org/x/term v0.24.0 // indirect
+	oras.land/oras-go/v2 v2.5.0 // indirect
+)
diff --git a/go.sum b/go.sum
@@ -0,0 +1,24 @@
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/klauspost/compress v1.17.10 h1:oXAz+Vh0PMUvJczoi+flxpnBEPxoER1IaAnU/NMPtT0=
+github.com/klauspost/compress v1.17.10/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
+github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34=
+golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
+golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+oras.land/oras-go/v2 v2.5.0 h1:o8Me9kLY74Vp5uw07QXPiitjsw7qNXi8Twd+19Zf02c=
+oras.land/oras-go/v2 v2.5.0/go.mod h1:z4eisnLP530vwIOUOJeBIj0aGI0L1C3d53atvCBqZHg=
diff --git a/specs-go/v2/architecture.go b/specs-go/v2/architecture.go
@@ -0,0 +1,51 @@
+package v2
+
+// TransformerForCausalLLM describes the architecture and hyperparameters of a transformer model for causal language modeling.
+// Each field is serialized to JSON under the snake_case key given in its struct tag.
+//
+// Supported features:
+// - Attention mechanisms: Multi-Head Attention (MHA) and Grouped Query Attention (GQA)
+// - Activation functions: GELU, ReLU
+// - Position embeddings: Rotary Position Embedding (RoPE)
+// - Normalization: RMSNorm (Root Mean Square Layer Normalization)
+//
+// This structure is designed to be flexible and accommodate various transformer architectures
+// used in state-of-the-art language models.
+type TransformerForCausalLLM struct {
+	// Version identifies the version of this transformer architecture config.
+	Version string `json:"version"`
+
+	// VocabularySize is the vocabulary size of the model.
+	VocabularySize int `json:"vocabulary_size"`
+
+	// HiddenSize is the hidden size of the model, e.g. 768, 1024, 2048.
+	HiddenSize int `json:"hidden_size"`
+
+	// NumHiddenLayers is the number of transformer layers in the model.
+	NumHiddenLayers int `json:"num_hidden_layers"`
+
+	// NumAttentionHeads is the number of attention heads, e.g. 12, 16, 32.
+	NumAttentionHeads int `json:"num_attention_heads"`
+
+	// NumKeyValueHeads is the number of key/value heads, e.g. 1, 2, 4.
+	// Only used by the GQA attention mechanism.
+	NumKeyValueHeads int `json:"num_key_value_heads"`
+
+	// Activation is the activation function used by the pointwise feed-forward layers, e.g. 'gelu', 'relu', 'tanh'.
+	Activation string `json:"activation"`
+
+	// IntermediateSize is the intermediate size of the feed-forward layers; the non-linearity is applied at this size.
+	IntermediateSize int `json:"intermediate_size"`
+
+	// NormEpsilon is the epsilon parameter of the RMSNorm normalization layers.
+	NormEpsilon float64 `json:"norm_epsilon"`
+
+	// PositionEmbedding is the position embedding type, for example 'rope', 'sinusoidal', 'alibi'.
+	PositionEmbedding string `json:"position_embedding"`
+
+	// RotaryEmbeddingBase is the base used to derive the rotary embedding period. Omitted from JSON when zero.
+	RotaryEmbeddingBase int `json:"rotary_embedding_base,omitempty"`
+
+	// RotaryEmbeddingFraction is the fraction of the hidden size rotary embeddings apply to. Must be in [0,1]; omitted from JSON when zero.
+	RotaryEmbeddingFraction float64 `json:"rotary_embedding_fraction,omitempty"`
+}
diff --git a/specs-go/v2/config.go b/specs-go/v2/config.go
@@ -0,0 +1,40 @@
+package v2
+
+import (
+	oci "github.com/opencontainers/image-spec/specs-go/v1"
+)
+
+// Config is the JSON structure that captures the essential metadata and configuration details of a machine learning model.
+type Config struct {
+	// Name specifies the unique identifier or title of the model.
+	Name string `json:"name"`
+
+	// Family indicates the broader category or lineage of the model, such as 'GPT', 'LLAMA', or 'QWEN'.
+	// This helps in grouping related models or identifying their general capabilities.
+	Family string `json:"family"`
+
+	// Architecture defines the fundamental structure or design of the model,
+	// such as 'transformer', 'CNN' (Convolutional Neural Network), 'RNN' (Recurrent Neural Network), etc.
+	// This information is crucial for understanding the model's underlying principles and potential applications.
+	Architecture string `json:"architecture"`
+
+	// Description provides detailed information about the model's purpose, capabilities, and usage.
+	// It is a list of OCI descriptors, allowing for rich, structured content; omitted from JSON when empty.
+	Description []oci.Descriptor `json:"description,omitempty"`
+
+	// License contains the legal and usage terms associated with the model.
+	// It includes policies and grants that govern how the model can be used, distributed, or modified.
+	// It is a list of OCI descriptors to accommodate multiple or complex licensing terms; omitted from JSON when empty.
+	License []oci.Descriptor `json:"license,omitempty"`
+
+	// Extensions allows for the inclusion of additional, model-specific configuration details.
+	// Each extension is represented by an OCI descriptor, enabling flexible and extensible metadata.
+	// This field accommodates unique requirements or features of different model types, such as:
+	// - Generation configuration: Parameters for text generation in language models
+	// - Quantization configuration: Details about model weight quantization
+	// - Transformer configuration: Specific architectural details for transformer models
+	// - Domain-specific settings: Configurations relevant to particular application domains
+	// The use of descriptors lets each extension be identified and processed individually,
+	// so diverse model configurations fit within one unified structure. Omitted from JSON when empty.
+	Extensions []oci.Descriptor `json:"extensions,omitempty"`
+}
diff --git a/specs-go/v2/engine.go b/specs-go/v2/engine.go
@@ -0,0 +1,32 @@
+package v2
+
+import oci "github.com/opencontainers/image-spec/specs-go/v1"
+
+// Engine provides the structure for the `application/vnd.cnai.models.engine.v0+json` mediatype when marshalled to JSON.
+// It describes and configures the execution environment for a model; every field is omitted from JSON when empty.
+type Engine struct {
+	// Name specifies the engine or framework used, such as 'transformers', 'tensorrt', or 'vllm'.
+	// It identifies the runtime environment required for the model.
+	Name string `json:"name,omitempty"`
+
+	// Version indicates the specific version of the engine or framework,
+	// for example '4.44.0', '8.10', or '1.0'. This ensures compatibility and reproducibility.
+	Version string `json:"version,omitempty"`
+
+	// Dependencies lists the additional packages or libraries required by the engine.
+	// This optional field is used to specify and install necessary components for the engine's operation.
+	Dependencies []string `json:"dependencies,omitempty"`
+
+	// Environment defines environment variables as key-value pairs.
+	// These variables configure the runtime environment for the engine executor.
+	Environment map[string]string `json:"environment,omitempty"`
+
+	// EntryPoint specifies the command or script that starts the engine.
+	// This optional field provides the starting point for executing the model within the engine.
+	EntryPoint string `json:"entrypoint,omitempty"`
+
+	// Extensions allows for additional, engine-specific configuration details.
+	// Each extension is represented by an OCI descriptor, enabling flexible and extensible metadata
+	// to accommodate unique requirements or features of different engine types.
+	Extensions []oci.Descriptor `json:"extensions,omitempty"`
+}
diff --git a/specs-go/v2/manifest.go b/specs-go/v2/manifest.go
@@ -0,0 +1,36 @@
+package v2
+
+import (
+	oci "github.com/opencontainers/image-spec/specs-go/v1"
+)
+
+// Manifest represents the structure for the `application/vnd.cnai.models.manifest.v2+json` mediatype when marshalled to JSON.
+// It encapsulates all the essential components and metadata for a machine learning model.
+type Manifest struct {
+	// Version specifies the version of the manifest schema.
+	Version string `json:"version"`
+
+	// MediaType indicates the specific type of this document's data structure.
+	// It should be set to `application/vnd.cnai.models.manifest.v2+json` or an applicable IANA media type.
+	MediaType string `json:"mediaType"`
+
+	// Config holds the configuration object for the model, embedded inline in the manifest.
+	// It contains essential setup information used by the runtime.
+	Config Config `json:"config"`
+
+	// Processor references the pre-processor object(s) through OCI descriptors.
+	// It's used for any data preparation or transformation required before model inference.
+	Processor []oci.Descriptor `json:"processor"`
+
+	// Weights references the model's weight object by digest.
+	// These are typically binary blobs containing the trained parameters of the model.
+	Weights Weights `json:"weights"`
+
+	// Engine optionally describes how to set up the runtime environment; it is embedded inline, not referenced by digest.
+	// NOTE(review): omitempty never omits a struct value in encoding/json; use *Engine if omission is intended.
+	Engine Engine `json:"engine,omitempty"`
+
+	// Annotations is an optional map for storing arbitrary metadata related to the model manifest.
+	// This can include information like creation date, author, or custom tags. Omitted from JSON when empty.
+	Annotations map[string]string `json:"annotations,omitempty"`
+}