Break: Deprecate old ONNX structure
ashvardanian committed Apr 17, 2024
1 parent 38949f3 commit 94ebd6e
Showing 14 changed files with 430 additions and 240 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -14,3 +14,4 @@ node_modules
*.onnx
*.pt
*.safetensors
*.mlpackage
7 changes: 7 additions & 0 deletions .vscode/settings.json
@@ -1,8 +1,10 @@
{
"cSpell.words": [
"arange",
"astype",
"CFURL",
"coreml",
"crossattn",
"cumsum",
"dtype",
"embs",
@@ -25,12 +27,17 @@
"pretrained",
"probs",
"pypi",
"pytest",
"randn",
"rerank",
"reranker",
"reranking",
"sandbeach",
"sess",
"SIMD",
"softmax",
"Tensorrt",
"torchvision",
"transfromers",
"uform",
"unimodal",
4 changes: 2 additions & 2 deletions Package.swift
@@ -29,13 +29,13 @@ let package = Package(
.product(name: "Transformers", package: "swift-transformers")
],
path: "swift",
exclude: ["EmbeddingsTests.swift"]
exclude: ["EncodersTests.swift"]
),
.testTarget(
name: "UFormTests",
dependencies: ["UForm"],
path: "swift",
sources: ["EmbeddingsTests.swift"]
sources: ["EncodersTests.swift"]
),
]
)
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -31,7 +31,8 @@ classifiers = [
dependencies = [
"huggingface_hub>=0.16.4",
"tokenizers>=0.13.3",
"pillow"
"pillow",
"simsimd",
]
description = "Pocket-Sized Multimodal AI for Content Understanding and Generation"
maintainers = [
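For context, the newly added `simsimd` dependency is a SIMD-accelerated similarity library. A hedged sketch of the kind of call it enables (the embeddings here are random placeholders, and `simsimd.cosine` is assumed to return a cosine *distance*, not a similarity):

```python
import numpy as np
import simsimd

# Placeholder embeddings; in UForm these would come from the encoders.
a = np.random.randn(256).astype(np.float32)
b = np.random.randn(256).astype(np.float32)

similarity = 1.0 - simsimd.cosine(a, b)  # cosine distance -> similarity
print(similarity)
```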
201 changes: 142 additions & 59 deletions python/scripts/export_encoders.ipynb
@@ -4,7 +4,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scripts for Exporting PyTorch Models to ONNX and CoreML"
"# Scripts for Exporting PyTorch Models to ONNX and CoreML\n",
"\n",
"Depending on the backend, we prefer different qunatization schemes.\n",
"\n",
"- For ONNX we use `int8` quantization.\n",
"- For PyTorch we use `bfloat16` quantization.\n",
"- For CoreML we use `float32` representation."
]
},
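The precision choices above map to concrete settings in each toolchain. A minimal sketch of the assumed constants (names follow the notebook's later cells; `precision` is the variable the CoreML conversion below expects):

```python
import torch
import coremltools as ct

# Assumed per-backend precision settings, matching the list above:
torch_dtype = torch.bfloat16      # PyTorch checkpoints
precision = ct.precision.FLOAT32  # CoreML mlprogram representation
# ONNX weights are exported in float32, then quantized to 8 bits
# with onnxruntime's quantize_dynamic later in this notebook.
```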
{
@@ -181,12 +187,12 @@
"coreml_model = ct.convert(\n",
" traced_script_module, source=\"pytorch\",\n",
" inputs=[image_input], outputs=[image_features, image_embeddings],\n",
" convert_to='mlprogram', compute_precision=ct.precision)\n",
" convert_to='mlprogram', compute_precision=precision)\n",
"\n",
"coreml_model.author = 'Unum Cloud'\n",
"coreml_model.license = 'Apache 2.0'\n",
"coreml_model.short_description = 'Pocket-Sized Multimodal AI for Content Understanding'\n",
"coreml_model.save(\"../uform-vl-english-small-image.mlpackage\")"
"coreml_model.save(os.path.join(output_directory, \"image_encoder.mlpackage\"))"
]
},
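After saving, the package can be loaded back as a sanity check; a minimal sketch, assuming the `output_directory` variable from the notebook's earlier cells:

```python
import os
import coremltools as ct

# Reload the saved mlprogram package and print its input/output description.
loaded = ct.models.MLModel(os.path.join(output_directory, "image_encoder.mlpackage"))
print(loaded.get_spec().description)
```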
{
@@ -217,7 +223,7 @@
"coreml_model.author = 'Unum Cloud'\n",
"coreml_model.license = 'Apache 2.0'\n",
"coreml_model.short_description = 'Pocket-Sized Multimodal AI for Content Understanding'\n",
"coreml_model.save(\"../uform-vl-english-small-text.mlpackage\")"
"coreml_model.save(os.path.join(output_directory, \"text_encoder.mlpackage\"))"
]
},
{
@@ -260,7 +266,7 @@
"metadata": {},
"outputs": [],
"source": [
"torch.save(model.image_encoder.state_dict(), 'image.pt')"
"torch.save(model.image_encoder.state_dict(), os.path.join(output_directory, \"image_encoder.pt\"))"
]
},
{
@@ -269,7 +275,7 @@
"metadata": {},
"outputs": [],
"source": [
"save_file(model.image_encoder.state_dict(), \"image.safetensors\")"
"save_file(model.image_encoder.state_dict(), os.path.join(output_directory, \"image_encoder.safetensors\"))"
]
},
{
@@ -288,7 +294,7 @@
"metadata": {},
"outputs": [],
"source": [
"torch.save(model.text_encoder.state_dict(), 'text.pt')"
"torch.save(model.text_encoder.state_dict(), os.path.join(output_directory, \"text_encoder.pt\"))"
]
},
{
Expand All @@ -297,7 +303,7 @@
"metadata": {},
"outputs": [],
"source": [
"save_file(model.text_encoder.state_dict(), \"text.safetensors\")"
"save_file(model.text_encoder.state_dict(), os.path.join(output_directory, \"text_encoder.safetensors\"))"
]
},
{
Expand All @@ -312,26 +318,6 @@
"image_features.shape, text_features.shape, image_embedding.shape, text_embedding.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!huggingface-cli upload unum-cloud/uform2-vl-english-small image.safetensors image.safetensors\n",
"!huggingface-cli upload unum-cloud/uform2-vl-english-small text.safetensors text.safetensors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!huggingface-cli upload unum-cloud/uform2-vl-english-small image.pt image.pt\n",
"!huggingface-cli upload unum-cloud/uform2-vl-english-small text.pt text.pt"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -354,7 +340,8 @@
"metadata": {},
"outputs": [],
"source": [
"from torch.onnx import export as onnx_export"
"from torch.onnx import export as onnx_export\n",
"import torch"
]
},
{
Expand All @@ -378,7 +365,7 @@
"onnx_export(\n",
" module,\n",
" (text_data[\"input_ids\"], text_data[\"attention_mask\"]), \n",
" \"text.onnx\", \n",
" os.path.join(output_directory, \"text_encoder.onnx\"), \n",
" export_params=True,\n",
" opset_version=15,\n",
" do_constant_folding=True,\n",
@@ -391,27 +378,6 @@
" 'embeddings' : {0 : 'batch_size'}})"
]
},
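Before converting further, the exported graph can be validated with ONNX's structural checker; a short sketch, assuming the save path used above:

```python
import os
import onnx

# Raises if the exported graph violates the ONNX spec (bad ops, broken shapes).
onnx.checker.check_model(onnx.load(os.path.join(output_directory, "text_encoder.onnx")))
```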
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's use [additional ONNX tooling](https://onnxruntime.ai/docs/performance/model-optimizations/float16.html#mixed-precision) to convert to half-precision."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import onnx\n",
"from onnxconverter_common import float16\n",
"\n",
"module = onnx.load(\"text.onnx\")\n",
"module_fp16 = float16.convert_float_to_float16(module)\n",
"onnx.save(module_fp16, \"text.onnx\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -433,7 +399,7 @@
"torch.onnx.export(\n",
" module,\n",
" image_data, \n",
" \"image.onnx\", \n",
" os.path.join(output_directory, \"image_encoder.onnx\"), \n",
" export_params=True,\n",
" opset_version=15,\n",
" do_constant_folding=True,\n",
@@ -445,18 +411,131 @@
" 'embeddings' : {0 : 'batch_size'}})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Quantizing to `float16`\n",
"\n",
"Let's use [additional ONNX tooling](https://onnxruntime.ai/docs/performance/model-optimizations/float16.html#mixed-precision) to convert to half-precision."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import onnx\n",
"from onnxconverter_common import float16\n",
"\n",
"module = onnx.load(\"image.onnx\")\n",
"from onnxconverter_common import float16"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.join(output_directory, \"text_encoder.onnx\")\n",
"module = onnx.load(module_path)\n",
"module_fp16 = float16.convert_float_to_float16(module)\n",
"onnx.save(module_fp16, module_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.join(output_directory, \"image_encoder.onnx\")\n",
"module = onnx.load(module_path)\n",
"module_fp16 = float16.convert_float_to_float16(module)\n",
"onnx.save(module_fp16, \"image.onnx\")"
"onnx.save(module_fp16, module_path)"
]
},
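If a downstream runtime rejects `float16` graph inputs, `convert_float_to_float16` accepts a `keep_io_types` flag that leaves the model's inputs and outputs in `float32` and casts only the interior; a sketch of that variant:

```python
module = onnx.load(module_path)
# keep_io_types=True preserves float32 inputs/outputs, converting only
# the internal tensors and weights to float16.
module_fp16 = float16.convert_float_to_float16(module, keep_io_types=True)
onnx.save(module_fp16, module_path)
```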
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Quantizing to `uint8`\n",
"\n",
"We can further quantize the model into `uint8` using ONNX quantization tools.\n",
"The `int8` is default variant, but [some of the operators don't support it](https://github.com/microsoft/onnxruntime/issues/15888)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from onnxruntime.quantization import quantize_dynamic, QuantType"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.join(output_directory, \"text_encoder.onnx\")\n",
"quantize_dynamic(module_path, module_path, weight_type=QuantType.QUInt8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.join(output_directory, \"image_encoder.onnx\")\n",
"quantize_dynamic(module_path, module_path, weight_type=QuantType.QUInt8)"
]
},
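A quick way to confirm the quantization paid off is to compare on-disk sizes before and after running the two cells above; a sketch, again assuming `output_directory`:

```python
import os

# Report the on-disk size of each exported ONNX module in MiB.
for name in ("text_encoder.onnx", "image_encoder.onnx"):
    path = os.path.join(output_directory, name)
    print(f"{name}: {os.path.getsize(path) / 2**20:.1f} MiB")
```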
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's check that the runtime can actually load those models."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import onnxruntime as ort\n",
"session_options = ort.SessionOptions()\n",
"session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.join(output_directory, \"text_encoder.onnx\")\n",
"session = ort.InferenceSession(module_path, sess_options=session_options)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.join(output_directory, \"image_encoder.onnx\")\n",
"session = ort.InferenceSession(module_path, sess_options=session_options)"
]
},
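Loading only proves the graph parses; running a dummy batch exercises the kernels too. A sketch, assuming the text encoder's inputs are named `input_ids` and `attention_mask` as in the export cell above (the sequence length of 77 is hypothetical):

```python
import numpy as np

# Hypothetical dummy batch: one all-zeros sequence with a full attention mask.
dummy = {
    "input_ids": np.zeros((1, 77), dtype=np.int64),
    "attention_mask": np.ones((1, 77), dtype=np.int64),
}
outputs = session.run(None, dummy)
print([o.shape for o in outputs])
```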
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Upload to Hugging Face"
]
},
{
@@ -465,8 +544,12 @@
"metadata": {},
"outputs": [],
"source": [
"!huggingface-cli upload unum-cloud/uform2-vl-english-small image.onnx image.onnx\n",
"!huggingface-cli upload unum-cloud/uform2-vl-english-small text.onnx text.onnx"
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../image_encoder.onnx image_encoder.onnx\n",
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../text_encoder.onnx text_encoder.onnx\n",
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../image_encoder.safetensors image_encoder.safetensors\n",
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../text_encoder.safetensors text_encoder.safetensors\n",
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../image_encoder.pt image_encoder.pt\n",
"!huggingface-cli upload unum-cloud/uform3-image-text-english-small ../../text_encoder.pt text_encoder.pt"
]
}
],
@@ -486,7 +569,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.10.11"
}
},
"nbformat": 4,