Improve: Support different models with Swift
ashvardanian committed Apr 17, 2024
1 parent 94ebd6e commit 479ae61
Showing 4 changed files with 56 additions and 20 deletions.
3 changes: 2 additions & 1 deletion .vscode/settings.json
@@ -42,7 +42,8 @@
"uform",
"unimodal",
"unsqueeze",
"Vardanian"
"Vardanian",
"whitespaces"
],
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
@@ -20,6 +20,13 @@ pytest python/scripts/ -s -x -Wd -v -k onnx # To run only ONNX tests without loa

## Swift

To build and test the Swift package, use the following commands:

```bash
swift build
swift test
```

Swift formatting is enforced with Apple's default `swift-format` utility.
To install and run it on all the files in the project, use the following command:
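The repository's exact command sits outside this hunk; as a rough, unverified sketch, assuming `swift-format` is installed via Homebrew and that its `--in-place` and `--recursive` flags behave as in recent releases:

```bash
# Assumption: swift-format installed via Homebrew; flag names may differ by version.
brew install swift-format
swift-format --in-place --recursive swift/ Package.swift
```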

24 changes: 14 additions & 10 deletions swift/Encoders.swift
@@ -11,7 +11,6 @@ import Foundation
import Hub // `Config`
import Tokenizers // `AutoTokenizer`


enum EncoderError: Error {
case configLoadingError(String)
case modelLoadingError(String)
@@ -21,7 +20,6 @@ enum EncoderError: Error {
case modelPredictionFailed(String)
}


public enum Embedding {
case i32s([Int32])
case f16s([Float16])
@@ -116,16 +114,22 @@ public class TextEncoder {
let finalConfigPath = configPath ?? modelPath + "/config.json"
let finalTokenizerPath = tokenizerPath ?? modelPath + "/tokenizer.json"
self.model = try readModel(fromPath: modelPath)
self.processor = try TextProcessor(configPath: finalConfigPath, tokenizerPath: finalTokenizerPath, model: self.model)
self.processor = try TextProcessor(
configPath: finalConfigPath,
tokenizerPath: finalTokenizerPath,
model: self.model
)
}


public init(modelName: String, hubApi: HubApi = .shared) async throws {
let repo = Hub.Repo(id: modelName)
let modelURL = try await hubApi.snapshot(from: repo, matching: ["text.mlpackage/*", "config.json", "tokenizer.json"])
let modelURL = try await hubApi.snapshot(
from: repo,
matching: ["text_encoder.mlpackage/*", "config.json", "tokenizer.json"]
)
let configPath = modelURL.appendingPathComponent("config.json").path
let tokenizerPath = modelURL.appendingPathComponent("tokenizer.json").path
self.model = try readModel(fromURL: modelURL.appendingPathComponent("text.mlpackage", isDirectory: true))
self.model = try readModel(fromURL: modelURL.appendingPathComponent("text_encoder.mlpackage", isDirectory: true))
self.processor = try TextProcessor(configPath: configPath, tokenizerPath: tokenizerPath, model: self.model)
}
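
For context, a minimal usage sketch of the Hub-based initializer above. It assumes the `unum-cloud/uform3-image-text-english-small` repository actually ships the renamed `text_encoder.mlpackage`, and that `TextEncoder` offers a string `forward(with:)` mirroring the `ImageEncoder.forward(with:)` shown further down; neither is guaranteed by this hunk alone.

```swift
import UForm

// Hypothetical usage: fetch the Core ML text encoder from the Hub and embed a sentence.
// A `forward(with:)` overload taking a `String` is assumed to exist on `TextEncoder`.
func embedQuery() async throws {
    let textModel = try await TextEncoder(
        modelName: "unum-cloud/uform3-image-text-english-small"
    )
    let embedding = try textModel.forward(with: "a dog playing on the beach")
    switch embedding {
    case .f16s(let values): print("half-precision embedding with \(values.count) dimensions")
    case .i32s(let values): print("quantized embedding with \(values.count) dimensions")
    default: print("embedding returned in another precision")
    }
}
```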

Expand Down Expand Up @@ -158,12 +162,12 @@ public class ImageEncoder {

public init(modelName: String, hubApi: HubApi = .shared) async throws {
let repo = Hub.Repo(id: modelName)
let modelURL = try await hubApi.snapshot(from: repo, matching: ["image.mlpackage/*", "config.json"])
let modelURL = try await hubApi.snapshot(from: repo, matching: ["image_encoder.mlpackage/*", "config.json"])
let configPath = modelURL.appendingPathComponent("config.json").path
self.model = try readModel(fromURL: modelURL.appendingPathComponent("image.mlpackage", isDirectory: true))
self.model = try readModel(fromURL: modelURL.appendingPathComponent("image_encoder.mlpackage", isDirectory: true))
self.processor = try ImageProcessor(configPath: configPath)
}

public func forward(with image: CGImage) throws -> Embedding {
let inputFeatureProvider = try self.processor.preprocess(image)
let prediction = try self.model.prediction(from: inputFeatureProvider)
@@ -240,7 +244,7 @@ class ImageProcessor {
if let imageEncoderConfig = configDict["image_encoder"] as? [String: Any] {
configDict = imageEncoderConfig
}

let config = Config(configDict)
self.imageSize = config.imageSize!.intValue!
}
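
Similarly, a small end-to-end sketch for the renamed `image_encoder.mlpackage`, decoding the input with ImageIO the same way the test suite does. The Hub repository layout is assumed from the snapshot patterns above; `ImageEncoder.forward(with:)` for `CGImage` is shown in this diff.

```swift
import CoreGraphics
import Foundation
import ImageIO
import UForm

enum ExampleError: Error { case imageDecodingFailed(String) }

// Hypothetical usage: fetch the Core ML image encoder from the Hub,
// decode a local image with ImageIO, and produce its embedding.
func embedImage(atPath path: String) async throws -> Embedding {
    let imageModel = try await ImageEncoder(
        modelName: "unum-cloud/uform3-image-text-english-small"
    )
    let url = URL(fileURLWithPath: path)
    guard let source = CGImageSourceCreateWithURL(url as CFURL, nil),
        let cgImage = CGImageSourceCreateImageAtIndex(source, 0, nil)
    else {
        throw ExampleError.imageDecodingFailed(path)
    }
    return try imageModel.forward(with: cgImage)
}
```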
42 changes: 33 additions & 9 deletions swift/EncodersTests.swift
@@ -1,11 +1,23 @@
import CoreGraphics
import Hub
import ImageIO
import UForm
import Hub
import XCTest

final class TokenizerTests: XCTestCase {

var hfToken: String?

override func setUp() {
super.setUp()
// Attempt to load the Hugging Face token from the `.hf_token` file in the current directory
let fileURL = URL(fileURLWithPath: FileManager.default.currentDirectoryPath).appendingPathComponent(".hf_token")
if let token = try? String(contentsOf: fileURL, encoding: .utf8).trimmingCharacters(in: .whitespacesAndNewlines)
{
hfToken = token
}
}

func cosineSimilarity<T: FloatingPoint>(between vectorA: [T], and vectorB: [T]) -> T {
guard vectorA.count == vectorB.count else {
fatalError("Vectors must be of the same length.")
@@ -23,9 +35,9 @@ final class TokenizerTests: XCTestCase {
return dotProduct / (magnitudeA * magnitudeB)
}

func testTextEmbeddings() async throws {
func testTextEmbeddings(forModel modelName: String) async throws {

let api = HubApi(hfToken: "xxx")
let api = HubApi(hfToken: hfToken)
let textModel = try await TextEncoder(
modelName: "unum-cloud/uform3-image-text-english-small",
hubApi: api
@@ -60,29 +72,35 @@ final class TokenizerTests: XCTestCase {
)
}

func testImageEmbeddings() async throws {
func testTextEmbeddings() async throws {
for model in ["unum-cloud/uform3-image-text-english-small"] {
try await testTextEmbeddings(forModel: model)
}
}

func testImageEmbeddings(forModel modelName: String) async throws {

// One option is to use a local model repository.
//
// let root = "uform/"
// let textModel = try TextEncoder(
// modelPath: root + "uform-vl-english-large-text.mlpackage",
// modelPath: root + "uform-vl-english-large-text_encoder.mlpackage",
// configPath: root + "uform-vl-english-large-text.json",
// tokenizerPath: root + "uform-vl-english-large-text.tokenizer.json"
// )
// let imageModel = try ImageEncoder(
// modelPath: root + "uform-vl-english-large-image.mlpackage",
// modelPath: root + "uform-vl-english-large-image_encoder.mlpackage",
// configPath: root + "uform-vl-english-large-image.json"
// )
//
// A better option is to fetch the model directly from HuggingFace, just as users would:
let api = HubApi(hfToken: "xxx")
let api = HubApi(hfToken: hfToken)
let textModel = try await TextEncoder(
modelName: "unum-cloud/uform3-image-text-english-small",
modelName: modelName,
hubApi: api
)
let imageModel = try await ImageEncoder(
modelName: "unum-cloud/uform3-image-text-english-small",
modelName: modelName,
hubApi: api
)

@@ -143,4 +161,10 @@ final class TokenizerTests: XCTestCase {
}
}

func testImageEmbeddings() async throws {
for model in ["unum-cloud/uform3-image-text-english-small"] {
try await testImageEmbeddings(forModel: model)
}
}

}
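
To run these tests locally, the Hugging Face token is read from a `.hf_token` file in the working directory, as the `setUp` override above shows. A minimal workflow from the package root (the token value below is a placeholder):

```bash
# Write a placeholder Hugging Face access token where the tests expect it,
# then run the Swift test suite from the package root.
echo "hf_xxx_your_token_here" > .hf_token
swift test
```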
