Commit

fallback to non-metal variant for unsupported models
apage43 committed Jun 9, 2023
1 parent 4b4056d commit d782f2b
Showing 2 changed files with 44 additions and 15 deletions.
25 changes: 24 additions & 1 deletion gpt4all-backend/llamamodel.cpp
@@ -234,7 +234,30 @@ DLL_EXPORT bool magic_match(std::istream& f) {
     // Check version
     uint32_t version = 0;
     f.read(reinterpret_cast<char*>(&version), sizeof(version));
-    return version LLAMA_VERSIONS;
+    if (!(version LLAMA_VERSIONS)) {
+        return false;
+    }
+#ifdef GGML_USE_METAL
+    // Check quant supported on metal
+    // skip fields
+    off_t offset = sizeof(uint32_t) * 6; // n_vocab, n_embd, n_mult, n_head, n_layer, n_rot
+    f.seekg(offset, std::ios_base::cur);
+    uint32_t ftype;
+    f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype)); // ftype
+    switch((enum llama_ftype) ftype) {
+        // currently supported on Metal https://github.com/ggerganov/llama.cpp/blob/ae9663f1887513e152839e91f61c513075a19422/ggml-metal.m#L51-L55
+        case LLAMA_FTYPE_MOSTLY_F16:
+        case LLAMA_FTYPE_MOSTLY_Q2_K:
+        case LLAMA_FTYPE_MOSTLY_Q4_0:
+        case LLAMA_FTYPE_MOSTLY_Q6_K:
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:
+            return true;
+        default: // unsupported quant-type for Metal
+            return false;
+    }
+#endif
+    return true;
 }

 DLL_EXPORT LLModel *construct() {
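For reference, the new check relies on the fixed llama.cpp (GGJT) header layout: a magic word, a version word, six uint32 hyperparameters, then the ftype word identifying the quantization. A minimal standalone probe that reads the same fields the patch skips over (an illustrative sketch, not part of this commit):

#include <cstdint>
#include <fstream>
#include <iostream>

int main(int argc, char** argv) {
    if (argc < 2) { std::cerr << "usage: probe <model.bin>\n"; return 1; }
    std::ifstream f(argv[1], std::ios::binary);
    if (!f) { std::cerr << "cannot open " << argv[1] << "\n"; return 1; }

    uint32_t magic = 0, version = 0, ftype = 0;
    f.read(reinterpret_cast<char*>(&magic), sizeof(magic));
    f.read(reinterpret_cast<char*>(&version), sizeof(version));
    // Skip n_vocab, n_embd, n_mult, n_head, n_layer, n_rot, exactly as the patch does.
    f.seekg(sizeof(uint32_t) * 6, std::ios_base::cur);
    f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype));

    std::cout << std::hex << "magic: 0x" << magic << std::dec
              << ", version: " << version << ", ftype: " << ftype << "\n";
    return 0;
}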
34 changes: 20 additions & 14 deletions gpt4all-backend/llmodel.cpp
@@ -121,24 +121,30 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
     if (!has_at_least_minimal_hardware())
         return nullptr;

-    //TODO: Auto-detect CUDA/OpenCL
-    if (buildVariant == "auto") {
-#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
-        buildVariant = "metal";
-#else
-        if (requires_avxonly()) {
-            buildVariant = "avxonly";
-        } else {
-            buildVariant = "default";
-        }
-#endif
-    }
     // Read magic
     std::ifstream f(modelPath, std::ios::binary);
     if (!f) return nullptr;
     // Get correct implementation
-    auto impl = implementation(f, buildVariant);
-    if (!impl) return nullptr;
+    const LLModel::Implementation* impl = nullptr;
+
+#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
+    if (buildVariant == "auto") {
+        impl = implementation(f, "metal");
+    }
+#endif
+
+    if (!impl) {
+        //TODO: Auto-detect CUDA/OpenCL
+        if (buildVariant == "auto") {
+            if (requires_avxonly()) {
+                buildVariant = "avxonly";
+            } else {
+                buildVariant = "default";
+            }
+        }
+        impl = implementation(f, buildVariant);
+        if (!impl) return nullptr;
+    }
     f.close();
     // Construct and return llmodel implementation
     return impl->construct();
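Taken together, the two hunks turn "auto" on Apple Silicon into a two-step probe: try the Metal implementation first, and since the Metal build's magic_match now rejects unsupported quant types, implementation(f, "metal") returns null for those files and construct falls through to the CPU variants. A sketch of that selection order (hypothetical helper, not part of the gpt4all API):

#include <string>
#include <vector>

// Hypothetical illustration of the commit's variant-selection order:
// on Apple Silicon try "metal" first (it may still be rejected by
// magic_match), then fall back to the CPU build matching the host CPU.
std::vector<std::string> variant_search_order(bool apple_arm64, bool avx_only) {
    std::vector<std::string> order;
    if (apple_arm64)
        order.push_back("metal");
    order.push_back(avx_only ? "avxonly" : "default");
    return order;
}

For example, a Q5_K_M model on an M-series Mac yields {"metal", "default"}; the Metal entry fails the new magic_match check, so the default CPU implementation loads the model instead.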
