Commit

fallback to non-metal variant for unsupported models
apage43 committed Jun 9, 2023
1 parent 4b4056d commit d782f2b
Showing 2 changed files with 44 additions and 15 deletions.
25 changes: 24 additions & 1 deletion gpt4all-backend/llamamodel.cpp
@@ -234,7 +234,30 @@ DLL_EXPORT bool magic_match(std::istream& f) {
     // Check version
     uint32_t version = 0;
     f.read(reinterpret_cast<char*>(&version), sizeof(version));
-    return version LLAMA_VERSIONS;
+    if (!(version LLAMA_VERSIONS)) {
+        return false;
+    }
+#ifdef GGML_USE_METAL
+    // Check quant supported on metal
+    // skip fields
+    off_t offset = sizeof(uint32_t) * 6; // n_vocab, n_embd, n_mult, n_head, n_layer, n_rot
+    f.seekg(offset, std::ios_base::cur);
+    uint32_t ftype;
+    f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype)); // ftype
+    switch((enum llama_ftype) ftype) {
+        // currently supported on Metal https://github.com/ggerganov/llama.cpp/blob/ae9663f1887513e152839e91f61c513075a19422/ggml-metal.m#L51-L55
+        case LLAMA_FTYPE_MOSTLY_F16:
+        case LLAMA_FTYPE_MOSTLY_Q2_K:
+        case LLAMA_FTYPE_MOSTLY_Q4_0:
+        case LLAMA_FTYPE_MOSTLY_Q6_K:
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:
+            return true;
+        default: // unsupported quant-type for Metal
+            return false;
+    }
+#endif
+    return true;
 }

 DLL_EXPORT LLModel *construct() {
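For reference, the new check relies on the fixed llama.cpp (GGJT) header layout: a magic word, a version word, six uint32 hyperparameters, then the ftype word identifying the quantization. A minimal standalone probe that reads the same fields the patch skips over (an illustrative sketch, not part of this commit):

#include <cstdint>
#include <fstream>
#include <iostream>

int main(int argc, char** argv) {
    if (argc < 2) { std::cerr << "usage: probe <model.bin>\n"; return 1; }
    std::ifstream f(argv[1], std::ios::binary);
    if (!f) { std::cerr << "cannot open " << argv[1] << "\n"; return 1; }

    uint32_t magic = 0, version = 0, ftype = 0;
    f.read(reinterpret_cast<char*>(&magic), sizeof(magic));
    f.read(reinterpret_cast<char*>(&version), sizeof(version));
    // Skip n_vocab, n_embd, n_mult, n_head, n_layer, n_rot, exactly as the patch does.
    f.seekg(sizeof(uint32_t) * 6, std::ios_base::cur);
    f.read(reinterpret_cast<char*>(&ftype), sizeof(ftype));

    std::cout << std::hex << "magic: 0x" << magic << std::dec
              << ", version: " << version << ", ftype: " << ftype << "\n";
    return 0;
}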
34 changes: 20 additions & 14 deletions gpt4all-backend/llmodel.cpp
@@ -121,24 +121,30 @@ LLModel *LLModel::construct(const std::string &modelPath, std::string buildVariant
     if (!has_at_least_minimal_hardware())
         return nullptr;

-    //TODO: Auto-detect CUDA/OpenCL
-    if (buildVariant == "auto") {
-#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
-        buildVariant = "metal";
-#else
-        if (requires_avxonly()) {
-            buildVariant = "avxonly";
-        } else {
-            buildVariant = "default";
-        }
-#endif
-    }
     // Read magic
     std::ifstream f(modelPath, std::ios::binary);
     if (!f) return nullptr;
     // Get correct implementation
-    auto impl = implementation(f, buildVariant);
-    if (!impl) return nullptr;
+    const LLModel::Implementation* impl = nullptr;
+
+#if defined(__APPLE__) && defined(__arm64__) // FIXME: See if metal works for intel macs
+    if (buildVariant == "auto") {
+        impl = implementation(f, "metal");
+    }
+#endif
+
+    if (!impl) {
+        //TODO: Auto-detect CUDA/OpenCL
+        if (buildVariant == "auto") {
+            if (requires_avxonly()) {
+                buildVariant = "avxonly";
+            } else {
+                buildVariant = "default";
+            }
+        }
+        impl = implementation(f, buildVariant);
+        if (!impl) return nullptr;
+    }
     f.close();
     // Construct and return llmodel implementation
     return impl->construct();
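Taken together, the two hunks turn "auto" on Apple Silicon into a two-step probe: try the Metal implementation first, and since the Metal build's magic_match now rejects unsupported quant types, implementation(f, "metal") returns null for those files and construct falls through to the CPU variants. A sketch of that selection order (hypothetical helper, not part of the gpt4all API):

#include <string>
#include <vector>

// Hypothetical illustration of the commit's variant-selection order:
// on Apple Silicon try "metal" first (it may still be rejected by
// magic_match), then fall back to the CPU build matching the host CPU.
std::vector<std::string> variant_search_order(bool apple_arm64, bool avx_only) {
    std::vector<std::string> order;
    if (apple_arm64)
        order.push_back("metal");
    order.push_back(avx_only ? "avxonly" : "default");
    return order;
}

For example, a Q5_K_M model on an M-series Mac yields {"metal", "default"}; the Metal entry fails the new magic_match check, so the default CPU implementation loads the model instead.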
