From 0996149911458ce9821aa49e10db4e7c1187486d Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Wed, 26 Jun 2024 22:10:12 -0400
Subject: [PATCH] convert-hf : allow converting the weird BitNet 1.3B

Its FFN size is 5460, which is not convenient:
it is not a multiple of the Q1_3 block size.
The offending tensors are kept in F16,
which makes the final model 5.01 bpw.
---
 convert-hf-to-gguf.py  | 16 ++++++++++------
 gguf-py/gguf/quants.py |  4 ++++
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 9f4094194b4a3..2bf0967ce4f91 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -301,12 +301,16 @@ def write_tensors(self):
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     # TODO: cleaner model-specific per-tensor types
                     # NOTE: Q1_3 is only relevant for BitNet 1.58b
-                    if self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3 and not any(
-                        self.match_model_tensor_name(new_name, key, None)
-                        for key in [
-                            gguf.MODEL_TENSOR.TOKEN_EMBD,
-                            gguf.MODEL_TENSOR.OUTPUT,
-                        ]
+                    if (
+                        self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
+                        and gguf.can_quantize_to_q1_3(data)
+                        and not any(
+                            self.match_model_tensor_name(new_name, key, None)
+                            for key in [
+                                gguf.MODEL_TENSOR.TOKEN_EMBD,
+                                gguf.MODEL_TENSOR.OUTPUT,
+                            ]
+                        )
                     ):
                         data = gguf.quantize_q1_3(data)
                         assert data.dtype == np.uint8
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index c66b83b3f8283..c96e6a34361e4 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -126,6 +126,10 @@ def quantize_q8_0(data: np.ndarray):
 __q1_3_block_size, __q1_3_type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3]
 
 
+def can_quantize_to_q1_3(n: np.ndarray) -> bool:
+    return n.shape[-1] % __q1_3_block_size == 0
+
+
 def __quantize_q1_3_shape_change(s: tuple[int, ...]) -> tuple[int, ...]:
     return (*s[:-1], s[-1] // __q1_3_block_size * __q1_3_type_size)
 
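
To illustrate the new gguf.can_quantize_to_q1_3() guard, here is a minimal
standalone sketch. The block size of 64 elements per Q1_3 block is an
assumption for illustration (the real value is looked up from
GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3] in gguf-py/gguf/quants.py), and
the tensor shapes are made up:

    import numpy as np

    # ASSUMED Q1_3 block size, used here only for illustration; the actual
    # value comes from GGML_QUANT_SIZES[GGMLQuantizationType.Q1_3].
    Q1_3_BLOCK_SIZE = 64

    def can_quantize_to_q1_3(n: np.ndarray) -> bool:
        # Rows are quantized along the last axis, so that dimension must
        # split into whole blocks for the tensor to be quantizable.
        return n.shape[-1] % Q1_3_BLOCK_SIZE == 0

    # BitNet 1.3B's FFN dimension is 5460, which leaves a partial block
    # (5460 % 64 == 20), so those tensors fall through to the F16 path.
    print(can_quantize_to_q1_3(np.zeros((5460, 2048))))  # True  (2048 % 64 == 0)
    print(can_quantize_to_q1_3(np.zeros((2048, 5460))))  # False (5460 % 64 == 20)

Keeping the non-divisible tensors in F16 rather than padding them is what
lifts the average from Q1_3's nominal ~1.625 bpw to the 5.01 bpw mentioned
in the commit message.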