Skip to content

Commit

Permalink
New `-m gguf` model that takes a required `path` option, refs #26
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Dec 9, 2023
1 parent 6d19096 commit 542643f
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion llm_llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def register_models(register):
),
aliases=details["aliases"],
)
register(LlamaGGUF())


@llm.hookimpl
Expand Down Expand Up @@ -245,6 +246,9 @@ def build_llama2_chat_prompt(self, prompt, conversation):
prompt_bits.append(f"{prompt.prompt} [/INST] ")
return prompt_bits

def get_path(self, options):
    """Return the filesystem path of the model file to load.

    The base implementation ignores ``options`` and uses the path the
    model was registered with; subclasses (e.g. the generic GGUF model)
    override this to read the path from the per-prompt options instead.
    """
    return self.path

def execute(self, prompt, stream, response, conversation):
with SuppressOutput(verbose=prompt.options.verbose):
kwargs = {"n_ctx": prompt.options.n_ctx or 4000, "n_gpu_layers": 1}
Expand All @@ -253,7 +257,9 @@ def execute(self, prompt, stream, response, conversation):
if prompt.options.n_gpu_layers:
kwargs["n_gpu_layers"] = prompt.options.n_gpu_layers
llm_model = Llama(
model_path=self.path, verbose=prompt.options.verbose, **kwargs
model_path=self.get_path(prompt.options),
verbose=prompt.options.verbose,
**kwargs,
)
if self.is_llama2_chat:
prompt_bits = self.build_llama2_chat_prompt(prompt, conversation)
Expand All @@ -272,6 +278,22 @@ def execute(self, prompt, stream, response, conversation):
yield item["choices"][0]["text"]


class LlamaGGUF(LlamaModel):
    """Generic model that can run any GGUF file supplied at prompt time.

    Unlike the registered ``LlamaModel`` instances, which are constructed
    with a fixed model path, this model is invoked as ``-m gguf`` and the
    user must supply the GGUF file location via the ``path`` option.
    """

    model_id = "gguf"
    is_llama2_chat = False

    class Options(LlamaModel.Options):
        # Required (no default): pydantic will reject a prompt that does
        # not provide -o path.
        path: str = Field(description="Path to a model GGUF file")

    def get_path(self, options):
        """Read the model path from the per-prompt options."""
        return options.path

    def __init__(self):
        # Intentionally does NOT call LlamaModel.__init__, which expects a
        # fixed model path at registration time; this model has none.
        pass


def human_size(num_bytes):
"""Return a human readable byte size."""
for unit in ["B", "KB", "MB", "GB", "TB", "PB"]:
Expand Down

0 comments on commit 542643f

Please sign in to comment.