Eliminate short-circuiting for loading from local #3600

Merged · 4 commits · Sep 15, 2023
ludwig/schema/llms/base_model.py (8 changes: 6 additions & 2 deletions)
@@ -1,3 +1,4 @@
+import os
 from dataclasses import field

 from marshmallow import fields, ValidationError
@@ -45,17 +46,20 @@ def validate(model_name: str):
     """Validates and upgrades the given model name to its full path, if applicable.

     If the name exists in `MODEL_PRESETS`, returns the corresponding value from the dict; otherwise checks if the
-    given name (which should be a full path) exists in the transformers library.
+    given name (which should be a full path) exists locally or in the transformers library.
     """
     if isinstance(model_name, str):
         if model_name in MODEL_PRESETS:
             return MODEL_PRESETS[model_name]
+        if os.path.isdir(model_name):
+            return model_name
Comment on lines +54 to +55
@arnavgarg1 (Contributor) · Sep 15, 2023:

Hmm, wondering if we should add some more checks here, the most basic one being that the directory should not be empty. In an ideal world, we would also add validation to ensure that the model config exists in this directory and that it can be initialized correctly from this directory using the same code block from line 57. What do you think?

Contributor:

I think we can do this in a fast-follow, but for completeness these additional checks are important. What do you think? @Infernaught @justinxzhao

Contributor (PR author):

Cool. Justin and I expect HF to give us a failure message if the model objects are bad, so it's probably not that bad if we don't have these checks for the time being. I'm going to merge this for now, but I definitely think we should keep thinking about how to properly verify this.

Contributor:

Okay with the merge for now as well, but I do think we should follow up with custom validation here and surface clear guidance on what went wrong and how to fix it.

I do think we should add these checks now because they're honestly so trivial to add, and we want to fail fast, always. There's nothing wrong with leaving it up to HF, but I guarantee we are going to get Ludwig users messaging us asking why they're seeing cryptic import or not-found errors from HF when the real problem is a user-side error. The reason I personally really like these kinds of validation checks is that they let us get ahead of the problem and provide clear, crisp resolution steps so users can unblock themselves, and that is what I would lean towards.
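
For illustration, here is a minimal sketch of the fail-fast checks discussed above (hypothetical, not part of this PR; the helper name, the error text, and the `ludwig.error` import path are assumptions):

import os

from transformers import AutoConfig

from ludwig.error import ConfigValidationError  # import path is an assumption


def _validate_local_model_dir(model_name: str) -> str:
    """Hypothetical fail-fast checks for a local base model directory."""
    if not os.listdir(model_name):
        # An empty directory can never hold a valid pretrained model.
        raise ConfigValidationError(
            f"Local base model directory `{model_name}` is empty. It should contain a "
            "config.json and the model weights downloaded from huggingface."
        )
    if not os.path.isfile(os.path.join(model_name, "config.json")):
        # Without a model config, AutoConfig initialization is guaranteed to fail.
        raise ConfigValidationError(
            f"Local base model directory `{model_name}` contains no config.json. "
            "Re-download the model, e.g. with huggingface_hub.snapshot_download."
        )
    # Reuse the same initialization path as hub models so errors surface early.
    AutoConfig.from_pretrained(model_name)
    return model_name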

         try:
             AutoConfig.from_pretrained(model_name)
             return model_name
         except OSError:
             raise ConfigValidationError(
-                f"Specified base model `{model_name}` is not a valid pretrained CausalLM listed on huggingface. "
+                f"Specified base model `{model_name}` is not a valid pretrained CausalLM listed on huggingface "
+                "or a valid local directory containing the weights for a pretrained CausalLM from huggingface. "
                 "Please see: https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads"
             )
     raise ValidationError(
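
For context, a quick sketch of the resolution order `validate` implements after this change (the preset key and local path below are illustrative, not real entries):

# 1) Preset name: upgraded to its full path via MODEL_PRESETS.
validate("llama-2-7b")  # hypothetical preset key
# 2) Existing local directory: returned as-is, with no hub lookup.
validate("/models/my-llama")  # assumes this directory exists on disk
# 3) Anything else: must be loadable from the hub via AutoConfig.from_pretrained.
validate("HuggingFaceH4/tiny-random-LlamaForCausalLM")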
tests/integration_tests/test_llm.py (35 changes: 35 additions & 0 deletions)
@@ -623,3 +623,38 @@ def test_global_max_sequence_length_for_llms():

    # Check that the value can never be larger than the model's context_len
    assert model.global_max_sequence_length == 2048


def test_local_path_loading():
    """Tests that local paths can be used to load models."""

    from huggingface_hub import snapshot_download

    # Download the model to a local directory.
    # Expand "~" explicitly: os.makedirs and snapshot_download do not expand it.
    LOCAL_PATH = os.path.expanduser("~/test_local_path_loading")
    REPO_ID = "HuggingFaceH4/tiny-random-LlamaForCausalLM"
    os.makedirs(LOCAL_PATH, exist_ok=True)
    snapshot_download(repo_id=REPO_ID, local_dir=LOCAL_PATH)

    # Load the model using the local path.
    config1 = {
        MODEL_TYPE: MODEL_LLM,
        BASE_MODEL: LOCAL_PATH,
        INPUT_FEATURES: [text_feature(name="input", encoder={"type": "passthrough"})],
        OUTPUT_FEATURES: [text_feature(name="output")],
    }
    config_obj1 = ModelConfig.from_dict(config1)
    model1 = LLM(config_obj1)

    # Load the model using the repo id.
    config2 = {
        MODEL_TYPE: MODEL_LLM,
        BASE_MODEL: REPO_ID,
        INPUT_FEATURES: [text_feature(name="input", encoder={"type": "passthrough"})],
        OUTPUT_FEATURES: [text_feature(name="output")],
    }
    config_obj2 = ModelConfig.from_dict(config2)
    model2 = LLM(config_obj2)

    # Check that the two models are identical.
    assert _compare_models(model1.model, model2.model)
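
As an aside, a more hermetic variant of the download step could use a temporary directory so the snapshot is cleaned up automatically (a sketch, not part of this PR):

import tempfile

from huggingface_hub import snapshot_download

# Download into an isolated, auto-removed directory instead of a fixed path.
with tempfile.TemporaryDirectory() as local_dir:
    snapshot_download(repo_id="HuggingFaceH4/tiny-random-LlamaForCausalLM", local_dir=local_dir)
    # ... build the config with BASE_MODEL set to local_dir and run the same assertions here.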