From 0e618a50a2f5077cc52a048de7df09cba89ee912 Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Thu, 21 Mar 2024 20:16:27 -0400 Subject: [PATCH 1/8] properly class variables --- custom_components/llama_conversation/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/custom_components/llama_conversation/__init__.py b/custom_components/llama_conversation/__init__.py index 719847e..fbe354d 100644 --- a/custom_components/llama_conversation/__init__.py +++ b/custom_components/llama_conversation/__init__.py @@ -181,10 +181,9 @@ def __init__(self, hass: HomeAssistant, entry: ConfigEntry) -> None: CONF_BACKEND_TYPE, DEFAULT_BACKEND_TYPE ) + self.in_context_examples = None if entry.options.get(CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES, DEFAULT_USE_IN_CONTEXT_LEARNING_EXAMPLES): self._load_icl_examples() - else: - self.in_context_examples = None self._load_model(entry) @@ -567,6 +566,7 @@ def _load_model(self, entry: ConfigEntry) -> None: # n_threads_batch=4, ) + self.grammar = None if entry.options.get(CONF_USE_GBNF_GRAMMAR, DEFAULT_USE_GBNF_GRAMMAR): self._load_grammar() From d51c172c0746baab62b890b14bd850e53c277031 Mon Sep 17 00:00:00 2001 From: Isabella Nightshade <47324660+xBelladonna@users.noreply.github.com> Date: Fri, 22 Mar 2024 23:29:34 +1100 Subject: [PATCH 2/8] Small fixes to brightness adjustment and EOS token removal (#95) --- custom_components/llama_conversation/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/custom_components/llama_conversation/__init__.py b/custom_components/llama_conversation/__init__.py index fbe354d..32ed057 100644 --- a/custom_components/llama_conversation/__init__.py +++ b/custom_components/llama_conversation/__init__.py @@ -234,6 +234,8 @@ async def async_process( """Process a sentence.""" raw_prompt = self.entry.options.get(CONF_PROMPT, DEFAULT_PROMPT) + prompt_template = self.entry.options.get(CONF_PROMPT_TEMPLATE, DEFAULT_PROMPT_TEMPLATE) + template_desc = PROMPT_TEMPLATE_DESCRIPTIONS[prompt_template] refresh_system_prompt = self.entry.options.get(CONF_REFRESH_SYSTEM_PROMPT, DEFAULT_REFRESH_SYSTEM_PROMPT) remember_conversation = self.entry.options.get(CONF_REMEMBER_CONVERSATION, DEFAULT_REMEMBER_CONVERSATION) remember_num_interactions = self.entry.options.get(CONF_REMEMBER_NUM_INTERACTIONS, False) @@ -344,7 +346,7 @@ async def async_process( # fix certain arguments # make sure brightness is 0-255 and not a percentage - if "brightness" in extra_arguments and 0.0 < extra_arguments["brightness"] < 1.0: + if "brightness" in extra_arguments and 0.0 < extra_arguments["brightness"] <= 1.0: extra_arguments["brightness"] = int(extra_arguments["brightness"] * 255) # convert string "tuple" to a list for RGB colors @@ -373,7 +375,8 @@ async def async_process( to_say += f"\nFailed to run: {line}" _LOGGER.exception(f"Failed to run: {line}") - to_say = to_say.replace("<|im_end|>", "") # remove the eos token if it is returned (some backends + the old model does this) + if template_desc["assistant"]["suffix"]: + to_say = to_say.replace(template_desc["assistant"]["suffix"], "") # remove the eos token if it is returned (some backends + the old model does this) intent_response = intent.IntentResponse(language=user_input.language) intent_response.async_set_speech(to_say) From 0a6b41d5bc23031950adadb1725b8dbebcfe984f Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sat, 23 Mar 2024 10:09:42 -0400 Subject: [PATCH 3/8] support many checkpoints in evaluate --- docs/experiment-notes-stablelm.md | 30 +++++- 
evaluate.py | 170 ++++++++++++++++-------------- train.py | 20 +++- 3 files changed, 137 insertions(+), 83 deletions(-) diff --git a/docs/experiment-notes-stablelm.md b/docs/experiment-notes-stablelm.md index 03d4f96..f790d55 100644 --- a/docs/experiment-notes-stablelm.md +++ b/docs/experiment-notes-stablelm.md @@ -138,4 +138,32 @@ - 550: - 600: 0.9473684210526315 - 650: 0.9387651821862348 - - Final: 0.9463562753036437 \ No newline at end of file + - Final: 0.9463562753036437 + + +## stablelm-2-1_6b-zephyr + +# rev3 +- full fine tune +- epochs: 1 +- 2048 train ctx +- batch size 32 +- learning rate 1e-5 +- weight decay 0.1 +- gradient clipping 1.0 +- dataset size: medium ++ evaluation results: + - 100: 0.35779352226720645 + - 200: 0.5247975708502024 + - 300: 0.5339068825910931 + - 400: 0.6280364372469636 + - 500: 0.6923076923076923 + - 600: 0.7064777327935222 + - 700: 0.7135627530364372 + - 800: 0.7044534412955465 + - 900: 0.707995951417004 + - 1000: 0.718117408906882 + - Final: 0.7145748987854251 + +# rev4 +- dataset size: large \ No newline at end of file diff --git a/evaluate.py b/evaluate.py index 22201fa..3e9bdb7 100644 --- a/evaluate.py +++ b/evaluate.py @@ -10,23 +10,11 @@ CTX_SIZE = 2048 """ -python3 evaluate.py stablehome-3b-rev7/checkpoint-50 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-100 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-150 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-200 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-250 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-300 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-350 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-400 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-450 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-500 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-550 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-600 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7/checkpoint-650 --batch-size 4 --lora && \ - python3 evaluate.py stablehome-3b-rev7 --batch-size 4 --lora +python3 evaluate.py stablehome-1_6b-rev3 --batch-size 8 --all-checkpoints +python3 evaluate.py tinyhome-1b-rev1 --batch-size 8 --all-checkpoints """ -# TODO: auto detect all the checkpoints to run +service_call_regex = re.compile(r"```homeassistant\n([\S \t\n]*?)```") def tokenize(tokenizer, prompt): return tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=CTX_SIZE) @@ -38,72 +26,10 @@ def generate(model, tokenizer, prompts): text = tokenizer.batch_decode(outputs) return text -def main(): - parser = argparse.ArgumentParser(description="Evaluate the function calling for a model") - parser.add_argument("model") - parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl") - parser.add_argument("--batch-size", default=8) - parser.add_argument("--lora", default=False,action='store_const', const=True) - - args = parser.parse_args() - lora_folder = f"./loras/{args.model}" - model_folder = f"./models/{args.model}" - - dataset = load_dataset("json", data_files={ "train": args.dataset_file })["train"] - - print(f"Got {len(dataset)} examples to test") - - # filter out examples that are status requests - if "text" in dataset: - dataset 
= dataset.filter(lambda example: "```homeassistant" in example["text"]) - else: - dataset = dataset.filter(lambda example: "```homeassistant" in example["conversations"][2]["value"]) - - service_call_regex = re.compile(r"```homeassistant\n([\S \t\n]*?)```") - - torch.set_default_device("cuda") - - if args.lora: - adapter_config = PeftConfig.from_pretrained(lora_folder) - base_model_name = adapter_config.base_model_name_or_path - print(f"Loading lora from {lora_folder} ({base_model_name})...") - - base_model = AutoModelForCausalLM.from_pretrained( - base_model_name, - trust_remote_code=True, - torch_dtype=torch.bfloat16, - ) - trained_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True, padding_side='left') - - trained_model = PeftModel.from_pretrained(base_model, lora_folder, trust_remote_code=True, torch_dtype=torch.bfloat16) - - output_folder = lora_folder - else: - print(f"Loading model from {model_folder}...") - trained_model = AutoModelForCausalLM.from_pretrained( - model_folder, - trust_remote_code=True, - torch_dtype=torch.bfloat16, - ) - trained_tokenizer = AutoTokenizer.from_pretrained(model_folder, trust_remote_code=True, padding_side='left') - output_folder = model_folder - - trained_model.generation_config = GenerationConfig( - max_new_tokens=128, - use_cache=True, - do_sample=True, - temperature=0.1, - top_k=40, - top_p=1.0, - repetition_penalty=1.15, - eos_token_id=trained_model.config.eos_token_id, - pad_token_id=trained_model.config.pad_token_id if trained_model.config.pad_token_id else trained_model.config.eos_token_id, - ) - +def evaluate(output_folder, trained_model, trained_tokenizer, dataset, batch_size): split = trained_tokenizer.apply_chat_template(conversation=[{"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"}], tokenize=False).split( r"%%%%%%%%%%%%%%%%")[0] print("Evaluating...") - batch_size = int(args.batch_size) correct_answers = 0 total_answers = 0 color_mismatches = 0 @@ -193,6 +119,94 @@ def main(): "failed_examples": failed_examples, }, f, indent=4) +def load_model(model_name, is_lora, checkpoint_name): + lora_folder = f"./loras/{model_name}/" + model_folder = f"./models/{model_name}/" + + # tokenizer isn't saved into checkpoint folders + tokenizer_folder = lora_folder if is_lora else model_folder + + if checkpoint_name: + lora_folder = lora_folder + f"{checkpoint_name}/" + model_folder = model_folder + f"{checkpoint_name}/" + + if is_lora: + adapter_config = PeftConfig.from_pretrained(lora_folder) + base_model_name = adapter_config.base_model_name_or_path + print(f"Loading lora from {lora_folder} ({base_model_name})...") + + base_model = AutoModelForCausalLM.from_pretrained( + base_model_name, + trust_remote_code=True, + torch_dtype=torch.bfloat16, + ) + trained_model = PeftModel.from_pretrained(base_model, lora_folder, trust_remote_code=True, torch_dtype=torch.bfloat16) + + output_folder = lora_folder + else: + print(f"Loading model from {model_folder}...") + trained_model = AutoModelForCausalLM.from_pretrained( + model_folder, + trust_remote_code=True, + torch_dtype=torch.bfloat16, + ) + trained_tokenizer = AutoTokenizer.from_pretrained(model_folder, trust_remote_code=True, padding_side='left') + output_folder = model_folder + + trained_tokenizer = AutoTokenizer.from_pretrained(tokenizer_folder, trust_remote_code=True, padding_side='left') + + trained_model.generation_config = GenerationConfig( + max_new_tokens=128, + use_cache=True, + do_sample=True, + temperature=0.1, + top_k=40, + top_p=1.0, + repetition_penalty=1.15, 
+ eos_token_id=trained_model.config.eos_token_id, + pad_token_id=trained_model.config.pad_token_id if trained_model.config.pad_token_id else trained_model.config.eos_token_id, + ) + + return trained_model, trained_tokenizer, output_folder + +def main(): + parser = argparse.ArgumentParser(description="Evaluate the function calling for a model") + parser.add_argument("model") + parser.add_argument("--dataset_file", default="./data/home_assistant_test.jsonl") + parser.add_argument("--batch-size", default=8) + parser.add_argument("--lora", default=False, action='store_const', const=True) + parser.add_argument("--all-checkpoints", default=False, action='store_const', const=True) + + args = parser.parse_args() + batch_size = int(args.batch_size) + + dataset = load_dataset("json", data_files={ "train": args.dataset_file })["train"] + + print(f"Got {len(dataset)} examples to test") + + # filter out examples that are status requests + if "text" in dataset: + dataset = dataset.filter(lambda example: "```homeassistant" in example["text"]) + else: + dataset = dataset.filter(lambda example: "```homeassistant" in example["conversations"][2]["value"]) + + torch.set_default_device("cuda") + if not args.all_checkpoints: + checkpoints = [None] + else: + if args.lora: + ckpt_folder = f"./loras/{args.model}" + else: + ckpt_folder = f"./models/{args.model}" + checkpoints = [x for x in os.listdir(ckpt_folder) if os.path.isdir(os.path.join(ckpt_folder, x)) and "checkpoint" in x] + checkpoints.append(None) + + print(f"Found {len(checkpoints) - 1} checkpoints to test (plus the final model)") + + for ckpt in checkpoints: + trained_model, trained_tokenizer, output_folder = load_model(args.model, args.lora, ckpt) + evaluate(output_folder, trained_model, trained_tokenizer, dataset, batch_size) + if __name__ == "__main__": main() \ No newline at end of file diff --git a/train.py b/train.py index f2f2e93..9ce4295 100644 --- a/train.py +++ b/train.py @@ -55,14 +55,14 @@ """ python3 train.py \ - --run_name stablehome-1_6b-rev2 \ + --run_name stablehome-1_6b-rev3 \ --base_model stabilityai/stablelm-2-zephyr-1_6b \ --bf16 \ --train_dataset data/home_assistant_train.jsonl \ --test_dataset data/home_assistant_test.jsonl \ - --learning_rate 1e-5 \ - --micro_batch_size 2 --gradient_checkpointing \ - --ctx_size 2048 --save_steps 200 --save_total_limit 6 + --learning_rate 1e-5 --batch_size 32 \ + --micro_batch_size 2 --gradient_checkpointing --group_by_length \ + --ctx_size 2048 --save_steps 100 --save_total_limit 20 """ """ @@ -89,6 +89,18 @@ --add_pad_token --bf16 --micro_batch_size 4 --learning_rate 2e-5 """ +""" +python3 train.py \ + --run_name tinyhome-rev1 \ + --base_model TinyLlama/TinyLlama-1.1B-Chat-v1.0 \ + --bf16 \ + --train_dataset data/home_assistant_train.jsonl \ + --test_dataset data/home_assistant_test.jsonl \ + --learning_rate 5e-7 --batch_size 32 \ + --micro_batch_size 2 --gradient_checkpointing --group_by_length \ + --ctx_size 2048 --save_steps 100 --save_total_limit 10 +""" + @dataclass class TrainingRunArguments: run_name: str = field(metadata={"help": "The folder to save the output model under"}) From ac7b71ca4f8fbb4978ab881c5919b8ad2445337c Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sun, 24 Mar 2024 00:04:32 -0400 Subject: [PATCH 4/8] Make initial configuration easier + rewrite quickstart guide --- .../llama_conversation/__init__.py | 8 - .../llama_conversation/config_flow.py | 61 +++-- custom_components/llama_conversation/const.py | 73 +++++- .../llama_conversation/translations/en.json | 42 +++- 
docs/Setup.md | 234 +++++++----------- 5 files changed, 246 insertions(+), 172 deletions(-) diff --git a/custom_components/llama_conversation/__init__.py b/custom_components/llama_conversation/__init__.py index 32ed057..6a0f177 100644 --- a/custom_components/llama_conversation/__init__.py +++ b/custom_components/llama_conversation/__init__.py @@ -99,14 +99,6 @@ async def update_listener(hass, entry): async def async_setup_entry(hass: HomeAssistant, entry: ConfigEntry) -> bool: """Set up Local LLaMA Conversation from a config entry.""" - # TODO: figure out how to make this happen as part of the config flow. when I tried it errored out passing options in - if len(entry.options) == 0: - entry.options = { **DEFAULT_OPTIONS } - copy_to_options = [ CONF_REMOTE_USE_CHAT_ENDPOINT, CONF_TEXT_GEN_WEBUI_CHAT_MODE, CONF_TEXT_GEN_WEBUI_PRESET ] - for item in copy_to_options: - value = entry.data.get(item) - if value: - entry.options[item] = value def create_agent(backend_type): agent_cls = None diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py index 3ff8ffb..05300c7 100644 --- a/custom_components/llama_conversation/config_flow.py +++ b/custom_components/llama_conversation/config_flow.py @@ -80,7 +80,6 @@ DEFAULT_REMEMBER_CONVERSATION, DEFAULT_USE_IN_CONTEXT_LEARNING_EXAMPLES, DEFAULT_SERVICE_CALL_REGEX, - DEFAULT_OPTIONS, DEFAULT_REMOTE_USE_CHAT_ENDPOINT, DEFAULT_TEXT_GEN_WEBUI_CHAT_MODE, DEFAULT_OLLAMA_KEEP_ALIVE_MIN, @@ -95,6 +94,8 @@ TEXT_GEN_WEBUI_CHAT_MODE_INSTRUCT, TEXT_GEN_WEBUI_CHAT_MODE_CHAT_INSTRUCT, DOMAIN, + DEFAULT_OPTIONS, + OPTIONS_OVERRIDES, ) _LOGGER = logging.getLogger(__name__) @@ -138,23 +139,18 @@ def STEP_LOCAL_SETUP_DOWNLOAD_DATA_SCHEMA(*, chat_model=None, downloaded_model_q } ) -def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ssl=None, chat_model=None, use_chat_endpoint=None, webui_preset="", webui_chat_mode=""): +def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ssl=None, chat_model=None): extra1, extra2 = ({}, {}) default_port = DEFAULT_PORT if backend_type == BACKEND_TYPE_TEXT_GEN_WEBUI: - extra1[vol.Optional(CONF_TEXT_GEN_WEBUI_PRESET, default=webui_preset)] = str - extra1[vol.Optional(CONF_TEXT_GEN_WEBUI_CHAT_MODE, default=webui_chat_mode)] = SelectSelector(SelectSelectorConfig( - options=["", TEXT_GEN_WEBUI_CHAT_MODE_CHAT, TEXT_GEN_WEBUI_CHAT_MODE_INSTRUCT, TEXT_GEN_WEBUI_CHAT_MODE_CHAT_INSTRUCT], - translation_key=CONF_TEXT_GEN_WEBUI_CHAT_MODE, - multiple=False, - mode=SelectSelectorMode.DROPDOWN, - )) extra2[vol.Optional(CONF_TEXT_GEN_WEBUI_ADMIN_KEY)] = TextSelector(TextSelectorConfig(type="password")) elif backend_type == BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER: default_port = "8000" + elif backend_type == BACKEND_TYPE_OLLAMA: + default_port = "11434" return vol.Schema( { @@ -162,7 +158,6 @@ def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ss vol.Required(CONF_PORT, default=port if port else default_port): str, vol.Required(CONF_SSL, default=ssl if ssl else DEFAULT_SSL): bool, vol.Required(CONF_CHAT_MODEL, default=chat_model if chat_model else DEFAULT_CHAT_MODEL): str, - vol.Required(CONF_REMOTE_USE_CHAT_ENDPOINT, default=use_chat_endpoint if use_chat_endpoint else DEFAULT_REMOTE_USE_CHAT_ENDPOINT): bool, **extra1, vol.Optional(CONF_OPENAI_API_KEY): TextSelector(TextSelectorConfig(type="password")), **extra2 @@ -202,6 +197,12 @@ async def async_step_remote_model( ) -> FlowResult: """ Configure a remote model """ + 
@abstractmethod + async def async_step_model_parameters( + self, user_input: dict[str, Any] | None = None + ) -> FlowResult: + """ Configure a remote model """ + @abstractmethod async def async_step_download( self, user_input: dict[str, Any] | None = None @@ -223,6 +224,7 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom download_task = None download_error = None model_config: dict[str, Any] = {} + options: dict[str, Any] = {} @property def flow_manager(self) -> config_entries.ConfigEntriesFlowManager: @@ -384,7 +386,7 @@ async def async_step_download( next_step = "local_model" else: self.model_config[CONF_DOWNLOADED_MODEL_FILE] = self.download_task.result() - next_step = "finish" + next_step = "model_parameters" self.download_task = None return self.async_show_progress_done(next_step_id=next_step) @@ -404,6 +406,7 @@ def _validate_text_generation_webui(self, user_input: dict) -> str: models_result = requests.get( f"{'https' if self.model_config[CONF_SSL] else 'http'}://{self.model_config[CONF_HOST]}:{self.model_config[CONF_PORT]}/v1/internal/model/list", + timeout=5, # quick timeout headers=headers ) models_result.raise_for_status() @@ -435,6 +438,7 @@ def _validate_ollama(self, user_input: dict) -> str: models_result = requests.get( f"{'https' if self.model_config[CONF_SSL] else 'http'}://{self.model_config[CONF_HOST]}:{self.model_config[CONF_PORT]}/api/tags", + timeout=5, # quick timeout headers=headers ) models_result.raise_for_status() @@ -488,12 +492,9 @@ async def async_step_remote_model( port=user_input[CONF_PORT], ssl=user_input[CONF_SSL], chat_model=user_input[CONF_CHAT_MODEL], - use_chat_endpoint=user_input[CONF_REMOTE_USE_CHAT_ENDPOINT], - webui_preset=user_input.get(CONF_TEXT_GEN_WEBUI_PRESET), - webui_chat_mode=user_input.get(CONF_TEXT_GEN_WEBUI_CHAT_MODE), ) else: - return await self.async_step_finish() + return await self.async_step_model_parameters() except Exception: # pylint: disable=broad-except _LOGGER.exception("Unexpected exception") @@ -502,6 +503,35 @@ async def async_step_remote_model( return self.async_show_form( step_id="remote_model", data_schema=schema, errors=errors, description_placeholders=description_placeholders, ) + + async def async_step_model_parameters( + self, user_input: dict[str, Any] | None = None + ) -> FlowResult: + errors = {} + description_placeholders = {} + backend_type = self.model_config[CONF_BACKEND_TYPE] + model_name = self.model_config[CONF_CHAT_MODEL].lower() + + selected_default_options = { **DEFAULT_OPTIONS } + for key in OPTIONS_OVERRIDES.keys(): + if key in model_name: + selected_default_options.update(OPTIONS_OVERRIDES[key]) + + schema = vol.Schema(local_llama_config_option_schema(selected_default_options, backend_type)) + + if user_input: + self.options = user_input + try: + # validate input + schema(user_input) + return await self.async_step_finish() + except Exception as ex: + _LOGGER.exception("An unknown error has occurred!") + errors["base"] = "unknown" + + return self.async_show_form( + step_id="model_parameters", data_schema=schema, errors=errors, description_placeholders=description_placeholders, + ) async def async_step_finish( self, user_input: dict[str, Any] | None = None @@ -517,6 +547,7 @@ async def async_step_finish( title=f"LLM Model '{model_name}' ({location})", description="A Large Language Model Chat Agent", data=self.model_config, + options=self.options, ) @staticmethod diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py 
index e0bac85..9f4081e 100644 --- a/custom_components/llama_conversation/const.py +++ b/custom_components/llama_conversation/const.py @@ -6,9 +6,25 @@ DEFAULT_PROMPT = """You are 'Al', a helpful AI Assistant that controls the devices in a house. Complete the following task ask instructed with the information provided only. Services: {{ services }} Devices: +{{ devices }} + +Respond to the following user instruction by responding in the same format as the following examples: +{{ response_examples }}""" +ICL_NO_SYSTEM_PROMPT = """You are 'Al', a helpful AI Assistant that controls the devices in a house. Complete the following task ask instructed with the information provided only. +Services: {{ services }} +Devices: +{{ devices }} + +Respond to the following user instruction by responding in the same format as the following examples: +{{ response_examples }} + +User instruction:""" +NO_ICL_PROMPT = """You are 'Al', a helpful AI Assistant that controls the devices in a house. Complete the following task ask instructed with the information provided only. +Services: {{ services }} +Devices: {{ devices }}""" CONF_CHAT_MODEL = "huggingface_model" -DEFAULT_CHAT_MODEL = "TheBloke/phi-2-GGUF" # "microsoft/phi-2" +DEFAULT_CHAT_MODEL = "acon96/Home-3B-v3-GGUF" CONF_MAX_TOKENS = "max_new_tokens" DEFAULT_MAX_TOKENS = 128 CONF_TOP_K = "top_k" @@ -29,7 +45,7 @@ DEFAULT_BACKEND_TYPE = BACKEND_TYPE_LLAMA_HF CONF_DOWNLOADED_MODEL_QUANTIZATION = "downloaded_model_quantization" CONF_DOWNLOADED_MODEL_QUANTIZATION_OPTIONS = ["F16", "Q8_0", "Q5_K_M", "Q4_K_M", "Q3_K_M"] -DEFAULT_DOWNLOADED_MODEL_QUANTIZATION = "Q5_K_M" +DEFAULT_DOWNLOADED_MODEL_QUANTIZATION = "Q4_K_M" CONF_DOWNLOADED_MODEL_FILE = "downloaded_model_file" DEFAULT_DOWNLOADED_MODEL_FILE = "" DEFAULT_HOST = "127.0.0.1" @@ -90,7 +106,7 @@ CONF_USE_GBNF_GRAMMAR = "gbnf_grammar" DEFAULT_USE_GBNF_GRAMMAR = False CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES = "in_context_examples" -DEFAULT_USE_IN_CONTEXT_LEARNING_EXAMPLES = False +DEFAULT_USE_IN_CONTEXT_LEARNING_EXAMPLES = True CONF_TEXT_GEN_WEBUI_PRESET = "text_generation_webui_preset" CONF_OPENAI_API_KEY = "openai_api_key" CONF_TEXT_GEN_WEBUI_ADMIN_KEY = "text_generation_webui_admin_key" @@ -100,7 +116,8 @@ DEFAULT_REFRESH_SYSTEM_PROMPT = True DEFAULT_REMEMBER_CONVERSATION = True CONF_SERVICE_CALL_REGEX = "service_call_regex" -DEFAULT_SERVICE_CALL_REGEX = r"```homeassistant\n([\S \t\n]*?)```" +DEFAULT_SERVICE_CALL_REGEX = r"({[\S \t]*?})" +FINE_TUNED_SERVICE_CALL_REGEX = r"```homeassistant\n([\S \t\n]*?)```" CONF_REMOTE_USE_CHAT_ENDPOINT = "remote_use_chat_endpoint" DEFAULT_REMOTE_USE_CHAT_ENDPOINT = False CONF_TEXT_GEN_WEBUI_CHAT_MODE = "text_generation_webui_chat_mode" @@ -127,5 +144,51 @@ CONF_SERVICE_CALL_REGEX: DEFAULT_SERVICE_CALL_REGEX, CONF_REMOTE_USE_CHAT_ENDPOINT: DEFAULT_REMOTE_USE_CHAT_ENDPOINT, CONF_TEXT_GEN_WEBUI_CHAT_MODE: DEFAULT_TEXT_GEN_WEBUI_CHAT_MODE, + CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: DEFAULT_USE_IN_CONTEXT_LEARNING_EXAMPLES, + } +) + +OPTIONS_OVERRIDES = { + "home-3b-v3": { + CONF_PROMPT: NO_ICL_PROMPT, + CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_ZEPHYR, + CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False, + CONF_SERVICE_CALL_REGEX: FINE_TUNED_SERVICE_CALL_REGEX, + CONF_USE_GBNF_GRAMMAR: True, + }, + "home-3b-v2": { + CONF_PROMPT: NO_ICL_PROMPT, + CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False, + CONF_SERVICE_CALL_REGEX: FINE_TUNED_SERVICE_CALL_REGEX, + CONF_USE_GBNF_GRAMMAR: True, + }, + "home-3b-v1": { + CONF_PROMPT: NO_ICL_PROMPT, + CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False, + 
CONF_SERVICE_CALL_REGEX: FINE_TUNED_SERVICE_CALL_REGEX, + }, + "home-1b-v2": { + CONF_PROMPT: NO_ICL_PROMPT, + CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False, + CONF_SERVICE_CALL_REGEX: FINE_TUNED_SERVICE_CALL_REGEX, + }, + "home-1b-v1": { + CONF_PROMPT: NO_ICL_PROMPT, + CONF_USE_IN_CONTEXT_LEARNING_EXAMPLES: False, + CONF_SERVICE_CALL_REGEX: FINE_TUNED_SERVICE_CALL_REGEX, + }, + "mistral": { + CONF_PROMPT: ICL_NO_SYSTEM_PROMPT, + CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_MISTRAL, + }, + "mixtral": { + CONF_PROMPT: ICL_NO_SYSTEM_PROMPT, + CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_MISTRAL, + }, + "llama-2": { + CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_LLAMA2, + }, + "zephyr": { + CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_ZEPHYR, } -) \ No newline at end of file +} \ No newline at end of file diff --git a/custom_components/llama_conversation/translations/en.json b/custom_components/llama_conversation/translations/en.json index d25cc20..a14fecc 100644 --- a/custom_components/llama_conversation/translations/en.json +++ b/custom_components/llama_conversation/translations/en.json @@ -21,8 +21,8 @@ "downloaded_model_quantization": "Downloaded model quantization", "huggingface_model": "HuggingFace Model" }, - "description": "Please configure llama.cpp for the model", - "title": "Configure llama.cpp" + "description": "Please select the model to use", + "title": "Select Model" }, "remote_model": { "data": { @@ -36,8 +36,8 @@ "remote_use_chat_endpoint": "Use chat completions endpoint", "text_generation_webui_chat_mode": "Chat Mode" }, - "description": "Provide the connection details for an instance of text-generation-webui that is hosting the model.", - "title": "Configure connection to remote API" + "description": "Provide the connection details to connect to the API that is hosting the model.", + "title": "Configure connection to remote Model API" }, "pick_backend": { "data": { @@ -46,6 +46,40 @@ }, "description": "Select the backend for running the model. The options are:\n1. Llama.cpp with a model from HuggingFace\n2. Llama.cpp with a model stored on the disk\n3. [text-generation-webui API](https://github.com/oobabooga/text-generation-webui)\n4. Generic OpenAI API Compatible API\n5. [llama-cpp-python Server](https://llama-cpp-python.readthedocs.io/en/latest/server/)\n6. 
[Ollama API](https://github.com/jmorganca/ollama/blob/main/docs/api.md)\n\nIf using Llama.cpp locally, make sure you copied the correct wheel file to the same directory as the integration.", "title": "Select Backend" + }, + "model_parameters": { + "data": { + "max_new_tokens": "Maximum tokens to return in response", + "prompt": "System Prompt", + "prompt_template": "Prompt Format", + "temperature": "Temperature", + "top_k": "Top K", + "top_p": "Top P", + "request_timeout": "Remote Request Timeout (seconds)", + "ollama_keep_alive": "Keep Alive/Inactivity Timeout (minutes)", + "extra_attributes_to_expose": "Additional attribute to expose in the context", + "allowed_service_call_arguments": "Arguments allowed to be pass to service calls", + "gbnf_grammar": "Enable GBNF Grammar", + "openai_api_key": "API Key", + "text_generation_webui_admin_key": "Admin Key", + "service_call_regex": "Service Call Regex", + "refresh_prompt_per_tern": "Refresh System Prompt Every Turn", + "remember_conversation": "Remember conversation", + "remember_num_interactions": "Number of past interactions to remember", + "in_context_examples": "Enable in context learning (ICL) examples", + "text_generation_webui_preset": "Generation Preset/Character Name", + "remote_use_chat_endpoint": "Use chat completions endpoint", + "text_generation_webui_chat_mode": "Chat Mode" + }, + "data_description": { + "prompt": "See [here](https://github.com/acon96/home-llm/blob/develop/docs/Model%20Prompting.md) for more information on model prompting.", + "in_context_examples": "If you are using a model that is not specifically fine-tuned for use with this integration: enable this option", + "remote_use_chat_endpoint": "If this is enabled, then the integration will use the chat completion HTTP endpoint instead of the text completion one.", + "extra_attributes_to_expose": "This is the list of Home Assistant 'attributes' that are exposed to the model. This limits how much information the model is able to see and answer questions on.", + "allowed_service_call_arguments": "This is the list of parameters that are allowed to be passed to Home Assistant service calls." + }, + "description": "Please configure the model according to how it should be prompted. 
Defaults have been chosen for you based on the selected model.", + "title": "Configure the selected model" } } }, diff --git a/docs/Setup.md b/docs/Setup.md index b52eead..07ae45b 100644 --- a/docs/Setup.md +++ b/docs/Setup.md @@ -1,154 +1,122 @@ -# Setup Instructions +# Quickstart Guide + +## TOC +* [Intro](#intro) + * [Requirements](#requirements) +* [Install the Home Assistant Component with HACs](#install-the-home-assistant-component-with-hacs) +* [Path 1: Using the Home Model with the Llama.cpp Backend](#path-1-using-the-home-model-with-llamacpp-backend) + * [Overview](#overview) + * [Step 1: Wheel Installation for llama-cpp-python](#step-1-wheel-installation-for-llama-cpp-python) + * [Step 2: Model Selection](#step-2-model-selection) + * [Step 3: Model Configuration](#step-3-model-configuration) +* [Path 2: Using Mistral-Instruct-7B with Ollama Backend](#path-2-using-mistral-instruct-7b-with-ollama-backend) + * [Overview](#overview-1) + * [Step 1: Downloading and serving the Model](#step-1-downloading-and-serving-the-model) + * [Step 2: Connect to the Ollama API](#step-2-connect-to-the-ollama-api) + * [Step 3: Model Configuration](#step-3-model-configuration-1) +* [Configuring the Integration as a Conversation Agent](#configuring-the-integration-as-a-conversation-agent) +* [Finished!](#finished) + + +## Intro +Welcome to the Quickstart guide for setting up Home-LLM. The component has MANY configuration options and is designed for experimentation with Home Assistant and LLMs. This guide outlines two main paths to get you started on your journey: +1. using the Llama.cpp backend with our provided fine-tuned model downloaded from HuggingFace +2. using the Ollama backend with the Mistral-Instruct-7B model using in-context learning -1. [Home Assistant Component](#home-assistant-component) -2. [Configuring the LLM as a Conversation Agent](#configuring-as-a-conversation-agent) -3. [Setting up the text-generation-webui Addon](#text-generation-webui-add-on) - -## Home Assistant Component ### Requirements - +- Knowledge of how to use a command line on Linux, MacOS, or Windows Subsystem for Linux - A supported version of Home Assistant; `2023.10.0` or newer - SSH or Samba access to your Home Assistant instance +- [HACs](https://hacs.xyz/docs/setup/download/) is installed -**Optional:** -- [HACs](https://hacs.xyz/docs/setup/download/) (if you want to install it that way) - -### 💾 🚕 Install the Home Assistant Component with HACs - -> 🛑 ✋🏻 Requires HACs -> -> First make sure you have [HACs installed](https://hacs.xyz/docs/setup/download/) - -Once you have HACs installed, this button will help you add the repository to HACS and open the download page - -[![Open your Home Assistant instance and open a repository inside the Home Assistant Community Store.](https://my.home-assistant.io/badges/hacs_repository.svg)](https://my.home-assistant.io/redirect/hacs_repository/?category=Integration&repository=home-llm&owner=acon96) - +## Install the Home Assistant Component with HACs +The following link will open your Home Assistant installation and download the integration: **Remember to restart Home Assistant after installing the component!** -A "LLaMA Conversation" device should show up in the `Settings > Devices and Services > [Devices]` tab now: -![image](https://github.com/acon96/home-llm/assets/61225/4427e362-e443-4796-bee8-5bdda18305d0) - - -### 💾 🔨 Install the Home Assistant Component Manually - -1. 
Ensure you have either the Samba, SSH, FTP, or another add-on installed that gives you access to the `config` folder -2. If there is not already a `custom_components` folder, create one now. -3. Copy the `custom_components/llama_conversation` folder from this repo to `config/custom_components/llama_conversation` on your Home Assistant machine. -4. Restart Home Assistant: `Developer Tools -> Services -> Run` : `HomeAssistant.restart` - -A "LLaMA Conversation" device should show up in the `Settings > Devices and Services > [Devices]` tab now: -![image](https://github.com/acon96/home-llm/assets/61225/4427e362-e443-4796-bee8-5bdda18305d0) - - -### ⚙️ Configuration and Setup -You must configure at least one model by configuring the integration. - -1. `Settings > Devices and Services`. -2. Click the `Add Integration` button in the bottom right of the screen. -3. Filter the list of "brand names" for llama, and "LLaMa Conversation" should remain. -4. Choose the backend you will be using to host the model: - 1. Using builtin llama.cpp with hugging face - 2. Using builtin llama.cpp with existing model file - 3. using text-generation-webui api - 4. using generic openapi compatiable api - 5. using ollama api - -### llama-cpp-python Wheel Installation - -If you plan on running the model locally on the same hardware as your Home Assistant server, then the recommended way to run the model is to use Llama.cpp. Unfortunately there are not pre-build wheels for this package for the musllinux runtime that Home Assistant Docker images use. To get around this, we provide compatible wheels for x86_x64 and arm64 in the [dist](./dist) folder. - -Download the `*.whl` file that matches your hardware and then copy the `*.whl` file to the `custom_components/llama_conversation/` folder. It will be installed as a configuration step while setting up the Home Assistant component. - -| wheel | platform | home assistant version | -| --- | --- | --- | -| llama_cpp_python-{version}-cp311-cp311-musllinux_1_2_aarch64.whl | aarch64 (RPi 4 and 5) | `2024.1.4` and older | -| llama_cpp_python-{version}-cp311-cp311-musllinux_1_2_x86_64.whl | x86_64 (Intel + AMD) | `2024.1.4` and older | -| llama_cpp_python-{version}-cp312-cp312-musllinux_1_2_aarch64.whl | aarch64 (RPi 4 and 5) | `2024.2.0` and newer | -| llama_cpp_python-{version}-cp312-cp312-musllinux_1_2_x86_64.whl | x86_64 (Intel + AMD) | `2024.2.0` and newer | - -### Constrained Grammar - -When running the model locally with [Llama.cpp], the component also constrains the model output using a GBNF grammar. -This forces the model to provide valid output no matter what since its outputs are constrained to valid JSON every time. -This helps the model perform significantly better at lower quantization levels where it would previously generate syntax errors. It is recommended to turn this on when using the component as it will reduce the incorrect output from the model. - -For more information See [output.gbnf](./custom_components/llama_conversation/output.gbnf) for the existing grammar. 
- - -### Backend Configuration +[![Open your Home Assistant instance and open a repository inside the Home Assistant Community Store.](https://my.home-assistant.io/badges/hacs_repository.svg)](https://my.home-assistant.io/redirect/hacs_repository/?category=Integration&repository=home-llm&owner=acon96) -![image](https://github.com/airtonix/home-llm/assets/61225/6f5d9748-5bfc-47ce-8abc-4f07d389a73f) +After installation, A "LLaMA Conversation" device should show up in the `Settings > Devices and Services > [Devices]` tab now. -When setting up the component, there are 5 different "backend" options to choose from: +## Path 1: Using the Home Model with the Llama.cpp Backend +### Overview +This setup path involves downloading a fine-tuned model from HuggingFace and integrating it with Home Assistant using the Llama.cpp backend. This option is for Home Assistant setups without a dedicated GPU, and the model is capable of running on most devices, and can even run on a Raspberry Pi (although slowly). -a. Llama.cpp with a model from HuggingFace <--- recommended if you are lost -b. Llama.cpp with a locally provided model -c. A remote instance of text-generation-webui -d. A generic OpenAI API compatible interface; *should* be compatible with LocalAI, LM Studio, and all other OpenAI compatible backends -e. Ollama api +### Step 1: Wheel Installation for llama-cpp-python +In order to run the Llama.cpp backend as part of Home Assistant, we need to install the binary "wheel" distribution that is pre-built for compatibility with Home Assistant. -See [docs/Backend Configuration.md](/docs/Backend%20Configuration.md) for more info. +The `*.whl` files are located in the [/dist](/dist) folder of this repository. -#### Llama.cpp Backend with a model from HuggingFace +To ensure compatibility with your Home Assistant and Python versions, select the correct `.whl` file for your hardware's architecture: +- For Home Assistant `2024.1.4` and older, use the Python 3.11 wheels (`cp311`) +- For Home Assistant `2024.2.0` and newer, use the Python 3.12 wheels (`cp312`) +- **ARM devices** (e.g., Raspberry Pi 4/5): + - Example filenames: + - `llama_cpp_python-{version}-cp311-cp311-musllinux_1_2_aarch64.whl` + - `llama_cpp_python-{version}-cp312-cp312-musllinux_1_2_aarch64.whl` +- **x86_64 devices** (e.g., Intel/AMD desktops): + - Example filenames: + - `llama_cpp_python-{version}-cp311-cp311-musllinux_1_2_x86_64.whl` + - `llama_cpp_python-{version}-cp312-cp312-musllinux_1_2_x86_64.whl` +Download the appropriate wheel and copy it to the `custom_components/llama_conversation/` directory. -This is option A +After the wheel file has been copied to the correct folder. +1. In Home Assistant: navigate to `Settings > Devices and Services` +2. Select the `+ Add Integration` button in the bottom right corner +3. Search for, and select `LLaMA Conversation` +4. With the `Llama.cpp (HuggingFace)` backend selected, click `Submit` -It is recommended to use either `acon96/Home-3B-v3-GGUF` or `acon96/Home-1B-v2-GGUF` as the model for this integration. -NOTE: if you are using `acon96/Home-3B-v3-GGUF`, you need to set the prompt template to `Zephyr` after setting up the component by configuring the model after creation. +This will trigger the installation of the wheel. If you ever need to update the version of Llama.cpp, you can copy a newer wheel file to the same folder, and re-create the integration; this will re-trigger the install process. -You need the following settings to configure the local backend from HuggingFace: -1. 
**Model Name**: the name of the model in the form `repo/model-name`. The repo MUST contain a GGUF quantized model. -2. **Model Quantization**: The quantization level to download. Pick from the list. Higher quantizations use more RAM but have higher quality responses. +Once `llama-cpp-python` is installed, continue to the model selection. -#### Llama.cpp Backend with a locally downloaded model +### Step 2: Model Selection +The next step is to specify which model will be used by the integration. You may select any repository on HuggingFace that has a model in GGUF format in it. We will use `acon96/Home-3B-v3-GGUF` for this example. If you have less than 4GB of RAM then use ``acon96/Home-1B-v2-GGUF`. -This is option B +**Model Name**: Use either `acon96/Home-3B-v3-GGUF` or `acon96/Home-1B-v2-GGUF` +**Quantization Level**: The model will be downloaded in the selected quantization level from the HuggingFace repository. If unsure which level to choose, select `Q4_K_M`. -Please download the model file from HuggingFace and copy it to your Home Assistant device. Recommended models are [acon96/Home-3B-v3-GGUF](https://huggingface.co/acon96/Home-3B-v3-GGUF) or [acon96/Home-1B-v2-GGUF](https://huggingface.co/acon96/Home-1B-v2-GGUF). +Pressing `Submit` will download the model from HuggingFace. -NOTE: if you are using `acon96/Home-3B-v3-GGUF`, you need to set the prompt template to `Zephyr` after setting up the component by configuring the model after creation. +### Step 3: Model Configuration +This step allows you to configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works. -You need the following settings to configure the local backend from HuggingFace: -1. **Model File Name**: the file name where Home Assistant can access the model to load. Most likely a sub-path of `/config` or `/media` or wherever you copied the model file to. +For now, defaults for the model should have been populated and you can just scroll to the bottom and click `Submit`. -#### Remote Backends +The model will be loaded into memory and should now be available to select as a conversation agent! -This is options C, D and E +## Path 2: Using Mistral-Instruct-7B with Ollama Backend +### Overview +For those who have access to a GPU, you can also use the Mistral-Instruct-7B model to power your conversation agent. This path requires a separate machine that has a GPU that has [Ollama](https://ollama.com/) already installed on it. This path utilizes in-context learning examples, to prompt the model to produce the output that we expect. -You need the following settings in order to configure the "remote" backend: -1. **Hostname**: the host of the machine where text-generation-webui API is hosted. If you are using the provided add-on then the hostname is `local-text-generation-webui` or `f459db47-text-generation-webui` depending on how the addon was installed. -2. **Port**: the port for accessing the text-generation-webui API. NOTE: this is not the same as the UI port. (Usually 5000) -3. **Name of the Model**: This name must EXACTLY match the name as it appears in `text-generation-webui` +### Step 1: Downloading and serving the Model +Mistral can be easily set up and downloaded on the serving machine using the `ollama pull mistral` command. -With the remote text-generation-webui backend, the component will validate that the selected model is available for use and will ensure it is loaded remotely. The Generic OpenAI compatible version does NOT do any validation or model loading. 
+In order to access the model from another machine, we need to run the Ollama API server open to the local network. This can be achieved using the `OLLAMA_HOST=0.0.0.0:11434 ollama serve` command. **DO NOT RUN THIS COMMAND ON ANY PUBLICLY + ACCESSIBLE SERVERS AS IT LISTENS ON ALL NETWORK INTERFACES** -**Setting up with LocalAI**: -If you are an existing LocalAI user or would like to use LocalAI as your backend, please refer to [this](https://io.midori-ai.xyz/howtos/setup-with-ha/) website which has instructions on how to setup LocalAI to work with Home-LLM including automatic installation of the latest version of the the Home-LLM model. The auto-installer (LocalAI Manager) will automatically download and setup LocalAI and/or the model of your choice and automatically create the necessary template files for the model to work with this integration. +### Step 2: Connect to the Ollama API -**Setting up Ollama**: -In order to use the GGUF model with Ollama: +1. In Home Assistant: navigate to `Settings > Devices and Services` +2. Select the `+ Add Integration` button in the bottom right corner +3. Search for, and select `LLaMA Conversation` +4. Select `Ollama API` from the dropdown and click `Submit` +5. Set up the connection to the API: + - **IP Address**: Fill out IP Address for the machine hosting Ollama + - **Port**: leave on `11434` + - **Use HTTPS**: unchecked + - **Model Name**: `mistral:latest` + - **API Key**: leave blank +6. Click `Submit` -1. Download the desired quantization level of the model from HuggingFace (see above for links) -2. Copy the following block into a `Modelfile`: -``` -FROM Home-3B-v3.q4_k_m.gguf -PARAMETER num_ctx 2048 -PARAMETER temperature 0.1 -PARAMETER top_k 40 -PARAMETER top_p 0.95 -PARAMETER stop "<|endoftext|>" -PARAMETER stop "<|im_end|>" -``` +### Step 3: Model Configuration +This step allows you to configure how the model is "prompted". See [here](./Model%20Prompting.md) for more information on how that works. -3. Change the `FROM` line to match the filename of the quantized model that you downloaded -4. Create the model with Ollama - - `ollama create home-3b-v3:q4_k_m -f Modelfile` -5. Start the server - - `ollama serve` +For now, defaults for the model should have been populated and you can just scroll to the bottom and click `Submit`. -## Configuring as a Conversation Agent +## Configuring the Integration as a Conversation Agent +Now that the integration is configured and providing the conversation agent, we need to configure Home Assistant to use our conversation agent instead of the built in intent recognition system. -> 🛑 ✋🏻 Security Warning +> 🛑 Warning 🛑 > > Any devices that you select to be exposed to the model will be added as > context and potentially have their state changed by the model. @@ -163,11 +131,9 @@ PARAMETER stop "<|im_end|>" 2. Select `+ Add Assistant` 3. Name the assistant whatever you want. 4. Select the conversation agent that we created previously. -5. If using STT or TTS configure these now -6. Return to the "Overview" dashboard and select chat icon in the top left. -7. From here you can submit queries to the AI agent. +5. If you wish to use Speech to Text or Text to Speech, set those up now (left as an exercise to the reader) -In order for any entities be available to the agent, you must "expose" them first. +In order for any entities be available to the agent, you must "expose" them first. An exposed entity is added to the model's context and the model is able to call services on your behalf against those entities. 1. 
Navigate to "Settings" -> "Voice Assistants" -> "Expose" Tab 2. Select "+ Expose Entities" in the bottom right @@ -177,18 +143,6 @@ In order for any entities be available to the agent, you must "expose" them firs > When exposing entities to the model, you are adding tokens to the model's context. If you exceed the context length of the model, then your interactions with the model will fail due to the instructions being dropped out of the context's sliding window. > It is recommended to only expose a maximum of 32 entities to this conversation agent at this time. -## text-generation-webui add-on -You can use this button to automatically download and build the addon for `oobabooga/text-generation-webui` - -[![Open your Home Assistant instance and show the dashboard of an add-on.](https://my.home-assistant.io/badges/supervisor_addon.svg)](https://my.home-assistant.io/redirect/supervisor_addon/?addon=f459db47_text-generation-webui&repository_url=https%3A%2F%2Fgithub.com%2Facon96%2Fhome-llm) - -If the automatic installation fails then you can install the addon manually using the following steps: - -1. Ensure you have either the Samba, SSH, FTP, or another add-on installed that gives you access to the `addons` folder -2. Copy the `addon` folder from this repo to `addons/text-generation-webui` on your Home Assistant machine. -3. Go to the "Add-ons" section in settings and then pick the "Add-on Store" from the bottom right corner. -4. Select the 3 dots in the top right and click "Check for Updates" and Refresh the webpage. -5. There should now be a "Local Add-ons" section at the top of the "Add-on Store" -6. Install the `oobabooga-text-generation-webui` add-on. It will take ~15-20 minutes to build the image on a Raspberry Pi. -7. Copy any models you want to use to the `addon_configs/local_text-generation-webui/models` folder or download them using the UI. -8. Load up a model to use. NOTE: The timeout for ingress pages is only 60 seconds so if the model takes longer than 60 seconds to load (very likely) then the UI will appear to time out and you will need to navigate to the add-on's logs to see when the model is fully loaded. \ No newline at end of file +## Finished! +Return to the "Overview" dashboard and select chat icon in the top left. +From here you can chat with the AI model and request it to control your house. 
\ No newline at end of file From b555e5a50219df958bd727cbe5cb1678e9924ca2 Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sun, 24 Mar 2024 07:25:46 -0400 Subject: [PATCH 5/8] fix typo in validate --- custom_components/llama_conversation/config_flow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py index 05300c7..de50d8d 100644 --- a/custom_components/llama_conversation/config_flow.py +++ b/custom_components/llama_conversation/config_flow.py @@ -470,7 +470,7 @@ async def async_step_remote_model( if user_input: try: self.model_config.update(user_input) - error_reason = None + error_message = None # validate and load when using text-generation-webui or ollama if backend_type == BACKEND_TYPE_TEXT_GEN_WEBUI: From 46e1c4fc1d8ae10a678f452625da050c95dd7c9f Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sun, 24 Mar 2024 10:56:50 -0400 Subject: [PATCH 6/8] reset model_config when starting the configflow --- .../llama_conversation/config_flow.py | 6 ++- custom_components/llama_conversation/const.py | 44 ++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py index de50d8d..795139c 100644 --- a/custom_components/llama_conversation/config_flow.py +++ b/custom_components/llama_conversation/config_flow.py @@ -223,8 +223,8 @@ class ConfigFlow(BaseLlamaConversationConfigFlow, config_entries.ConfigFlow, dom install_wheel_error = None download_task = None download_error = None - model_config: dict[str, Any] = {} - options: dict[str, Any] = {} + model_config: dict[str, Any] + options: dict[str, Any] @property def flow_manager(self) -> config_entries.ConfigEntriesFlowManager: @@ -239,6 +239,8 @@ async def async_step_user( self, user_input: dict[str, Any] | None = None ) -> FlowResult: """Handle the initial step.""" + self.model_config = {} + self.options = {} return await self.async_step_pick_backend() async def async_step_pick_backend( diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py index 9f4081e..52cee2a 100644 --- a/custom_components/llama_conversation/const.py +++ b/custom_components/llama_conversation/const.py @@ -1,5 +1,8 @@ """Constants for the LLaMa Conversation integration.""" import types +# import voluptuous as vol +# import homeassistant.helpers.config_validation as cv +# from homeassistant.const import CONF_HOST, CONF_PORT, CONF_SSL DOMAIN = "llama_conversation" CONF_PROMPT = "prompt" @@ -191,4 +194,43 @@ "zephyr": { CONF_PROMPT_TEMPLATE: PROMPT_TEMPLATE_ZEPHYR, } -} \ No newline at end of file +} + +# TODO: need to rewrite the internal config_entry key names so they actually make sense before we expose this +# method of configuring the component. 
doing so will require writing a config version upgrade migration +# MODEL_CONFIG_SCHEMA = vol.Schema( +# { +# vol.Required(CONF_BACKEND_TYPE): vol.All( +# vol.In([ +# BACKEND_TYPE_LLAMA_EXISTING, +# BACKEND_TYPE_TEXT_GEN_WEBUI, +# BACKEND_TYPE_LLAMA_CPP_PYTHON_SERVER, +# BACKEND_TYPE_OLLAMA, +# BACKEND_TYPE_GENERIC_OPENAI, +# ]) +# ), +# vol.Optional(CONF_HOST, default=DEFAULT_HOST): cv.string, +# vol.Optional(CONF_PORT, default=DEFAULT_PORT): cv.port, +# vol.Optional(CONF_SSL, default=DEFAULT_SSL): cv.boolean, +# vol.Optional("options"): vol.Schema( +# { +# vol.Optional(CONF_PROMPT): cv.string, +# vol.Optional(CONF_PROMPT_TEMPLATE): vol.All( +# vol.In([ +# PROMPT_TEMPLATE_ALPACA, +# PROMPT_TEMPLATE_CHATML, +# PROMPT_TEMPLATE_LLAMA2, +# PROMPT_TEMPLATE_MISTRAL, +# PROMPT_TEMPLATE_VICUNA, +# PROMPT_TEMPLATE_ZEPHYR, +# ]) +# ), +# } +# ) +# } +# ) + +# CONFIG_SCHEMA = vol.Schema( +# { DOMAIN: vol.All(cv.ensure_list, [MODEL_CONFIG_SCHEMA]) }, +# extra=vol.ALLOW_EXTRA, +# ) \ No newline at end of file From bf04cc3e6e9f4f9f749fc699adb3cf9263325344 Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sun, 24 Mar 2024 11:02:49 -0400 Subject: [PATCH 7/8] remove default host since it didn't make sense --- custom_components/llama_conversation/config_flow.py | 3 +-- custom_components/llama_conversation/const.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/custom_components/llama_conversation/config_flow.py b/custom_components/llama_conversation/config_flow.py index 795139c..60b7a4d 100644 --- a/custom_components/llama_conversation/config_flow.py +++ b/custom_components/llama_conversation/config_flow.py @@ -61,7 +61,6 @@ CONF_TEXT_GEN_WEBUI_CHAT_MODE, CONF_OLLAMA_KEEP_ALIVE_MIN, DEFAULT_CHAT_MODEL, - DEFAULT_HOST, DEFAULT_PORT, DEFAULT_SSL, DEFAULT_MAX_TOKENS, @@ -154,7 +153,7 @@ def STEP_REMOTE_SETUP_DATA_SCHEMA(backend_type: str, *, host=None, port=None, ss return vol.Schema( { - vol.Required(CONF_HOST, default=host if host else DEFAULT_HOST): str, + vol.Required(CONF_HOST, default=host if host else ""): str, vol.Required(CONF_PORT, default=port if port else default_port): str, vol.Required(CONF_SSL, default=ssl if ssl else DEFAULT_SSL): bool, vol.Required(CONF_CHAT_MODEL, default=chat_model if chat_model else DEFAULT_CHAT_MODEL): str, diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py index 52cee2a..da11673 100644 --- a/custom_components/llama_conversation/const.py +++ b/custom_components/llama_conversation/const.py @@ -51,7 +51,6 @@ DEFAULT_DOWNLOADED_MODEL_QUANTIZATION = "Q4_K_M" CONF_DOWNLOADED_MODEL_FILE = "downloaded_model_file" DEFAULT_DOWNLOADED_MODEL_FILE = "" -DEFAULT_HOST = "127.0.0.1" DEFAULT_PORT = "5000" DEFAULT_SSL = False CONF_EXTRA_ATTRIBUTES_TO_EXPOSE = "extra_attributes_to_expose" @@ -209,7 +208,7 @@ # BACKEND_TYPE_GENERIC_OPENAI, # ]) # ), -# vol.Optional(CONF_HOST, default=DEFAULT_HOST): cv.string, +# vol.Optional(CONF_HOST): cv.string, # vol.Optional(CONF_PORT, default=DEFAULT_PORT): cv.port, # vol.Optional(CONF_SSL, default=DEFAULT_SSL): cv.boolean, # vol.Optional("options"): vol.Schema( From 1c2cbc63d6e1ae610a982d664f508dc07d550e32 Mon Sep 17 00:00:00 2001 From: Alex O'Connell Date: Sun, 24 Mar 2024 11:07:10 -0400 Subject: [PATCH 8/8] Release v0.2.10 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 502918b..0cc04f2 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ In order to facilitate running the project entirely on the system where Home Ass ## 
Version History | Version | Description | | ------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| v0.2.10 | Allow configuring the model parameters during initial setup, attempt to auto-detect defaults for recommended models, Fix to allow lights to be set to max brightness | | v0.2.9 | Fix HuggingFace Download, Fix llama.cpp wheel installation, Fix light color changing, Add in-context-learning support | | v0.2.8 | Fix ollama model names with colons | | v0.2.7 | Publish model v3, Multiple Ollama backend improvements, Updates for HA 2024.02, support for voice assistant aliases |
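The "max brightness" item in the v0.2.10 row refers to the comparison change in patch 2: a brightness given as a fraction of exactly 1.0 previously slipped past the percentage check and reached Home Assistant as a raw value of 1 out of 255. A minimal sketch of the corrected normalization, using a hypothetical `extra_arguments` dict in place of the parsed service call:

```python
# Sketch of the brightness normalization after patch 2: fractional values in
# (0, 1] are scaled to Home Assistant's 0-255 range, so exactly 1.0 now maps
# to full brightness instead of being passed through as 1/255.
extra_arguments = {"brightness": 1.0}  # hypothetical parsed service-call args

if "brightness" in extra_arguments and 0.0 < extra_arguments["brightness"] <= 1.0:
    extra_arguments["brightness"] = int(extra_arguments["brightness"] * 255)

assert extra_arguments["brightness"] == 255
```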