@@ -212,7 +213,7 @@ AdalFlow full documentation available at [adalflow.sylph.ai](https://adalflow.sy
# AdalFlow: A Tribute to Ada Lovelace
-AdalFlow is named in honor of [Ada Lovelace](https://en.wikipedia.org/wiki/Ada_Lovelace), the pioneering female mathematician who first recognized that machines could do more than just calculations. As a female-led team, we aim to inspire more women to enter the AI field.
+AdalFlow is named in honor of [Ada Lovelace](https://en.wikipedia.org/wiki/Ada_Lovelace), the pioneering female mathematician who first recognized that machines could do more than just calculations. As a team led by a female founder, we aim to inspire more women to enter the AI field.
# Contributors
@@ -238,6 +239,12 @@ Many existing works greatly inspired AdalFlow library! Here is a non-exhaustive
month = {7},
year = {2024},
doi = {10.5281/zenodo.12639531},
- url = {https://github.com/SylphAI-Inc/LightRAG}
+ url = {https://github.com/SylphAI-Inc/AdalFlow}
}
```
+
+# Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=SylphAI-Inc/AdalFlow&type=Date)](https://star-history.com/#SylphAI-Inc/AdalFlow&Date)
+
diff --git a/adalflow/CHANGELOG.md b/adalflow/CHANGELOG.md
index d14cdc04..58ed5f05 100644
--- a/adalflow/CHANGELOG.md
+++ b/adalflow/CHANGELOG.md
@@ -1,3 +1,16 @@
+## [0.2.3.beta.1] - 2024-09-17
+### Removed
+- Removed `/reasoning`, as chain-of-thought (CoT) is too simple to warrant a separate module.
+### Fixed
+- datasets/hotpotqa.py
+- eval/answer_match_acc: applied `lower()` to both the ground truth and the prediction in the fuzzy match. On HotPotQA, accuracy goes from 0.15 to 0.4 on one test.
+- eval/functional: fixed `confidence_interval` so the confidence level can be customized.
+
+### Added
+Auto-grad system to support retriever and any component:
+- `GradComponent` has a default `forward` which wraps the `call` to handle the auto-grad automatically for any component that has subclassed `GradComponent`.
+- Clarified `ParameterType` to include `input`, `output`, and `hyperparam`, rather than following PyTorch's Tensor/Parameter design pattern.
+- `TraceGraph` of the `Parameter` in `draw_graph` now supports `ParameterType`.
## [0.2.2] - 2024-09-09
### Added
- `get_cache_path`, instead of print out the cache path all the time, we add a ``get_cache_path`` to get the cache path.
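A minimal sketch of the auto-grad behavior described in the 0.2.3.beta.1 notes above, assuming the `GradComponent` and `Parameter` interfaces introduced later in this diff; the `Doubler` component and its data are hypothetical:

```python
from adalflow.optim.grad_component import GradComponent
from adalflow.optim.parameter import Parameter


class Doubler(GradComponent):
    """Hypothetical component: only `call` is implemented; the default
    `forward` inherited from GradComponent wraps it for training/auto-grad."""

    def call(self, input: int) -> int:
        return 2 * input


doubler = Doubler()
doubler.name = "doubler"   # mirrors the unit test in this diff, which sets the name explicitly
doubler.training = True    # training mode: __call__ routes to forward()

x = Parameter(data=3, name="x", requires_opt=True)
y = doubler(input=x)       # a Parameter tracing the forward pass

print(y.data)               # 6
print(len(y.predecessors))  # 1: the input Parameter x
```

In eval mode (`doubler.training = False`), the same `__call__` falls through to `call` and returns the raw value.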
diff --git a/adalflow/adalflow/__init__.py b/adalflow/adalflow/__init__.py
index a1bb564a..41af85ed 100644
--- a/adalflow/adalflow/__init__.py
+++ b/adalflow/adalflow/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.2.2"
+__version__ = "0.2.3.beta.1"
from adalflow.core.component import Component, fun_to_component
from adalflow.core.container import Sequential
@@ -24,6 +24,7 @@
ListParser,
BooleanParser,
)
+from adalflow.core.retriever import Retriever
from adalflow.components.output_parsers import (
YamlOutputParser,
JsonOutputParser,
@@ -70,6 +71,7 @@
"ModelClient",
"Generator",
"Embedder",
+ "Retriever",
"Parameter",
"AdalComponent",
"Trainer",
diff --git a/adalflow/adalflow/components/reasoning/__init__.py b/adalflow/adalflow/components/reasoning/__init__.py
deleted file mode 100644
index f9340a77..00000000
--- a/adalflow/adalflow/components/reasoning/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .chain_of_thought import * # noqa: F401, F403
diff --git a/adalflow/adalflow/components/reasoning/chain_of_thought.py b/adalflow/adalflow/components/reasoning/chain_of_thought.py
deleted file mode 100644
index 432613ca..00000000
--- a/adalflow/adalflow/components/reasoning/chain_of_thought.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""
-Chain of the thought(CoT) is to mimic a step-by-step thought process for arriving at the answer.
-
-https://arxiv.org/abs/2201.11903, published in Jan, 2023
-
-Chain of the thought(CoT) is to mimic a step-by-step thought process for arriving at the answer. You can achieve it in two ways:
-1. Add instructions such as "Let's think step-by-step to answer this question".
-2. Add few-shot examples such as
-'
-Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can has 3 tennis balls. How many tennis balls does he have now?
-A: Roger started with 5 balls. 2 cansof 3 tennis balls each is 6 tennis balls. 5 + 6 = 11. The answer is 11.
-'
-
-NOTE: CoT can be helpful for more complicated task, it also varies from task to task and model to model.
-For instance, CoT might already be supported in gpt3.5+ api calls.
-
-Benchmark it with and without CoT to see if it helps.
-"""
-
-# from core.component import Component
-# from core.generator import Generator
-# from core.string_parser import JsonParser
-# from core.model_client import ModelClient
-# from core.default_prompt_template import DEFAULT_LIGHTRAG_SYSTEM_PROMPT
-
-
-COT_TASK_DESC_STR_BASIC = (
- "You are a helpful assistant. Let's think step-by-step to answer user's query."
-)
-# Using triple quotes to include JSON-like structure more cleanly
-COT_TASK_DESC_STR_WITH_JSON_OUTPUT = f"""
-{COT_TASK_DESC_STR_BASIC} Output JSON format: {{"thought": "", "answer": ""}}
-"""
-
-
-# ChainOfThought will just be a generator with preset_prompt_kwargs of the task_desc_str = COT_TASK_DESC_STR
-# additional you can ask it to generate a json with "thought" and "anwer" keys and use jsonParser
-
-
-# class CoTGenerator(Generator):
-# r"""
-# CoTGenerator is a subclass of Generator with default task_desc_str preset for Chain of Thought.
-# Output will be string.
-# It is exactly the same as using a Generator.
-# Example:
-# ```
-# cot = CoTGenerator(model_client=model_client, model_kwargs={"model": model})
-# ```
-# """
-
-# def __init__(
-# self,
-# *,
-# model_client: ModelClient,
-# model_kwargs: Dict = {},
-# template: Optional[str] = None,
-# preset_prompt_kwargs: Optional[Dict] = None,
-# output_processors: Optional[Component] = None,
-# ) -> None:
-
-# super().__init__(
-# model_client=model_client,
-# model_kwargs=model_kwargs,
-# template=template or DEFAULT_LIGHTRAG_SYSTEM_PROMPT,
-# preset_prompt_kwargs=preset_prompt_kwargs
-# or {"task_desc_str": COT_TASK_DESC_STR_BASIC},
-# output_processors=output_processors,
-# )
-
-
-# class CoTGeneratorWithJsonOutput(Generator):
-# r"""
-# CoTGeneratorWithJsonOutput is a subclass of Generator with default task_desc_str preset for Chain of Thought.
-# Output will be parsed as JSON with "thought" and "answer" keys.
-# Example:
-# ```
-# cot = CoTGeneratorWithJsonOutput(model_client=model_client, model_kwargs={"model": model})
-# ```
-# """
-
-# def __init__(
-# self,
-# *,
-# model_client: ModelClient,
-# model_kwargs: Dict = {},
-# template: Optional[str] = None,
-# preset_prompt_kwargs: Optional[Dict] = None,
-# output_processors: Optional[Component] = None,
-# ) -> None:
-
-# super().__init__(
-# model_client=model_client,
-# model_kwargs=model_kwargs,
-# template=template or DEFAULT_LIGHTRAG_SYSTEM_PROMPT,
-# preset_prompt_kwargs=preset_prompt_kwargs
-# or {"task_desc_str": COT_TASK_DESC_STR_WITH_JSON_OUTPUT},
-# output_processors=output_processors or JsonParser(),
-# )
diff --git a/adalflow/adalflow/core/README.md b/adalflow/adalflow/core/README.md
deleted file mode 100644
index 4aa0c220..00000000
--- a/adalflow/adalflow/core/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-"""
-test file: tests/test_tool.py
-
-Different from llamaindex which defines these types of tools:
-- FunctionTool
-- RetrieverTool
-- QueryEngineTool
--...
-Llamaindex: BaseTool->AsyncBaseTool->FunctionTool
-Our tool is an essential callable object (similar to the function tool) that you can wrap in any other parts such as retriever, generator in.
-TO support:
-- sync tool
-- async tool
-TODO: to observe and improve the mix of sync and async tools in the future.
-How can we know after the llm call that a function tool is sync or async?
-"""
-
-Tool can be under `/adalflow`.
diff --git a/adalflow/adalflow/core/base_data_class.py b/adalflow/adalflow/core/base_data_class.py
index 0ba4b823..daac546d 100644
--- a/adalflow/adalflow/core/base_data_class.py
+++ b/adalflow/adalflow/core/base_data_class.py
@@ -27,7 +27,15 @@
represent_ordereddict,
)
-
+__all__ = [
+ "DataClass",
+ "DataClassFormatType",
+ "required_field",
+ "ExcludeType",
+ "IncludeType",
+ "check_adal_dataclass",
+ "DynamicDataClassFactory",
+]
logger = logging.getLogger(__name__)
@@ -125,7 +133,8 @@ class DataClass:
Overall, we have a unified class method :meth:`format_str` to generate formatted output based on the type of operation and class/instance context.
note::
- You do not need to use our format, overwrite any method in the subclass to fit in your needs.
+ 1. Avoid using Optional[Type] for the type of fields, as dataclasses already distinguish between optional and required fields using default values.
+ 2. If you need to customize, you can subclass and overwrite any method to fit your needs.
Loading data:
@@ -176,8 +185,8 @@ class MyOutputs(DataClass):
# name: John Doe
"""
- __input_fields__ = []
- __output_fields__ = []
+ __input_fields__: List[str] = []
+ __output_fields__: List[str] = []
def __post_init__(self):
@@ -687,9 +696,6 @@ def format_example_str(
else:
raise ValueError(f"Unsupported format type: {format_type}")
- # TODO:support Generic[Type[T]] for the type of fields
- # it will automatically use __type_var_map__ attribute
-
def check_adal_dataclass(data_class: Type) -> None:
"""Check if the provided class is a valid dataclass for the AdalFlow framework.
diff --git a/adalflow/adalflow/core/db.py b/adalflow/adalflow/core/db.py
index 5062ded5..aabf762c 100644
--- a/adalflow/adalflow/core/db.py
+++ b/adalflow/adalflow/core/db.py
@@ -23,7 +23,7 @@
# TODO: DB clarity can be further improved
@dataclass
class LocalDB(Generic[T], Component):
- __doc__ = r"""LocalDB with in-memory CRUD operations, data transformation/processing pipelines, and persistence.
+ __doc__ = """LocalDB with in-memory CRUD operations, data transformation/processing pipelines, and persistence.
LocalDB is highly flexible.
1. It can store any type of data items in the `items` attribute.
@@ -126,10 +126,7 @@ def length(self):
def get_transformer_keys(self) -> List[str]:
return list(self.transformed_items.keys())
- # def get_transformed_data(self, key: str) -> List[U]:
- # """Get the transformed items by key."""
- # return self.transformed_items[key]
-
+ # TODO: combine this with fetch_transformed_items
def get_transformed_data(
self, key: str, filter_fn: Callable[[Any], bool] = lambda x: True
) -> List[U]:
diff --git a/adalflow/adalflow/core/embedder.py b/adalflow/adalflow/core/embedder.py
index 89aac0c5..ca6d5cac 100644
--- a/adalflow/adalflow/core/embedder.py
+++ b/adalflow/adalflow/core/embedder.py
@@ -15,6 +15,7 @@
from adalflow.core.component import Component
import adalflow.core.functional as F
+__all__ = ["Embedder", "BatchEmbedder"]
log = logging.getLogger(__name__)
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
index 81d2cf8f..d3662063 100644
--- a/adalflow/adalflow/core/generator.py
+++ b/adalflow/adalflow/core/generator.py
@@ -41,6 +41,9 @@
OBJECTIVE_INSTRUCTION_CHAIN,
)
+__all__ = ["Generator", "BackwardEngine", "create_teacher_generator"]
+
+
log = logging.getLogger(__name__)
PromptArgType = Dict[str, Union[str, Parameter]]
@@ -66,7 +69,8 @@ class Generator(GradComponent, CachedEngine, CallbackManager):
trainable_params (Optional[List[str]], optional): The list of trainable parameters. Defaults to [].
Note:
- The output_processors will be applied to the string output of the model completion. And the result will be stored in the data field of the output. And we encourage you to only use it to parse the response to data format you will use later.
+ The output_processors will be applied to the string output of the model completion, and the result will be stored in the data field of the output.
+ We encourage you to use it only to parse the response into the data format you will use later.
"""
model_type: ModelType = ModelType.LLM
@@ -264,6 +268,7 @@ def _compose_model_kwargs(self, **model_kwargs) -> Dict:
combined_model_kwargs.update(model_kwargs)
return combined_model_kwargs
+ # TODO: use prompt_kwargs as users are already familiar with it
def print_prompt(self, **kwargs) -> str:
return self.prompt.print_prompt(**kwargs)
@@ -334,7 +339,8 @@ def _model_client_call(self, api_kwargs: Dict, use_cache: bool = False) -> Any:
raise e
##############################################################################################################
- ### Forward and backwards, and teacher generator are for training
+ ### Forward, backward, teacher generator, and demo data instance creation
+ # are for training and backpropagation
##############################################################################################################
def create_demo_data_instance(
@@ -343,6 +349,10 @@ def create_demo_data_instance(
output: GeneratorOutput,
id: Optional[str] = None,
):
+ r"""Automatically create a demo data instance from the input and output of the generator.
+ Used to trace the demos for the demo parameter in the prompt_kwargs.
+ Part of the few-shot learning.
+ """
from adalflow.core.base_data_class import DynamicDataClassFactory
# map the input fields
@@ -352,7 +362,10 @@ def create_demo_data_instance(
)
for k, v in input_prompt_kwargs.items():
- demo_data[k] = v
+ if isinstance(v, Parameter):
+ demo_data[k] = v.map_to_successor(self)
+ else:
+ demo_data[k] = v
# map the output fields
for key, value in demo_data_class_output_mapping.items():
demo_data[key] = value(output)
@@ -473,15 +486,10 @@ def forward(
raise ValueError(
"ID is required for tracing. Please pass it to your Geneartor call."
)
- input_prompt_kwargs = {
- k: v.data if isinstance(v, Parameter) else v
- for k, v in prompt_kwargs.items()
- }
demo = self.create_demo_data_instance(
- input_prompt_kwargs,
+ prompt_kwargs,
output,
- # self._demo_data_class_output_mapping,
id=id,
)
demo_param.add_to_trace(demo, is_teacher=self.teacher_mode)
@@ -842,7 +850,8 @@ def _extra_repr(self) -> str:
def to_dict(self) -> Dict[str, Any]:
r"""Convert the generator to a dictionary."""
- # exclude default functions
+ # TODO: exclude default functions
+ return super().to_dict()
@staticmethod
def failure_message_to_backward_engine(
diff --git a/adalflow/adalflow/core/retriever.py b/adalflow/adalflow/core/retriever.py
index 6035aff8..bcde901f 100644
--- a/adalflow/adalflow/core/retriever.py
+++ b/adalflow/adalflow/core/retriever.py
@@ -1,6 +1,6 @@
r"""The base class for all retrievers who in particular retrieve documents from a given database."""
-from typing import List, Optional, Generic, Any, Callable, Union, TYPE_CHECKING
+from typing import List, Optional, Generic, Any, Callable, TYPE_CHECKING, Union
import logging
from adalflow.core.types import (
@@ -14,12 +14,13 @@
if TYPE_CHECKING:
from adalflow.core.generator import Generator
-from adalflow.optim.parameter import Parameter, ParameterType
-from adalflow.optim.function import BackwardContext
+from adalflow.optim.parameter import Parameter
+from adalflow.optim.types import ParameterType
log = logging.getLogger(__name__)
+# TODO: trace the retriever in the diagnose files using the callback manager
class Retriever(GradComponent, Generic[RetrieverDocumentType, RetrieverQueryType]):
__doc__ = r"""The base class for all retrievers.
@@ -41,6 +42,8 @@ class Retriever(GradComponent, Generic[RetrieverDocumentType, RetrieverQueryType
indexed: bool = False
index_keys: List[str] = [] # attributes that define the index
+ name: str = "Retriever"
+ top_k: int
def __init__(self, *args, **kwargs):
super().__init__()
@@ -92,46 +95,38 @@ async def acall(
) -> RetrieverOutputType:
raise NotImplementedError("Async retrieve is not implemented")
+ # TODO: adapt the generator to auto-track the prompt_kwargs as parameters
def forward(
self,
input: Union[RetrieverQueriesType, Parameter],
top_k: Optional[
int
] = None, # TODO: top_k can be trained in the future if its formulated as a parameter
- id: Optional[str] = None,
**kwargs,
) -> Parameter:
- r"""Training mode which will deal with parameter as predecessors"""
- input_args = {"input": input, "top_k": top_k, "id": id}
- predecessors = [p for p in [input, top_k, id] if isinstance(p, Parameter)]
-
- input_args_values = {}
- for k, v in input_args.items():
- if isinstance(v, Parameter):
- input_args_values[k] = v.data
- else:
- input_args_values[k] = v
-
- retriever_reponse = self.call(**input_args_values)
-
- response = Parameter(
- data=retriever_reponse,
- name=self.name + "_output",
- role_desc="Retriever response",
- input_args=input_args,
- full_response=retriever_reponse,
- )
- response.set_predecessors(predecessors)
- response.trace_forward_pass(
- input_args=input_args, full_response=retriever_reponse
- )
- response.set_grad_fn(
- BackwardContext(
- backward_fn=self.backward,
- response=response,
- id=id,
+ r"""Customized forward on top of the GradComponent forward method.
+
+ Tracks the input as a Parameter and sets the parameter type of the response to RETRIEVER_OUTPUT.
+ """
+ # convert input to parameter if it is not
+ if not isinstance(input, Parameter):
+ input = Parameter(
+ data=input,
+ name="input",
+ requires_opt=True,
+ param_type=ParameterType.INPUT,
)
+ # trace the top_k in the DAG
+ top_k = Parameter(
+ data=top_k or self.top_k,
+ name="top_k",
+ requires_opt=True,
+ param_type=ParameterType.HYPERPARAM,
)
+ response = super().forward(input, top_k=top_k, **kwargs)
+ response.param_type = (
+ ParameterType.RETRIEVER_OUTPUT
+ ) # be more specific about the type
return response
def backward(
@@ -142,28 +137,4 @@ def backward(
):
r"""Backward the response to pass the score to predecessors"""
log.info(f"Retriever backward: {response}")
- children_params = response.predecessors
- if not self.tracing:
- return
- # backward score to the demo parameter
- for pred in children_params:
- if pred.requires_opt:
- # pred._score = float(response._score)
- pred.set_score(response._score)
- log.debug(
- f"backpropagate the score {response._score} to {pred.name}, is_teacher: {self.teacher_mode}"
- )
- if pred.param_type == ParameterType.DEMOS:
- # Accumulate the score to the demo
- pred.add_score_to_trace(
- trace_id=id, score=response._score, is_teacher=self.teacher_mode
- )
- log.debug(f"Pred: {pred.name}, traces: {pred._traces}")
-
- # def __call__(self, *args, **kwargs) -> Union[RetrieverOutputType, Any]:
- # if self.training:
- # log.debug("Training mode")
- # return self.forward(*args, **kwargs)
- # else:
- # log.debug("Inference mode")
- # return self.call(*args, **kwargs)
+ pass
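With the default `GradComponent.forward` above, a retriever subclass only needs to implement `call`; in training mode the query and `top_k` are traced as Parameters and the response is typed `RETRIEVER_OUTPUT`. A minimal sketch; the `KeywordRetriever` class and its documents are hypothetical:

```python
from typing import List

from adalflow.core.retriever import Retriever
from adalflow.core.types import RetrieverOutput


class KeywordRetriever(Retriever):
    """Hypothetical retriever: returns documents that contain the query string."""

    def __init__(self, documents: List[str], top_k: int = 2):
        super().__init__()
        self.documents = documents
        self.top_k = top_k

    def call(self, input: str, top_k: int = None) -> List[RetrieverOutput]:
        top_k = top_k or self.top_k
        matches = [d for d in self.documents if input.lower() in d.lower()][:top_k]
        return [RetrieverOutput(query=input, documents=matches, doc_indices=[])]


retriever = KeywordRetriever(
    documents=["Ada Lovelace wrote the first program.", "Alan Turing broke Enigma."]
)
retriever.train()                # training mode: __call__ routes to forward()
out = retriever(input="ada")     # a Parameter; its param_type is RETRIEVER_OUTPUT
print(out.param_type, out.data)
```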
diff --git a/adalflow/adalflow/datasets/hotpot_qa.py b/adalflow/adalflow/datasets/hotpot_qa.py
index 4bbf76df..dd4b414b 100644
--- a/adalflow/adalflow/datasets/hotpot_qa.py
+++ b/adalflow/adalflow/datasets/hotpot_qa.py
@@ -7,7 +7,6 @@
from adalflow.utils.data import Dataset
-from adalflow.utils.global_config import get_adalflow_default_root_path
from adalflow.utils.file_io import save_csv
from adalflow.datasets.utils import prepare_dataset_path
from adalflow.core.base_data_class import DataClass
@@ -30,15 +29,16 @@ def __init__(
if keep_details not in ["all", "dev_titles", "none"]:
raise ValueError("Keep details must be one of 'all', 'dev_titles', 'none'")
- if root is None:
- root = get_adalflow_default_root_path()
- print(f"Saving dataset to {root}")
+ # if root is None:
+ # root = get_adalflow_default_root_path()
+ # print(f"Saving dataset to {root}")
self.root = root
- task_name = f"hotpot_qa_{keep_details}"
- data_path = prepare_dataset_path(self.root, task_name, split)
+ self.task_name = f"hotpot_qa_{keep_details}"
+ data_path = prepare_dataset_path(self.root, self.task_name)
# download and save
+ split_csv_path = os.path.join(data_path, f"{split}.csv")
self._check_or_download_dataset(
- task_name, data_path, split, only_hard_examples, keep_details
+ split_csv_path, split, only_hard_examples, keep_details
)
# load from csv
@@ -46,7 +46,7 @@ def __init__(
# created_data_class = DynamicDataClassFactory.from_dict(
# "HotPotQAData", {"id": "str", "question": "str", "answer": "str"}
- with open(data_path, newline="") as csvfile:
+ with open(split_csv_path, newline="") as csvfile:
reader = csv.DictReader(csvfile)
for i, row in enumerate(reader):
if size is not None and i >= size:
@@ -55,12 +55,21 @@ def __init__(
def _check_or_download_dataset(
self,
- task_name: str,
data_path: str = None,
split: str = "train",
only_hard_examples=True,
keep_details="dev_titles",
):
+ r"""It will download data from huggingface datasets and split it and save it into three csv files.
+ Args:
+ data_path (str): The path to save the data. In particular with split name appended.
+ split (str): The dataset split, supports ``"train"`` (default), ``"val"`` and ``"test"``. Decides which split to return.
+ only_hard_examples (bool): If True, only hard examples will be downloaded.
+ keep_details (str): If "all", all details will be kept. If "dev_titles", only dev titles will be kept.
+ """
+
+ if data_path is None:
+ raise ValueError("data_path must be specified")
if os.path.exists(data_path):
return
@@ -136,8 +145,8 @@ def _check_or_download_dataset(
["train", "val", "test"],
[sampled_trainset, sampled_valset, test],
):
- target_path = prepare_dataset_path(self.root, task_name, split)
- save_csv(examples, f=target_path, fieldnames=keys)
+ # target_path = prepare_dataset_path(self.root, task_name, split)
+ save_csv(examples, f=data_path, fieldnames=keys)
if split == "train":
return sampled_trainset
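Usage sketch of the updated dataset class: each split is downloaded once, split, saved as its own CSV under the task directory, and then loaded from there (the sizes below are arbitrary):

```python
from adalflow.datasets.hotpot_qa import HotPotQA

# the first call downloads and writes train.csv / val.csv / test.csv; later calls read the CSVs
trainset = HotPotQA(split="train", size=20)
valset = HotPotQA(split="val", size=50)

print(len(trainset), trainset[0])  # HotPotQAData items
```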
diff --git a/adalflow/adalflow/eval/answer_match_acc.py b/adalflow/adalflow/eval/answer_match_acc.py
index 4957cdef..7a9fa8f7 100644
--- a/adalflow/adalflow/eval/answer_match_acc.py
+++ b/adalflow/adalflow/eval/answer_match_acc.py
@@ -64,6 +64,8 @@ def compute_single_item(
if self.type == "exact_match":
return 1.0 if y == y_gt else 0.0
elif self.type == "fuzzy_match":
+ y = y.lower()
+ y_gt = y_gt.lower()
return 1.0 if y_gt in y else 0.0
else:
raise NotImplementedError
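The case-insensitive fuzzy match above counts a prediction as correct when the lower-cased ground truth is contained in the lower-cased prediction; a quick sketch with made-up strings:

```python
from adalflow.eval.answer_match_acc import AnswerMatchAcc

acc = AnswerMatchAcc("fuzzy_match")
# "paris" is contained in "the answer is paris." once both sides are lower-cased
print(acc.compute_single_item(y="The answer is Paris.", y_gt="paris"))  # 1.0
```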
diff --git a/adalflow/adalflow/eval/functional.py b/adalflow/adalflow/eval/functional.py
index aa436bc1..e93e2b0c 100644
--- a/adalflow/adalflow/eval/functional.py
+++ b/adalflow/adalflow/eval/functional.py
@@ -15,14 +15,28 @@ def confidence_interval(
Returns:
tuple: Lower and upper bounds of the confidence interval.
"""
+ if not isinstance(judgements, list):
+ raise ValueError("judgements must be a list")
+
+ if not all(isinstance(j, (int, float)) for j in judgements):
+ raise ValueError("judgements must contain only integers or floats")
+
+ if not isinstance(confidence, (int, float)):
+ raise ValueError("confidence must be a number")
+
+ if not 0 < confidence < 1:
+ raise ValueError("confidence must be between 0 and 1")
+
# Step 1: Calculate the mean
mean_score = np.mean(judgements)
# Step 2: Calculate the standard error (SE)
standard_error = np.std(judgements, ddof=1) / np.sqrt(len(judgements))
- # Step 3: Use the Z-critical value for the confidence interval
- z_critical = 1.96 # For a 95% CI with a normal distribution
+ # Step 3: Use the Z-critical value for the confidence interval based on confidence level
+ z_critical = np.percentile(
+ np.random.normal(0, 1, 1000000), 100 * (1 - (1 - confidence) / 2)
+ )
# Step 4: Calculate the margin of error (MoE)
margin_of_error = z_critical * standard_error
@@ -71,7 +85,7 @@ def longest_common_substring(s1: str, s2: str) -> str:
if __name__ == "__main__":
# Example binary judgments (True/False as 1/0)
judgements = [1, 1, 0, 1, 0, 1, 1] # Convert to 1/0
- score_range = confidence_interval(judgements)
+ score_range = confidence_interval(judgements, confidence=0.96)
print(score_range)
# Example longest common substring
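For reference, the two-sided z-critical value that the new `confidence` argument targets can also be computed deterministically with the standard library; this is a sketch of an alternative, not what the diff implements:

```python
from statistics import NormalDist


def z_critical(confidence: float) -> float:
    """Two-sided z critical value for a confidence level, e.g. ~1.96 for 0.95."""
    if not 0 < confidence < 1:
        raise ValueError("confidence must be between 0 and 1")
    return NormalDist().inv_cdf(1 - (1 - confidence) / 2)


print(round(z_critical(0.95), 2))  # 1.96
```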
diff --git a/adalflow/adalflow/eval/g_eval.py b/adalflow/adalflow/eval/g_eval.py
index e0494ce8..5bdda219 100644
--- a/adalflow/adalflow/eval/g_eval.py
+++ b/adalflow/adalflow/eval/g_eval.py
@@ -12,6 +12,8 @@
from adalflow.eval.llm_as_judge import DEFAULT_LLM_EVALUATOR_MODEL_KWARGS
from adalflow.core.string_parser import FloatParser
+__all__ = ["GEvalMetric", "NLGTask", "GEvalLLMJudge", "GEvalJudgeEvaluator"]
+
log = logging.getLogger(__name__)
@@ -256,5 +258,5 @@ def compute(
return final_output, output
def __str__(self) -> str:
- s = f"llm_judge={self.llm_evaluator}, prompt_kwargs={self.llm_judge.prompt_kwargs}"
+ s = f"llm_judge={self.llm_judge}, prompt_kwargs={self.llm_judge.prompt_kwargs}"
return s
diff --git a/adalflow/adalflow/eval/llm_as_judge.py b/adalflow/adalflow/eval/llm_as_judge.py
index 241e11f4..d7461bad 100644
--- a/adalflow/adalflow/eval/llm_as_judge.py
+++ b/adalflow/adalflow/eval/llm_as_judge.py
@@ -14,6 +14,7 @@
from adalflow.eval.base import BaseEvaluator
from adalflow.eval.functional import confidence_interval
+__all__ = ["DefaultLLMJudge", "LLMasJudge", "LLMJudgeEvalResult"]
log = logging.getLogger(__name__)
diff --git a/adalflow/adalflow/optim/grad_component.py b/adalflow/adalflow/optim/grad_component.py
index b200acc6..016c08db 100644
--- a/adalflow/adalflow/optim/grad_component.py
+++ b/adalflow/adalflow/optim/grad_component.py
@@ -1,12 +1,20 @@
"""Base class for Autograd Components that can be called and backpropagated through."""
from typing import TYPE_CHECKING
+from collections import OrderedDict
+import logging
if TYPE_CHECKING:
from adalflow.core.generator import BackwardEngine
from adalflow.optim.parameter import Parameter
+from adalflow.optim.types import ParameterType
+
from adalflow.core.component import Component
+from adalflow.optim.function import BackwardContext
+
+__all__ = ["GradComponent"]
+log = logging.getLogger(__name__)
class GradComponent(Component):
@@ -15,7 +23,7 @@ class GradComponent(Component):
Compared with `Component`, `GradComponent` defines three important interfaces:
- `forward`: the forward pass of the function, returns a `Parameter` object that can be traced and backpropagated.
- `backward`: the backward pass of the function, updates the gradients/prediction score backpropagated from a "loss" parameter.
- - `set_backward_engine`: set the backward engine(a form of generator) to the component, which is used to backpropagate the gradients.
+ - `set_backward_engine`: set the backward engine(a form of generator) to the component, which is used to backpropagate the gradients using LLM.
The __call__ method will check if the component is in training mode,
and call the `forward` method to return a `Parameter` object if it is in training mode,
@@ -32,7 +40,6 @@ def __call__(self, *args, **kwargs):
if self.training:
return self.forward(*args, **kwargs)
else:
- print("calling the call method")
return self.call(*args, **kwargs)
def set_backward_engine(self, backward_engine: "BackwardEngine", *args, **kwargs):
@@ -41,10 +48,91 @@ def set_backward_engine(self, backward_engine: "BackwardEngine", *args, **kwargs
def call(self, *args, **kwargs):
raise NotImplementedError("call method is not implemented")
+ async def acall(self, *args, **kwargs):
+ r"""Implement this for your async call."""
+ raise NotImplementedError("acall method is not implemented")
+
def forward(self, *args, **kwargs) -> "Parameter":
- r"""Default just wraps the call method."""
+ r"""Default forward method for training:
+ 1. For all args and kwargs, any `Parameter` object is tracked as a predecessor.
+ 2. Trace input_args and full_response in the parameter object.
+ 3. Return the parameter object.
+
+ TODO: all GradComponent subclasses should accept only kwargs, not positional args.
+ For now, just check if id is in kwargs.
+ """
+
+ from adalflow.optim.parameter import Parameter
+
+ log.debug(
+ f"Forwarding through {self.name} with args: {args} and kwargs: {kwargs}"
+ )
+
+ # if "id" not in kwargs:
+ # raise ValueError(
+ # "id must be provided in the kwargs of a GradComponent for tracing."
+ # )
+
+ # 1. get all predecessors from all args and kwargs
+ input_args = OrderedDict()
+
+ # Add positional args to the ordered dict
+ for idx, arg in enumerate(args):
+ input_args[f"arg_{idx}"] = arg
+
+ # Collect Parameter predecessors from both positional and keyword args
+ predecessors = []
+ for v in input_args.values():
+ if isinstance(v, Parameter):
+ predecessors.append(v)
+ for v in kwargs.values():
+ if isinstance(v, Parameter):
+ predecessors.append(v)
+
+ # 2. unwrap each Parameter to its underlying data via map_to_successor (default successor_map_fn: lambda x: x.data)
+ # unwrap args
+ unwrapped_args = []
+ for k, v in input_args.items():
+ if isinstance(v, Parameter):
+ unwrapped_args.append(v.map_to_successor(self))
+ else:
+ unwrapped_args.append(v)
+
+ unwrapped_kwargs = {}
+ # unwrap kwargs
+ for k, v in kwargs.items():
+ if isinstance(v, Parameter):
+ unwrapped_kwargs[k] = v.map_to_successor(self)
+ else:
+ unwrapped_kwargs[k] = v
+
+ # 3. call the function with unwrapped args and kwargs
+ unwrapped_args = tuple(unwrapped_args)
+
+ log.debug(f"Unwrapped args: {unwrapped_args}")
+ log.debug(f"Unwrapped kwargs: {unwrapped_kwargs}")
+
+ call_response = self.call(*unwrapped_args, **unwrapped_kwargs)
- raise NotImplementedError("forward method is not implemented")
+ # 4. Create a Parameter object to trace the forward pass
+ input_args.update(kwargs)
+ response = Parameter(
+ data=call_response,
+ name=self.name + "_output",
+ role_desc=self.name + " response",
+ param_type=ParameterType.OUTPUT,
+ )
+ response.set_predecessors(predecessors)
+ response.trace_forward_pass(input_args=input_args, full_response=call_response)
+ response.set_grad_fn(
+ BackwardContext(
+ backward_fn=self.backward,
+ response=response,
+ id=kwargs.get("id", None),
+ )
+ )
+ return response
def backward(self, *args, **kwargs):
- raise NotImplementedError("backward method is not implemented")
+ pass
+ # raise NotImplementedError("backward method is not implemented")
diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py
index abad6e0c..a6f9459b 100644
--- a/adalflow/adalflow/optim/parameter.py
+++ b/adalflow/adalflow/optim/parameter.py
@@ -1,6 +1,17 @@
"""Parameter is used by Optimizer, Trainers, AdalComponent to auto-optimizations"""
-from typing import Generic, TypeVar, Any, List, Set, Dict, Tuple, Optional, Literal
+from typing import (
+ Generic,
+ TypeVar,
+ Any,
+ List,
+ Set,
+ Dict,
+ Tuple,
+ Optional,
+ Literal,
+ Callable,
+)
from collections import defaultdict
import logging
from dataclasses import dataclass, field
@@ -79,12 +90,22 @@ class Parameter(Generic[T]):
1. https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
"""
+ id: str = None # Unique id of the parameter
+ name: str = None # Name of the parameter, easier to read for humans
+ role_desc: str = "" # Description of the role of the parameter
+ data: T = None # Data of the parameter
+ param_type: ParameterType
+
proposing: bool = False # State of the parameter
predecessors: Set["Parameter"] = set() # Predecessors of the parameter
peers: Set["Parameter"] = set() # Peers of the parameter
+ # TODO: input_args should be OrderedDict to keep the order of args
input_args: Dict[str, Any] = None # Input arguments of the GradComponent forward
full_response: object = None # Full response of the GradComponent output
eval_input: object = None # Eval input passing to the eval_fn or evaluator you use
+ successor_map_fn: Dict[str, Callable] = (
+ None # Map function to get the data from the output
+ )
from_response_id: str = (
None # for parameterType GRADIENT, the id of the response parameter
)
@@ -108,6 +129,7 @@ def __init__(
score: Optional[float] = None,
eval_input: object = None,
from_response_id: Optional[str] = None,
+ successor_map_fn: Optional[Dict[str, Callable]] = None,
):
self.id = id or str(uuid.uuid4())
@@ -155,6 +177,20 @@ def __init__(
self.eval_input = eval_input
self.from_response_id = from_response_id # for gradient parameter
+ self.successor_map_fn = successor_map_fn or {}
+
+ def map_to_successor(self, successor: object) -> T:
+ """Apply the map function to the successor based on the successor's id."""
+ successor_id = id(successor)
+ if successor_id not in self.successor_map_fn:
+ default_map_fn = lambda x: x.data # noqa: E731
+ return default_map_fn(self)
+
+ return self.successor_map_fn[successor_id](self)
+
+ def add_successor_map_fn(self, successor: object, map_fn: Callable):
+ """Add or update a map function for a specific successor using its id."""
+ self.successor_map_fn[id(successor)] = map_fn
def check_if_already_computed_gradient_respect_to(self, response_id: str) -> bool:
from_response_ids = [g.from_response_id for g in self.gradients]
@@ -380,7 +416,9 @@ def build_graph(node: "Parameter"):
build_graph(root)
return nodes, edges
- def backward(self): # engine should be the llm
+ def backward(
+ self,
+ ): # engine should be the llm or a customized backward function to pass feedback
# topological sort of all the predecessors of the current parameter in the graph
log.debug(f"Backward pass for {self.data}, backward function: {self.grad_fn}")
@@ -505,6 +543,8 @@ def wrap_and_escape(text, width=40):
node_label += f"
Previous Value:
{wrap_and_escape(n.previous_data)}
"
if n.requires_opt:
node_label += f"
Requires Optimization:
{{'Yes'}}
"
+ if n.param_type:
+ node_label += f"
Type:
{wrap_and_escape(n.param_type.name)}
"
if add_grads:
node_label += f"
Gradients:
{wrap_and_escape(n.get_gradients_names())}
"
# add a list of each gradient with short value
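The new `successor_map_fn` lets a `Parameter` expose a different view of its data to each successor component, keyed by the successor's `id()`. A minimal sketch; the `FakeRetriever` stand-in and the data are made up:

```python
from adalflow.optim.parameter import Parameter


class FakeRetriever:
    """Any object can act as a successor; only its id() is used as the key."""


retriever = FakeRetriever()
param = Parameter(data={"query": "ada lovelace", "raw": "..."}, name="gen_out")

# without a registered map function, the default view is the raw .data
print(param.map_to_successor(retriever))  # {'query': 'ada lovelace', 'raw': '...'}

# register a successor-specific view: hand only the query string to this retriever
param.add_successor_map_fn(successor=retriever, map_fn=lambda p: p.data["query"])
print(param.map_to_successor(retriever))  # 'ada lovelace'
```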
diff --git a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py
index 6226df12..f2d5b918 100644
--- a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py
+++ b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py
@@ -25,58 +25,6 @@
log = logging.getLogger(__name__)
-GLOSSARY_TEXT = r"""
-### Glossary of tags that will be sent to you:
-{# # - : The system prompt for the language model.
-# - : The input to the language model.
-# - : The output of the language model. #}
-# - : The feedback to the variable.
-# - : The conversation history.
-# - : The focus of the optimization.
-# - : The role description of the variable."""
-
-# customize the system prompt
-# prompts, solutions to problems, code, or any other text-based variable. -> to the variable type.
-# The optimizer will have an understanding of different variable types.
-
-# Tips:
-# - DO NOT address concerns on the peer variables. The peer variables will be optimized separately.
-# - The instruction needs to be on point, clear, and accurate.
-# You can take the following actions:
-# - You can delete words or phrases that you think are not necessary or you find misleading.
-# - You can add new words or phrases that you think can address the feedback.
-# - You can add sections like "Tips" or "Remember" to fix the issue.
-# - You can be creative and write the variable in a different way.#}
-
-# Think step by step. But keep the reasoning concise!!!
-# Start response with "because" and "since" to provide reasoning.
-# The last line of your response will be the new variable value put in between {{new_variable_start_tag}} and {{new_variable_end_tag}} tags.
-
-# {#You are part of an optimization system that improves the exsiting variable value according to feedback.
-
-# You will be asked to creatively and critically at proposing new variable value.
-# The feedback may be noisy, focus on what is important.
-# {# output format #}
-# You MUST ONLY output the new variable value in the response between {{new_variable_start_tag}} and {{new_variable_end_tag}} tags.
-# {% if instruction_to_optimizer %}
-# USER INSTRUCTION: {{instruction_to_optimizer}}
-# {% endif %}
-# Remember:
-# - Pay attention to role description, and most importantly, the feedback.
-# - Must provide a different value that addresses the feedback.
-# - If is provided, observe the patterns of the past iterations, pay close attention to high-performing value and aim to improve the variable value.
-# - The new value better not overlap with the scope of peer variables.
-# - Make your best judgment and be creative.
-# TIPS:
-# - Delete words or phrases that you think are not necessary or you find misleading.
-# - Add new words or phrases that you think can address the feedback.
-# - Add sections like "Tips" or "Remember" to fix the issue.
-# - Be eative and write the variable in a different way.#}
-
-# Your task: Propose a new variable value in response to the feedback.
-# 1. Focus on what is essential, even if feedback is noisy.
-# 2. Observe past performance patterns when provided and aim to improve upon high-performing values.
-# 3. Avoid overlap with peer variables' scope.
# Tips:
# 1. Eliminate unnecessary words or phrases.
diff --git a/adalflow/adalflow/optim/trainer/adal.py b/adalflow/adalflow/optim/trainer/adal.py
index 69ae067d..3aead98d 100644
--- a/adalflow/adalflow/optim/trainer/adal.py
+++ b/adalflow/adalflow/optim/trainer/adal.py
@@ -26,11 +26,12 @@
class AdalComponent(Component):
- """Define a train, eval, and test step for a task pipeline.
+ __doc__ = """Define a train, eval, and test step for a task pipeline.
This serves the following purposes:
- 1. Organize all parts for training a task pipeline organized in one place.
+ 1. Organize all parts for training a task pipeline in one place.
2. Help with debugging and testing before the actual training.
+ 3. Add multi-threading support for training and evaluation.
"""
task: Component
@@ -705,9 +706,8 @@ def configure_text_optimizer_helper(
continue
parameters.append(param)
if not parameters:
- raise ValueError(
- "No text parameters found. Please define a demo parameter for your generator."
- )
+ return []
+
to = TGDOptimizer(
params=parameters, model_client=model_client, model_kwargs=model_kwargs
)
diff --git a/adalflow/adalflow/optim/trainer/trainer.py b/adalflow/adalflow/optim/trainer/trainer.py
index 969d38ac..01dcf2fa 100644
--- a/adalflow/adalflow/optim/trainer/trainer.py
+++ b/adalflow/adalflow/optim/trainer/trainer.py
@@ -35,7 +35,7 @@
class Trainer(Component):
- r"""We make trainer a component to as a trainer itself is an LLM task pipeline too.
+ __doc__ = r"""Ready to use trainer for LLM task pipeline to optimize all types of parameters.
Training set: can be used for passing initial proposed prompt or for few-shot sampling.
@@ -243,6 +243,7 @@ def diagnose(self, dataset: Any, split: str = "train"):
}
save_json(stats, os.path.join(log_dir, "stats.json"))
print(f"Total error samples: {len(diagnose_items)}")
+ print(f"Saved diagnose to {diagnose_file}")
return acc_score, acc_per_item_scores, log_paths
@@ -339,7 +340,6 @@ def fit(
)
self.adaltask.configure_teacher_generator()
print("Configured demo optimizers")
- # return
else:
print("No trainable demo params to optimize")
self.demo_optimizers = []
diff --git a/adalflow/adalflow/optim/types.py b/adalflow/adalflow/optim/types.py
index 860bbe98..22b0ab14 100644
--- a/adalflow/adalflow/optim/types.py
+++ b/adalflow/adalflow/optim/types.py
@@ -12,11 +12,25 @@
class ParameterType(Enum):
__doc__ = """Enum for the type of parameter to compute the loss with, and to inform the optimizer."""
+ # trainable parameters with optimizers
PROMPT = (
"prompt",
"Instruction to the language model on task, data, and format.",
- )
- DEMOS = ("demos", "A few examples to guide the language model.")
+ ) # optimized by tgd_optimizer
+ DEMOS = (
+ "demos",
+ "A few examples to guide the language model.",
+ ) # optimized by demo_optimizer
+
+ # input and output parameters (similar to tensors; can have requires_opt=True, but are not trainable)
+ INPUT = ("input", "The input to the component.")
+ OUTPUT = ("output", "The output of the component.")
+ HYPERPARAM = ("hyperparam", "Hyperparameters/args for the component.")
+
+ # gradient parameters for each predecessor in the DAG
+ GRADIENT = ("gradient", "A gradient parameter.")
+
+ # the following are subtypes of the output type
# INSTANCE = ("instance", "Focus on fixing issues of this specific example.")
GENERATOR_OUTPUT = (
"generator_output",
@@ -25,7 +39,6 @@ class ParameterType(Enum):
RETRIEVER_OUTPUT = ("retriever_output", "The output of the retriever.")
LOSS_OUTPUT = ("loss", "The loss value.")
SUM_OUTPUT = ("sum", "The sum of the losses.")
- GRADIENT = ("gradient", "A gradient parameter.")
NONE = ("none", "")
def __init__(self, value, description):
diff --git a/adalflow/pyproject.toml b/adalflow/pyproject.toml
index a928622a..41864a2f 100644
--- a/adalflow/pyproject.toml
+++ b/adalflow/pyproject.toml
@@ -1,7 +1,7 @@
[tool.poetry]
name = "adalflow"
-version = "0.2.2"
+version = "0.2.3.beta.1"
description = "The Library to Build and Auto-optimize Any LLM Task Pipeline"
authors = ["Li Yin
"]
readme = "README.md"
diff --git a/adalflow/tests/test_grad_component.py b/adalflow/tests/test_grad_component.py
new file mode 100644
index 00000000..5ea68a1a
--- /dev/null
+++ b/adalflow/tests/test_grad_component.py
@@ -0,0 +1,79 @@
+import unittest
+import asyncio
+from unittest.mock import MagicMock, patch
+from adalflow.optim.grad_component import GradComponent
+from adalflow.optim.parameter import Parameter
+
+
+class TestGradComponent(unittest.TestCase):
+
+ def setUp(self):
+ self.component = GradComponent()
+ self.component.name = "test_component"
+ self.component.training = True
+
+ def test_initialization(self):
+ # Test if backward_engine is set to None initially
+ self.assertIsNone(self.component.backward_engine)
+
+ @patch.object(GradComponent, "forward", return_value="mock_forward")
+ @patch.object(GradComponent, "call", return_value="mock_call")
+ def test_call_in_training(self, mock_call, mock_forward):
+ # When in training mode, forward should be called
+ self.component.training = True
+ result = self.component()
+ mock_forward.assert_called_once()
+ mock_call.assert_not_called()
+ self.assertEqual(result, "mock_forward")
+
+ @patch.object(GradComponent, "forward", return_value="mock_forward")
+ @patch.object(GradComponent, "call", return_value="mock_call")
+ def test_call_not_in_training(self, mock_call, mock_forward):
+ # When not in training mode, call should be called
+ self.component.training = False
+ result = self.component()
+ mock_call.assert_called_once()
+ mock_forward.assert_not_called()
+ self.assertEqual(result, "mock_call")
+
+ def test_set_backward_engine_not_implemented(self):
+ # Test if set_backward_engine raises NotImplementedError
+ with self.assertRaises(NotImplementedError):
+ self.component.set_backward_engine("mock_backward_engine")
+
+ def test_acall_not_implemented(self):
+ # Test if acall raises NotImplementedError
+ with self.assertRaises(NotImplementedError):
+ asyncio.run(self.component.acall())
+
+ def test_forward(self):
+ self.component.call = MagicMock(return_value="mock_data")
+
+ # Create an actual Parameter instance
+ param = Parameter(data="input_data", name="test_param")
+ # param.successor_map_fn = MagicMock(side_effect=lambda x: "unwrapped_" + str(x))
+ param.add_successor_map_fn(
+ successor=self.component, map_fn=lambda x: "unwrapped_" + str(x)
+ )
+
+ args = [param]
+ kwargs = {"id": 123, "other_param": param}
+
+ # Call the forward method
+ response = self.component.forward(*args, **kwargs)
+
+ # Assert that call was invoked with unwrapped args and kwargs
+ # self.component.call.assert_called_once_with(
+ # "unwrapped_" + str(param) # other_param="unwrapped_" + str(param)
+ # )
+
+ self.assertEqual(isinstance(response, Parameter), True)
+ self.assertEqual(response.data, "mock_data")
+ self.assertEqual(response.full_response, "mock_data")
+ self.assertEqual(
+ len(response.predecessors), 1
+ ) # predecessors is a set, so it should be 1
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/benchmarks/hotpot_qa/adal_exp/__init__.py b/benchmarks/hotpot_qa/adal_exp/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/hotpot_qa/adal_exp/build.py b/benchmarks/hotpot_qa/adal_exp/build.py
new file mode 100644
index 00000000..9f1d078c
--- /dev/null
+++ b/benchmarks/hotpot_qa/adal_exp/build.py
@@ -0,0 +1,630 @@
+"""We will use dspy's retriever to keep that the same and only use our generator and optimizer"""
+
+import dspy
+import re
+from typing import List, Union, Optional, Dict, Callable
+from dataclasses import dataclass, field
+
+import adalflow as adal
+from adalflow.optim.parameter import Parameter, ParameterType
+
+from adalflow.datasets.hotpot_qa import HotPotQA, HotPotQAData
+from adalflow.datasets.types import Example
+
+from adalflow.core.retriever import Retriever
+from adalflow.core.component import fun_to_component
+
+
+colbertv2_wiki17_abstracts = dspy.ColBERTv2(
+ url="http://20.102.90.50:2017/wiki17_abstracts"
+)
+
+dspy.settings.configure(rm=colbertv2_wiki17_abstracts)
+
+
+def load_datasets():
+
+ trainset = HotPotQA(split="train", size=20)
+ valset = HotPotQA(split="val", size=50)
+ testset = HotPotQA(split="test", size=50)
+ print(f"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}")
+ return trainset, valset, testset
+
+
+# task pipeline
+from typing import Any, Tuple
+
+from adalflow.core import Component, Generator
+
+
+# dspy format
+# Follow the following format.
+# Context: may contain relevant facts
+# Question: ${question}
+# Reasoning: Let's think step by step in order to ${produce the query}. We ...
+# Query: ${query}
+@dataclass
+class QueryRewritterData(adal.DataClass):
+ reasoning: str = field(
+ metadata={"desc": "The reasoning to produce the query"},
+ )
+ query: str = field(
+ metadata={"desc": "The query you produced"},
+ )
+
+ __output_fields__ = ["reasoning", "query"]
+
+
+@dataclass
+class AnswerData(adal.DataClass):
+ reasoning: str = field(
+ metadata={"desc": "The reasoning to produce the answer"},
+ )
+ answer: str = field(
+ metadata={"desc": "The answer you produced"},
+ )
+
+ __output_fields__ = ["reasoning", "answer"]
+
+
+query_template = """
+Write a simple search query that will help answer a complex question.
+
+You will receive a context (which may contain relevant facts) and a question.
+Think step by step.
+
+{{output_format_str}}
+{# Few shot demos #}
+{% if few_shot_demos is not none %}
+Here are some examples:
+{{few_shot_demos}}
+{% endif %}
+
+
+Context: {{context}}
+Question: {{question}}
+
+"""
+
+# The library provides a standard template for easy prompting
+answer_template = """
+Answer questions with short factoid answers.
+
+You will receive a context (which may contain relevant facts) and a question.
+Think step by step.
+{{output_format_str}}
+{# Few shot demos #}
+{% if few_shot_demos is not none %}
+Here are some examples:
+{{few_shot_demos}}
+{% endif %}
+
+
+Context: {{context}}
+Question: {{question}}
+"""
+
+
+# @fun_to_component
+# def parse_string_query(text: str) -> str:
+# return re.search(r"Query: (.*)", text).group(1)
+
+
+@fun_to_component
+def parse_string_answer(text: str) -> str:
+ return re.search(r"Answer: (.*)", text).group(1)
+
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class HotPotQADemoData(Example):
+ context: List[str] = field(
+ metadata={"desc": "The context to be used for answering the question"},
+ default_factory=list,
+ )
+ score: float = field(
+ metadata={"desc": "The score of the answer"},
+ default=None,
+ )
+
+
+from adalflow.core.types import RetrieverOutput, GeneratorOutput
+
+
+# Demonstrating how to wrap other retriever to adalflow retriever and be applied in training pipeline
+class DspyRetriever(Retriever):
+ def __init__(self, k=3):
+ super().__init__()
+ self.k = k
+ self.dspy_retriever = dspy.Retrieve(k=k)
+
+ def call(self, input: str) -> List[RetrieverOutput]:
+ output = self.dspy_retriever(query_or_queries=input, k=self.k)
+ print(f"dsy_retriever output: {output}")
+ final_output: List[RetrieverOutput] = []
+ documents = output.passages
+
+ final_output.append(
+ RetrieverOutput(
+ query=input,
+ documents=documents,
+ doc_indices=[],
+ )
+ )
+ print(f"final_output: {final_output}")
+ return final_output
+
+
+import adalflow as adal
+
+
+# User customize an auto-grad operator
+class MultiHopRetriever(adal.Retriever):
+ def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2):
+ super().__init__()
+
+ self.passages_per_hop = passages_per_hop
+ self.max_hops = max_hops
+
+ self.data_parser = adal.DataClassParser(
+ data_class=QueryRewritterData, return_data_class=True, format_type="yaml"
+ )
+
+ # Grad Component
+ self.query_generator = Generator(
+ name="query_generator",
+ model_client=model_client,
+ model_kwargs=model_kwargs,
+ prompt_kwargs={
+ "few_shot_demos": Parameter(
+ name="few_shot_demos_1",
+ data=None,
+ role_desc="To provide few shot demos to the language model",
+ requires_opt=True,
+ param_type=ParameterType.DEMOS,
+ ),
+ "output_format_str": self.data_parser.get_output_format_str(),
+ },
+ template=query_template,
+ # output_processors=parse_string_query,
+ output_processors=self.data_parser,
+ use_cache=True,
+ # demo_data_class=HotPotQADemoData,
+ # demo_data_class_input_mapping={
+ # "question": "question",
+ # # "context": "context",
+ # },
+ # demo_data_class_output_mapping={"answer": lambda x: x.raw_response},
+ )
+ self.retrieve = DspyRetriever(k=passages_per_hop)
+
+ @staticmethod
+ def context_to_str(context: List[str]) -> str:
+ return "\n".join(context)
+
+ def call(self, *, question: str, id: str = None) -> Any: # Add id for tracing
+ # inference mode!!!
+ # output = self.forward(question, id=id)
+
+ context = []
+ self.max_hops = 1
+ for hop in range(self.max_hops):
+ gen_out = self.query_generator(
+ prompt_kwargs={
+ "context": self.context_to_str(context),
+ "question": question,
+ },
+ id=id,
+ )
+ query = None
+ # TODO: the bridge between the retriever and the generator (in both directions) needs to be smoother
+ if isinstance(gen_out, GeneratorOutput):
+ query = ( # noqa: F841
+ gen_out.data.query if gen_out.data and gen_out.data.query else None
+ )
+ elif isinstance(gen_out, adal.Parameter):
+ gen_out.successor_map_fn = lambda x: (
+ x.full_response.data.query
+ if x.full_response and x.full_response.data
+ else None
+ )
+ print(f"gen_out: {gen_out}")
+ # query = (
+ # gen_out.full_response.data.query
+ # if gen_out.full_response and gen_out.full_response.data
+ # else None
+ # )
+ retrieve_out = self.retrieve(input=gen_out)
+ print(f"retrieve_out: {retrieve_out}")
+ # passages = []
+ # if isinstance(retrieve_out, Parameter):
+ # passages = retrieve_out.data[0].documents
+ # else:
+ # passages = retrieve_out[0].documents
+
+ # print(f"passages: {passages}")
+
+ # context = deduplicate(context + passages)
+
+ # # for hop in range(self.max_hops):
+ # last_context_param = Parameter(
+ # data=context,
+ # name=f"query_context_{id}_{0}",
+ # requires_opt=True,
+ # )
+ # query = self.query_generator(
+ # prompt_kwargs={
+ # "context": last_context_param,
+ # "question": question,
+ # },
+ # id=id,
+ # )
+ # print(f"query: {query}")
+ # if isinstance(query, GeneratorOutput):
+ # query = query.data
+ # output = self.retrieve(query)
+ # print(f"output: {output}")
+ # print(f"output call: {output}")
+ # return output[0].documents
+
+ # def forward(self, question: str, id: str = None) -> Parameter:
+ # question_param = question
+ # if not isinstance(question, Parameter):
+ # question_param = Parameter(
+ # data=question,
+ # name="question",
+ # role_desc="The question to be answered",
+ # requires_opt=False,
+ # )
+ # context = []
+ # self.max_hops = 1
+ # # for hop in range(self.max_hops):
+ # last_context_param = Parameter(
+ # data=context,
+ # name=f"query_context_{id}_{0}",
+ # requires_opt=True,
+ # )
+ # query = self.query_generator(
+ # prompt_kwargs={
+ # "context": last_context_param,
+ # "question": question_param,
+ # },
+ # id=id,
+ # )
+ # print(f"query: {query}")
+ # if isinstance(query, GeneratorOutput):
+ # query = query.data
+ # output = self.retrieve(query)
+ # print(f"output: {output}")
+ # passages = []
+ # if isinstance(output, Parameter):
+ # passages = output.data[0].documents
+ # else:
+ # passages = output[0].documents
+ # # context = deduplicate(context + passages) # all these needs to gradable
+ # # output_param = Parameter(
+ # # data=passages,
+ # # alias=f"qa_context_{id}",
+ # # role_desc="The context to be used for answering the question",
+ # # requires_opt=True,
+ # # )
+ # output.data = passages # reset the values to be used in the next
+ # if not isinstance(output, Parameter):
+ # raise ValueError(f"Output must be a Parameter, got {output}")
+ # return output
+ # # output_param.set_grad_fn(
+ # # BackwardContext(
+ # # backward_fn=self.backward,
+ # # response=output_param,
+ # # id=id,
+ # # prededecessors=prededecessors,
+ # # )
+ # # )
+ # # return output_param
+
+ def backward(self, response: Parameter, id: Optional[str] = None):
+ print(f"MultiHopRetriever backward: {response}")
+ children_params = response.predecessors
+ # backward score to the demo parameter
+ for pred in children_params:
+ if pred.requires_opt:
+ # pred._score = float(response._score)
+ pred.set_score(response._score)
+ print(
+ f"backpropagate the score {response._score} to {pred.name}, is_teacher: {self.teacher_mode}"
+ )
+ if pred.param_type == ParameterType.DEMOS:
+ # Accumulate the score to the demo
+ pred.add_score_to_trace(
+ trace_id=id, score=response._score, is_teacher=self.teacher_mode
+ )
+ print(f"Pred: {pred.name}, traces: {pred._traces}")
+
+
+class HotPotQARAG(
+ Component
+): # use component as not creating a new ops, but assemble existing ops
+ r"""Same system prompt as text-grad paper, but with our one message prompt template, which has better starting performance"""
+
+ def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2):
+ super().__init__()
+
+ self.passages_per_hop = passages_per_hop
+ self.max_hops = max_hops
+
+ self.multi_hop_retriever = MultiHopRetriever(
+ model_client=model_client,
+ model_kwargs=model_kwargs,
+ passages_per_hop=passages_per_hop,
+ max_hops=max_hops,
+ )
+ # TODO: sometimes the cache collides, so we get different evaluation results
+ self.llm_counter = Generator(
+ name="QuestionAnswering",
+ model_client=model_client,
+ model_kwargs=model_kwargs,
+ prompt_kwargs={
+ "few_shot_demos": Parameter(
+ name="few_shot_demos",
+ data=None,
+ role_desc="To provide few shot demos to the language model",
+ requires_opt=True,
+ param_type=ParameterType.DEMOS,
+ )
+ },
+ template=answer_template,
+ output_processors=parse_string_answer,
+ use_cache=True,
+ demo_data_class=HotPotQADemoData,
+ demo_data_class_input_mapping={
+ "question": "question",
+ "context": "context",
+ },
+ demo_data_class_output_mapping={"answer": lambda x: x.raw_response},
+ )
+
+ # TODO: the error will be a context
+ # a Component does not handle training, forward, or backward; it just passes everything through
+ def call(self, question: str, id: str = None) -> Union[Parameter, str]:
+
+ # normal component, will be called when in inference mode
+
+ question_param = Parameter(
+ data=question,
+ name="question",
+ role_desc="The question to be answered",
+ requires_opt=False,
+ )
+ context = [] # noqa: F841
+ output = None
+ retrieved_context = self.multi_hop_retriever(question_param, id=id)
+
+ # forming a backpropagation graph
+ # Make this step traceable too.
+ # for hop in range(self.max_hops):
+ # # make context a parameter to be able to trace
+ # query = self.query_generator(
+ # prompt_kwargs={
+ # "context": Parameter(
+ # data=context, alias=f"query_context_{id}", requires_opt=True
+ # ),
+ # "question": question_param,
+ # },
+ # id=id,
+ # )
+ # print(f"query: {query}")
+ # if isinstance(query, GeneratorOutput):
+ # query = query.data
+ # output = self.retrieve(query)
+ # print(f"output: {output}")
+ # passages = []
+ # if isinstance(output, Parameter):
+ # passages = output.data[0].documents
+ # else:
+ # output[0].documents
+ # context = deduplicate(context + passages)
+ # print(f"context: {context}")
+
+ output = self.llm_counter(
+ prompt_kwargs={
+ "context": retrieved_context,
+ "question": question_param,
+ },
+ id=id,
+ ) # already support both training (forward + call)
+
+ if (
+ not self.training
+ ): # if users want to customize the output, make sure to guard it with `if not self.training`
+
+ # convert the generator output to a normal data format
+ print(f"converting output: {output}")
+
+ if output.data is None:
+ error_msg = (
+ f"Error in processing the question: {question}, output: {output}"
+ )
+ print(error_msg)
+ output = error_msg
+ else:
+ output = output.data
+ return output
+
+
+from adalflow.optim.trainer.adal import AdalComponent
+from adalflow.optim.trainer.trainer import Trainer
+from adalflow.optim.few_shot.bootstrap_optimizer import BootstrapFewShot
+from adalflow.eval.answer_match_acc import AnswerMatchAcc
+from adalflow.optim.text_grad.text_loss_with_eval_fn import EvalFnToTextLoss
+
+
+class HotPotQARAGAdal(AdalComponent):
+ # TODO: move the teacher model or config into the base class so users don't have to customize as much
+ def __init__(self, task: Component, teacher_model_config: dict):
+ super().__init__()
+ self.task = task
+ self.teacher_model_config = teacher_model_config
+
+ self.evaluator = AnswerMatchAcc("fuzzy_match")
+ self.eval_fn = self.evaluator.compute_single_item
+ # self.eval_fn = eval_fn
+
+ def handle_one_task_sample(
+ self, sample: HotPotQAData
+ ) -> Any: # TODO: auto id, with index in call train examples
+ return self.task, {"question": sample.question, "id": sample.id}
+
+ def handle_one_loss_sample(
+ self, sample: HotPotQAData, y_pred: Any
+ ) -> Tuple[Callable, Dict]:
+ return self.loss_fn.forward, {
+ "kwargs": {
+ "y": y_pred,
+ "y_gt": Parameter(
+ data=sample.answer,
+ role_desc="The ground truth(reference correct answer)",
+ name="y_gt",
+ requires_opt=False,
+ ),
+ }
+ }
+
+ def configure_optimizers(self, *args, **kwargs):
+
+ # TODO: simplify this, make it accept generator
+ parameters = []
+ for name, param in self.task.named_parameters():
+ param.name = name
+ parameters.append(param)
+ do = BootstrapFewShot(params=parameters)
+ return [do]
+
+ def evaluate_one_sample(
+ self, sample: Any, y_pred: Any, metadata: Dict[str, Any]
+ ) -> Any:
+
+ # we need "context" be passed as metadata
+ # print(f"sample: {sample}, y_pred: {y_pred}")
+ # convert pred to Dspy structure
+
+ # y_obj = convert_y_pred_to_dataclass(y_pred)
+ # print(f"y_obj: {y_obj}")
+ # raise ValueError("Stop here")
+ if metadata:
+ return self.eval_fn(sample, y_pred, metadata)
+ return self.eval_fn(sample, y_pred)
+
+ def configure_teacher_generator(self):
+ super().configure_teacher_generator(**self.teacher_model_config)
+
+ def configure_loss_fn(self):
+ self.loss_fn = EvalFnToTextLoss(
+ eval_fn=self.eval_fn,
+            eval_fn_desc="AnswerMatchAcc fuzzy_match, Output accuracy score: 1 for correct, 0 for incorrect",
+ backward_engine=None,
+ )
+
+
+def validate_dspy_demos(
+ demos_file="benchmarks/BHH_object_count/models/dspy/hotpotqa.json",
+):
+ from adalflow.utils.file_io import load_json
+
+ demos_json = load_json(demos_file)
+
+ demos = demos_json["generate_answer"]["demos"] # noqa: F841
+
+ # task = HotPotQARAG( # noqa: F841
+ # **gpt_3_model,
+ # passages_per_hop=3,
+ # max_hops=2,
+ # )
+ # task.llm_counter.p
+
+
+def test_multi_hop_retriever():
+
+ from use_cases.config import (
+ gpt_3_model,
+ )
+
+ multi_hop_retriever = MultiHopRetriever(
+ **gpt_3_model,
+ passages_per_hop=3,
+ max_hops=2,
+ )
+ # 1. use print
+ # print(multi_hop_retriever.query_generator)
+ # # 2. run one forward for query generator
+ question = "How many storeys are in the castle that David Gregory inherited?"
+ # context = []
+ # context_str = multi_hop_retriever.context_to_str(context)
+ # print(
+ # multi_hop_retriever.query_generator(
+ # prompt_kwargs={"question": question, "context": context_str}, id="1"
+ # )
+ # )
+    # # verify the prompt
+ # multi_hop_retriever.query_generator.print_prompt(
+ # **{"question": question, "context": context_str}
+ # )
+
+ # training mode
+ multi_hop_retriever.train()
+
+ # 3. run one forward for retriever
+ print(multi_hop_retriever(question=question, id="1"))
+
+
+def train():
+ trainset, valset, testset = load_datasets()
+
+ from use_cases.config import (
+ gpt_3_model,
+ gpt_4o_model,
+ )
+
+ task = HotPotQARAG(
+ **gpt_3_model,
+ passages_per_hop=3,
+ max_hops=2,
+ )
+ print(task)
+ question = "How long is the highway Whitehorse/Cousins Airport was built to support as of 2012?"
+ print(task(question))
+
+ # for name, param in task.named_parameters():
+ # print(f"name: {name}, param: {param}")
+
+ trainset, valset, testset = load_datasets()
+
+ trainer = Trainer(
+ adaltask=HotPotQARAGAdal(task=task, teacher_model_config=gpt_4o_model),
+ max_steps=10,
+ raw_shots=0,
+ bootstrap_shots=4,
+ train_batch_size=4,
+ ckpt_path="hotpot_qa_rag",
+ strategy="random",
+ save_traces=True,
+        debug=True,  # enable debug mode
+ weighted_sampling=True,
+ )
+    # fit includes max_steps
+ trainer.fit(
+ train_dataset=trainset, val_dataset=valset, test_dataset=testset, debug=True
+ )
+
+
+if __name__ == "__main__":
+ ### Try the minimum effort to test on any task
+
+ # get_logger(level="DEBUG")
+ test_multi_hop_retriever()
+
+
+# TODO: i forgot that i need demo_data_class
+# TODO: i forgot that i need to set id
+# Failed to generate demos but no error messages
diff --git a/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py b/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py
new file mode 100644
index 00000000..a930ce63
--- /dev/null
+++ b/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py
@@ -0,0 +1,251 @@
+"""We will use dspy's retriever to keep that the same and only use our generator and optimizer"""
+
+from typing import List, Optional
+from dataclasses import dataclass, field
+import dspy
+
+import adalflow as adal
+
+from adalflow.datasets.hotpot_qa import HotPotQA
+
+from adalflow.core.retriever import Retriever
+from adalflow.core.types import RetrieverOutput
+from adalflow.core import Generator
+
+
+colbertv2_wiki17_abstracts = dspy.ColBERTv2(
+ url="http://20.102.90.50:2017/wiki17_abstracts"
+)
+
+dspy.settings.configure(rm=colbertv2_wiki17_abstracts)
+
+
+def load_datasets():
+
+ trainset = HotPotQA(split="train", size=20)
+ valset = HotPotQA(split="val", size=50)
+ testset = HotPotQA(split="test", size=50)
+ print(f"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}")
+ return trainset, valset, testset
+
+
+# task pipeline
+
+
+# dspy format
+# Follow the following format.
+# Context: may contain relevant facts
+# Question: ${question}
+# Reasoning: Let's think step by step in order to ${produce the query}. We ...
+# Query: ${query}
+@dataclass
+class QueryRewritterData(adal.DataClass):
+ reasoning: str = field(
+ metadata={"desc": "The reasoning to produce the query"},
+ )
+ query: str = field(
+ metadata={"desc": "The query you produced"},
+ )
+
+ __output_fields__ = ["reasoning", "query"]
+
+
+@dataclass
+class AnswerData(adal.DataClass):
+ reasoning: str = field(
+ metadata={"desc": "The reasoning to produce the answer"},
+ )
+ answer: str = field(
+ metadata={"desc": "The answer you produced"},
+ )
+
+ __output_fields__ = ["reasoning", "answer"]
+
+
+query_template = """
+Write a simple search query that will help answer a complex question.
+
+You will receive a context (may contain relevant facts) and a question.
+Think step by step.
+
+{{output_format_str}}
+{# Few shot demos #}
+{% if few_shot_demos is not none %}
+Here are some examples:
+{{few_shot_demos}}
+{% endif %}
+
+
+Context: {{context}}
+Question: {{question}}
+
+"""
+
+# The library provides a standard template for easy prompting
+answer_template = """
+{{task_desc_str}}
+
+{{output_format_str}}
+{# Few shot demos #}
+{% if few_shot_demos is not none %}
+Here are some examples:
+{{few_shot_demos}}
+{% endif %}
+
+
+Context: {{context}}
+Question: {{question}}
+
+"""
+
+
+# Demonstrates how to wrap another retriever as an AdalFlow Retriever so it can be used in a training pipeline.
+# As a subclass of Retriever, which is a subclass of GradComponent, no additional implementation is needed;
+# the data processing is already done.
+class DspyRetriever(Retriever):
+ def __init__(self, top_k: int = 3):
+ super().__init__()
+ self.top_k = top_k
+ self.dspy_retriever = dspy.Retrieve(k=top_k)
+
+ def call(self, input: str, top_k: Optional[int] = None) -> List[RetrieverOutput]:
+
+ k = top_k or self.top_k
+
+ output = self.dspy_retriever(query_or_queries=input, k=k)
+ # print(f"dsy_retriever output: {output}")
+ final_output: List[RetrieverOutput] = []
+ documents = output.passages
+
+ final_output.append(
+ RetrieverOutput(
+ query=input,
+ documents=documents,
+ doc_indices=[],
+ )
+ )
+ # print(f"final_output: {final_output}")
+ return final_output
+
+
+task_desc_str = r"""Answer questions with short factoid answers.
+
+You will receive context (may contain relevant facts) and a question.
+Think step by step."""
+
+
+class VanillaRAG(adal.GradComponent):
+ def __init__(self, passages_per_hop=3, model_client=None, model_kwargs=None):
+ super().__init__()
+
+ self.passages_per_hop = passages_per_hop
+
+ self.retriever = DspyRetriever(top_k=passages_per_hop)
+ self.llm_parser = adal.DataClassParser(
+ data_class=AnswerData, return_data_class=True, format_type="json"
+ )
+ self.llm = Generator(
+ model_client=model_client,
+ model_kwargs=model_kwargs,
+ prompt_kwargs={
+ "task_desc_str": adal.Parameter(
+ data=task_desc_str,
+ role_desc="Task description for the language model",
+ param_type=adal.ParameterType.PROMPT,
+ ),
+ "few_shot_demos": adal.Parameter(
+ data=None,
+ requires_opt=True,
+ role_desc="To provide few shot demos to the language model",
+ param_type=adal.ParameterType.DEMOS,
+ ),
+ "output_format_str": self.llm_parser.get_output_format_str(),
+ },
+ template=answer_template,
+ output_processors=self.llm_parser,
+ use_cache=True,
+ )
+
+ def call(self, question: str, id: str = None) -> adal.GeneratorOutput:
+ if self.training:
+ raise ValueError(
+ "This component is not supposed to be called in training mode"
+ )
+        # Users should just treat it as a call function;
+        # we will handle the connection between the components.
+        # They should directly pass the retriever_output along with
+        # each output's successor_map_fn.
+        # What if it is passed to two different components?
+        # We can create a copy.
+ retriever_out = self.retriever.call(input=question)
+
+ successor_map_fn = lambda x: ( # noqa E731
+ "\n\n".join(x[0].documents) if x and x[0] and x[0].documents else ""
+ )
+ retrieved_context = successor_map_fn(retriever_out)
+
+ # print(f"retrieved_context: {retrieved_context}")
+ # print(f"retriever_out: {retriever_out}")
+ prompt_kwargs = {
+ "context": retrieved_context,
+ "question": question,
+ }
+
+ output = self.llm.call(
+ prompt_kwargs=prompt_kwargs,
+ id=id,
+ )
+ # self.llm.print_prompt(**prompt_kwargs)
+ return output
+
+ def forward(self, question: str, id: str = None) -> adal.Parameter:
+ if not self.training:
+ raise ValueError("This component is not supposed to be called in eval mode")
+ # TODO: add id in the retriever output
+ retriever_out = self.retriever.forward(input=question)
+ successor_map_fn = lambda x: ( # noqa E731
+ "\n\n".join(x.data[0].documents)
+ if x.data and x.data[0] and x.data[0].documents
+ else ""
+ )
+ retriever_out.add_successor_map_fn(successor=self.llm, map_fn=successor_map_fn)
+ generator_out = self.llm.forward(
+ prompt_kwargs={"question": question, "context": retriever_out}, id=id
+ )
+ return generator_out
+
+
+def test_vanilla_rag():
+
+ from use_cases.config import (
+ gpt_3_model,
+ )
+
+ task = VanillaRAG(
+ **gpt_3_model,
+ passages_per_hop=3,
+ )
+
+ # test the retriever
+
+ question = "How many storeys are in the castle that David Gregory inherited?"
+
+ task.train()
+
+ retriever_out = task.retriever(input=question)
+
+ print(f"retriever_out: {retriever_out}")
+
+ # test the forward function
+ generator_out = task.forward(question=question, id="1")
+ print(f"generator_out: {generator_out}")
+
+ generator_out.draw_graph()
+
+ task.eval()
+ generator_out = task.call(question=question, id="1")
+ print(f"generator_out: {generator_out}")
+
+
+if __name__ == "__main__":
+    test_vanilla_rag()
diff --git a/benchmarks/hotpot_qa/adal_exp/train_vanilla.py b/benchmarks/hotpot_qa/adal_exp/train_vanilla.py
new file mode 100644
index 00000000..0468a113
--- /dev/null
+++ b/benchmarks/hotpot_qa/adal_exp/train_vanilla.py
@@ -0,0 +1,167 @@
+from typing import Any, Callable, Dict, Tuple
+
+import adalflow as adal
+from adalflow.eval.answer_match_acc import AnswerMatchAcc
+from adalflow.datasets.types import HotPotQAData
+
+from benchmarks.hotpot_qa.adal_train import load_datasets
+from benchmarks.hotpot_qa.adal_exp.build_vanilla_rag import VanillaRAG
+from use_cases.config import gpt_3_model, gpt_4o_model
+
+
+# TODO: look more into the loss function
+# TODO: test LLM judge too.
+class VallinaRAGAdal(adal.AdalComponent):
+ def __init__(
+ self,
+ model_client: adal.ModelClient,
+ model_kwargs: Dict,
+ backward_engine_model_config: Dict | None = None,
+ teacher_model_config: Dict | None = None,
+ text_optimizer_model_config: Dict | None = None,
+ ):
+ task = VanillaRAG(
+ model_client=model_client,
+ model_kwargs=model_kwargs,
+ passages_per_hop=3,
+ )
+ eval_fn = AnswerMatchAcc(type="fuzzy_match").compute_single_item
+ loss_fn = adal.EvalFnToTextLoss(
+ eval_fn=eval_fn, eval_fn_desc="fuzzy_match: 1 if str(y) in str(y_gt) else 0"
+ )
+ super().__init__(
+ task=task,
+ eval_fn=eval_fn,
+ loss_fn=loss_fn,
+ backward_engine_model_config=backward_engine_model_config,
+ teacher_model_config=teacher_model_config,
+ text_optimizer_model_config=text_optimizer_model_config,
+ )
+
+ # tell the trainer how to call the task
+ def handle_one_task_sample(
+ self, sample: HotPotQAData
+ ) -> Tuple[Callable[..., Any], Dict]:
+ if self.task.training: # TODO: make the components more clear
+ return self.task.forward, {"question": sample.question, "id": sample.id}
+ else:
+ return self.task.call, {"question": sample.question, "id": sample.id}
+
+    # TODO: use two map fns to make the code even simpler
+
+ # eval mode: get the generator output, directly engage with the eval_fn
+ def evaluate_one_sample(
+ self, sample: HotPotQAData, y_pred: adal.GeneratorOutput
+ ) -> float:
+ y_label = ""
+ if y_pred and y_pred.data and y_pred.data.answer:
+ y_label = y_pred.data.answer
+ return self.eval_fn(y=y_label, y_gt=sample.answer)
+
+ # train mode: get the loss and get the data from the full_response
+ def handle_one_loss_sample(self, sample: HotPotQAData, pred: adal.Parameter):
+ # prepare gt parameter
+ y_gt = adal.Parameter(
+ name="y_gt",
+ data=sample.answer,
+ eval_input=sample.answer,
+ requires_opt=False,
+ )
+
+ # pred's full_response is the output of the task pipeline which is GeneratorOutput
+ pred.eval_input = (
+ pred.full_response.data.answer
+ if pred.full_response
+ and pred.full_response.data
+ and pred.full_response.data.answer
+ else ""
+ )
+ return self.loss_fn, {"kwargs": {"y": pred, "y_gt": y_gt}}
+
+
+# Note: diagnose is quite helpful; it lets you quickly check whether the eval function is the right metric.
+# We checked the fuzzy-match eval and found that some "yes" and "Yes" were not matched; after lowercasing both strings,
+# the performance went up from 0.15 to 0.4.
+def train_diagnose(
+ model_client: adal.ModelClient,
+ model_kwargs: Dict,
+) -> Dict:
+
+ trainset, valset, testset = load_datasets()
+
+ adal_component = VallinaRAGAdal(
+ model_client,
+ model_kwargs,
+ backward_engine_model_config=gpt_4o_model,
+ teacher_model_config=gpt_3_model,
+ text_optimizer_model_config=gpt_3_model,
+ )
+ trainer = adal.Trainer(adaltask=adal_component)
+ trainer.diagnose(dataset=trainset, split="train")
+ # trainer.diagnose(dataset=valset, split="val")
+ # trainer.diagnose(dataset=testset, split="test")
+
+
+def train(
+    train_batch_size=4,  # a larger batch size is not that effective, probably because of the LLM's lost-in-the-middle issue
+ raw_shots: int = 0,
+ bootstrap_shots: int = 1,
+ max_steps=1,
+ num_workers=4,
+ strategy="constrained",
+ optimization_order="sequential",
+ debug=False,
+ resume_from_ckpt=None,
+ exclude_input_fields_from_bootstrap_demos=False,
+):
+ adal_component = VallinaRAGAdal(
+ **gpt_3_model,
+ teacher_model_config=gpt_4o_model,
+ text_optimizer_model_config=gpt_4o_model,
+ backward_engine_model_config=gpt_4o_model
+ )
+ print(adal_component)
+ trainer = adal.Trainer(
+ train_batch_size=train_batch_size,
+ adaltask=adal_component,
+ strategy=strategy,
+ max_steps=max_steps,
+ num_workers=num_workers,
+ raw_shots=raw_shots,
+ bootstrap_shots=bootstrap_shots,
+ debug=debug,
+ weighted_sampling=True,
+ optimization_order=optimization_order,
+ exclude_input_fields_from_bootstrap_demos=exclude_input_fields_from_bootstrap_demos,
+ )
+ print(trainer)
+
+ train_dataset, val_dataset, test_dataset = load_datasets()
+ trainer.fit(
+ train_dataset=train_dataset,
+ val_dataset=val_dataset,
+ test_dataset=test_dataset,
+ resume_from_ckpt=resume_from_ckpt,
+ )
+
+
+if __name__ == "__main__":
+ from use_cases.config import gpt_3_model
+
+ adal.setup_env()
+
+ # task = VallinaRAGAdal(**gpt_3_model)
+ # print(task)
+
+ # train_diagnose(**gpt_3_model)
+
+ # train: 0.15 before the evaluator converted to lower and 0.4 after the conversion
+ # TODO: test debug mode
+ train(
+ debug=False,
+ max_steps=12,
+ resume_from_ckpt="/Users/liyin/.adalflow/ckpt/ValinaRAGAdal/random_max_steps_12_7c091_run_1.json",
+ )
+ # random_max_steps_12_ecf16_run_9.json, demo only, val 0.6 to 0.68, test: 0.58-0.61
+ # random_max_steps_12_7c091_run_1.json, prompt + demo, 0.58 -0.62, test: 0.55 - 0.58
+ # resume from random_max_steps_12_7c091_run_1.json
diff --git a/benchmarks/hotpot_qa/adal_train.py b/benchmarks/hotpot_qa/adal_train.py
index 5d4a54ec..bcf305fd 100644
--- a/benchmarks/hotpot_qa/adal_train.py
+++ b/benchmarks/hotpot_qa/adal_train.py
@@ -2,6 +2,9 @@
import dspy
from typing import List, Union, Optional, Dict, Callable
+from dataclasses import dataclass, field
+
+import adalflow as adal
from adalflow.optim.parameter import Parameter, ParameterType
from adalflow.datasets.hotpot_qa import HotPotQA, HotPotQAData
@@ -18,12 +21,10 @@
def load_datasets():
- # trainset = HotPotQA(split="train", size=2)
- # valset = HotPotQA(split="val", size=5)
- # testset = HotPotQA(split="test", size=5)
- trainset = HotPotQA(split="train", size=20)
+
+ trainset = HotPotQA(split="train", size=50)
valset = HotPotQA(split="val", size=50)
- testset = HotPotQA(split="test", size=50)
+ testset = HotPotQA(split="test", size=100)
print(f"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}")
return trainset, valset, testset
@@ -34,12 +35,43 @@ def load_datasets():
from adalflow.core import Component, Generator
+# dspy format
+# Follow the following format.
+# Context: may contain relevant facts
+# Question: ${question}
+# Reasoning: Let's think step by step in order to ${produce the query}. We ...
+# Query: ${query}
+@dataclass
+class QueryRewritterData(adal.DataClass):
+ reasoning: str = field(
+ metadata={"desc": "The reasoning to produce the query"},
+ )
+ query: str = field(
+ metadata={"desc": "The query you produced"},
+ )
+
+ __output_fields__ = ["reasoning", "query"]
+
+
+@dataclass
+class AnswerData(adal.DataClass):
+ reasoning: str = field(
+ metadata={"desc": "The reasoning to produce the answer"},
+ )
+ answer: str = field(
+ metadata={"desc": "The answer you produced"},
+ )
+
+ __output_fields__ = ["reasoning", "answer"]
+
+
query_template = """
Write a simple search query that will help answer a complex question.
-You will receive a context and a question. Think step by step.
-The last line of your response should be of the following format: 'Query: $VALUE' where VALUE is a search query.
+You will receive a context (may contain relevant facts) and a question.
+Think step by step.
+{{output_format_str}}
{# Few shot demos #}
{% if few_shot_demos is not none %}
Here are some examples:
@@ -56,9 +88,9 @@ def load_datasets():
answer_template = """
Answer questions with short factoid answers.
-You will receive context and a question. Think step by step.
-The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a short factoid answer.
-
+You will receive context (may contain relevant facts) and a question.
+Think step by step.
+{{output_format_str}}
{# Few shot demos #}
{% if few_shot_demos is not none %}
Here are some examples:
@@ -137,7 +169,7 @@ def __init__(self, k=3):
self.k = k
self.dspy_retriever = dspy.Retrieve(k=k)
- def call(self, input, top_k=None, id=None):
+ def call(self, input: str) -> List[RetrieverOutput]:
output = self.dspy_retriever(query_or_queries=input, k=self.k)
print(f"dsy_retriever output: {output}")
final_output: List[RetrieverOutput] = []
@@ -161,13 +193,18 @@ def call(self, input, top_k=None, id=None):
# User customize an auto-grad operator
-class MultiHopRetriever(adal.GradComponent):
+class MultiHopRetriever(adal.Retriever):
def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2):
super().__init__()
self.passages_per_hop = passages_per_hop
self.max_hops = max_hops
+ self.data_parser = adal.DataClassParser(
+ data_class=QueryRewritterData, return_data_class=True, format_type="yaml"
+ )
+
+ # Grad Component
self.query_generator = Generator(
name="query_generator",
model_client=model_client,
@@ -179,100 +216,145 @@ def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2):
role_desc="To provide few shot demos to the language model",
requires_opt=True,
param_type=ParameterType.DEMOS,
- )
+ ),
+ "output_format_str": self.data_parser.get_output_format_str(),
},
template=query_template,
- output_processors=parse_string_query,
+ # output_processors=parse_string_query,
+ output_processors=self.data_parser,
use_cache=True,
- demo_data_class=HotPotQADemoData,
- demo_data_class_input_mapping={
- "question": "question",
- # "context": "context",
- },
- demo_data_class_output_mapping={"answer": lambda x: x.raw_response},
+ # demo_data_class=HotPotQADemoData,
+ # demo_data_class_input_mapping={
+ # "question": "question",
+ # # "context": "context",
+ # },
+ # demo_data_class_output_mapping={"answer": lambda x: x.raw_response},
)
self.retrieve = DspyRetriever(k=passages_per_hop)
- def call(self, question: str, id: str = None) -> Any: # Add id for tracing
- # inferenc mode
+ @staticmethod
+ def context_to_str(context: List[str]) -> str:
+ return "\n".join(context)
+
+ def call(self, *, question: str, id: str = None) -> Any: # Add id for tracing
+ # inference mode!!!
# output = self.forward(question, id=id)
+
context = []
self.max_hops = 1
- # for hop in range(self.max_hops):
- last_context_param = Parameter(
- data=context,
- name=f"query_context_{id}_{0}",
- requires_opt=True,
- )
- query = self.query_generator(
- prompt_kwargs={
- "context": last_context_param,
- "question": question,
- },
- id=id,
- )
- print(f"query: {query}")
- if isinstance(query, GeneratorOutput):
- query = query.data
- output = self.retrieve(query)
- print(f"output: {output}")
- print(f"output call: {output}")
- return output[0].documents
-
- def forward(self, question: str, id: str = None) -> Parameter:
- question_param = question
- if not isinstance(question, Parameter):
- question_param = Parameter(
- data=question,
- name="question",
- role_desc="The question to be answered",
- requires_opt=False,
+ for hop in range(self.max_hops):
+ gen_out = self.query_generator(
+ prompt_kwargs={
+ "context": self.context_to_str(context),
+ "question": question,
+ },
+ id=id,
)
- context = []
- self.max_hops = 1
- # for hop in range(self.max_hops):
- last_context_param = Parameter(
- data=context,
- name=f"query_context_{id}_{0}",
- requires_opt=True,
- )
- query = self.query_generator(
- prompt_kwargs={
- "context": last_context_param,
- "question": question_param,
- },
- id=id,
- )
- print(f"query: {query}")
- if isinstance(query, GeneratorOutput):
- query = query.data
- output = self.retrieve(query)
- print(f"output: {output}")
- passages = []
- if isinstance(output, Parameter):
- passages = output.data[0].documents
- else:
- passages = output[0].documents
- # context = deduplicate(context + passages) # all these needs to gradable
- # output_param = Parameter(
- # data=passages,
- # alias=f"qa_context_{id}",
- # role_desc="The context to be used for answering the question",
+ query = None
+ # TODO: the bridge between the retriever to the generator and generator to the retriever needs to be more smooth
+ if isinstance(gen_out, GeneratorOutput):
+ query = ( # noqa: F841
+ gen_out.data.query if gen_out.data and gen_out.data.query else None
+ )
+ elif isinstance(gen_out, adal.Parameter):
+ gen_out.successor_map_fn = lambda x: (
+ x.full_response.data.query
+ if x.full_response and x.full_response.data
+ else None
+ )
+ print(f"gen_out: {gen_out}")
+ # query = (
+ # gen_out.full_response.data.query
+ # if gen_out.full_response and gen_out.full_response.data
+ # else None
+ # )
+ retrieve_out = self.retrieve(input=gen_out)
+ print(f"retrieve_out: {retrieve_out}")
+ # passages = []
+ # if isinstance(retrieve_out, Parameter):
+ # passages = retrieve_out.data[0].documents
+ # else:
+ # passages = retrieve_out[0].documents
+
+ # print(f"passages: {passages}")
+
+ # context = deduplicate(context + passages)
+
+ # # for hop in range(self.max_hops):
+ # last_context_param = Parameter(
+ # data=context,
+ # name=f"query_context_{id}_{0}",
# requires_opt=True,
# )
- output.data = passages # reset the values to be used in the next
- if not isinstance(output, Parameter):
- raise ValueError(f"Output must be a Parameter, got {output}")
- return output
- # output_param.set_grad_fn(
- # BackwardContext(
- # backward_fn=self.backward,
- # response=output_param,
- # id=id,
- # prededecessors=prededecessors,
- # )
+ # query = self.query_generator(
+ # prompt_kwargs={
+ # "context": last_context_param,
+ # "question": question,
+ # },
+ # id=id,
# )
- # return output_param
+ # print(f"query: {query}")
+ # if isinstance(query, GeneratorOutput):
+ # query = query.data
+ # output = self.retrieve(query)
+ # print(f"output: {output}")
+ # print(f"output call: {output}")
+ # return output[0].documents
+
+ # def forward(self, question: str, id: str = None) -> Parameter:
+ # question_param = question
+ # if not isinstance(question, Parameter):
+ # question_param = Parameter(
+ # data=question,
+ # name="question",
+ # role_desc="The question to be answered",
+ # requires_opt=False,
+ # )
+ # context = []
+ # self.max_hops = 1
+ # # for hop in range(self.max_hops):
+ # last_context_param = Parameter(
+ # data=context,
+ # name=f"query_context_{id}_{0}",
+ # requires_opt=True,
+ # )
+ # query = self.query_generator(
+ # prompt_kwargs={
+ # "context": last_context_param,
+ # "question": question_param,
+ # },
+ # id=id,
+ # )
+ # print(f"query: {query}")
+ # if isinstance(query, GeneratorOutput):
+ # query = query.data
+ # output = self.retrieve(query)
+ # print(f"output: {output}")
+ # passages = []
+ # if isinstance(output, Parameter):
+ # passages = output.data[0].documents
+ # else:
+ # passages = output[0].documents
+ # # context = deduplicate(context + passages) # all these needs to gradable
+ # # output_param = Parameter(
+ # # data=passages,
+ # # alias=f"qa_context_{id}",
+ # # role_desc="The context to be used for answering the question",
+ # # requires_opt=True,
+ # # )
+ # output.data = passages # reset the values to be used in the next
+ # if not isinstance(output, Parameter):
+ # raise ValueError(f"Output must be a Parameter, got {output}")
+ # return output
+ # # output_param.set_grad_fn(
+ # # BackwardContext(
+ # # backward_fn=self.backward,
+ # # response=output_param,
+ # # id=id,
+ # # prededecessors=prededecessors,
+ # # )
+ # # )
+ # # return output_param
def backward(self, response: Parameter, id: Optional[str] = None):
print(f"MultiHopRetriever backward: {response}")
@@ -487,25 +569,55 @@ def validate_dspy_demos(
demos = demos_json["generate_answer"]["demos"] # noqa: F841
- task = HotPotQARAG( # noqa: F841
- **gpt_3_model,
- passages_per_hop=3,
- max_hops=2,
- )
+ # task = HotPotQARAG( # noqa: F841
+ # **gpt_3_model,
+ # passages_per_hop=3,
+ # max_hops=2,
+ # )
# task.llm_counter.p
-if __name__ == "__main__":
- ### Try the minimum effort to test on any task
+def test_multi_hop_retriever():
- # get_logger(level="DEBUG")
+ from use_cases.config import (
+ gpt_3_model,
+ )
+
+ multi_hop_retriever = MultiHopRetriever(
+ **gpt_3_model,
+ passages_per_hop=3,
+ max_hops=2,
+ )
+ # 1. use print
+ # print(multi_hop_retriever.query_generator)
+ # # 2. run one forward for query generator
+ question = "How many storeys are in the castle that David Gregory inherited?"
+ # context = []
+ # context_str = multi_hop_retriever.context_to_str(context)
+ # print(
+ # multi_hop_retriever.query_generator(
+ # prompt_kwargs={"question": question, "context": context_str}, id="1"
+ # )
+ # )
+    # # verify the prompt
+ # multi_hop_retriever.query_generator.print_prompt(
+ # **{"question": question, "context": context_str}
+ # )
+
+ # training mode
+ multi_hop_retriever.train()
+
+ # 3. run one forward for retriever
+ print(multi_hop_retriever(question=question, id="1"))
+
+
+def train():
trainset, valset, testset = load_datasets()
- from LightRAG.use_cases.config import (
+ from use_cases.config import (
gpt_3_model,
gpt_4o_model,
)
- import dspy
task = HotPotQARAG(
**gpt_3_model,
@@ -539,6 +651,13 @@ def validate_dspy_demos(
)
+if __name__ == "__main__":
+ ### Try the minimum effort to test on any task
+
+ # get_logger(level="DEBUG")
+ test_multi_hop_retriever()
+
+
# TODO: i forgot that i need demo_data_class
# TODO: i forgot that i need to set id
# Failed to generate demos but no error messages
diff --git a/benchmarks/hotpot_qa/dspy_train.py b/benchmarks/hotpot_qa/dspy_train.py
index a3008df4..34ad3b23 100644
--- a/benchmarks/hotpot_qa/dspy_train.py
+++ b/benchmarks/hotpot_qa/dspy_train.py
@@ -147,7 +147,7 @@ def gold_passages_retrieved(example, pred, trace=None):
if __name__ == "__main__":
- from lightrag.utils import setup_env
+ from adalflow.utils import setup_env
setup_env()
# Ask any question you like to this simple RAG program.
diff --git a/docs/source/_static/images/RAG_Enhancements.png b/docs/source/_static/images/RAG_Enhancements.png
new file mode 100644
index 00000000..946b12a7
Binary files /dev/null and b/docs/source/_static/images/RAG_Enhancements.png differ
diff --git a/docs/source/_static/images/RAG_architecture.png b/docs/source/_static/images/RAG_architecture.png
new file mode 100644
index 00000000..5bace2b6
Binary files /dev/null and b/docs/source/_static/images/RAG_architecture.png differ
diff --git a/docs/source/_static/images/RAG_workflow.png b/docs/source/_static/images/RAG_workflow.png
new file mode 100644
index 00000000..05cafdc3
Binary files /dev/null and b/docs/source/_static/images/RAG_workflow.png differ
diff --git a/docs/source/_static/images/REALM_train_architecture.png b/docs/source/_static/images/REALM_train_architecture.png
new file mode 100644
index 00000000..48e36b66
Binary files /dev/null and b/docs/source/_static/images/REALM_train_architecture.png differ
diff --git a/docs/source/_static/images/replug.png b/docs/source/_static/images/replug.png
new file mode 100644
index 00000000..f13c75b6
Binary files /dev/null and b/docs/source/_static/images/replug.png differ
diff --git a/docs/source/_static/images/self_rag.png b/docs/source/_static/images/self_rag.png
new file mode 100644
index 00000000..5f0bb277
Binary files /dev/null and b/docs/source/_static/images/self_rag.png differ
diff --git a/docs/source/_static/images/tensor_parameter.png b/docs/source/_static/images/tensor_parameter.png
new file mode 100644
index 00000000..c87f418e
Binary files /dev/null and b/docs/source/_static/images/tensor_parameter.png differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a0e60d68..d0b9b09f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -65,7 +65,7 @@
# banner.className = 'announcement-banner';
# // Create the content for the announcement
# banner.innerHTML = `
-    #             ⭐️ If you find LightRAG helpful, give it a star on GitHub! ⭐️
+    #             ⭐️ If you find AdalFlow helpful, please star us on GitHub! ⭐️
#
# `;
# // Append the banner to the banner header
diff --git a/docs/source/tutorials/auto_text_grad.rst b/docs/source/tutorials/auto_text_grad.rst
index cc1360da..ea4294f7 100644
--- a/docs/source/tutorials/auto_text_grad.rst
+++ b/docs/source/tutorials/auto_text_grad.rst
@@ -1,27 +1,149 @@
-Auto Text-Diff
+Auto Text-Grad
===============================================
Show a DAG with parameter nodes and edges.
+The goal is to make a task pipeline trainable.
+
+The auto text-grad system is similar to PyTorch autograd. Here is how it differs:
+
+1. torch.Tensor & torch.nn.Parameter vs AdalFlow.Parameter: an AdalFlow ``Parameter`` can hold any type of data, while a torch ``Tensor`` mainly holds numerical arrays and matrices.
+This means that the backward pass is not the gradient function of the math operations applied on a tensor, but is customized for each operator.
+The operators here are components such as Generator, Retriever, and loss functions.
+We have defined the backward function for the Generator, which generates textual feedback for Parameters of prompt type.
+The Retriever, for now, has no parameter types that we optimize, but this may very well change and be improved in the future.
+
+In AdalFlow, we use parameter types to differentiate, instead of separately creating a Tensor and a Parameter subclass.
+We have the following parameter types:
+
+- trainable parameters of a generator
+
+  - prompt
+  - demos
+
+- intermediate parameters
+
+  - input to the component
+  - output from the component
+
+- gradient
+
+These types allow parameters to be passed around the whole pipeline.
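+
+Below is a minimal sketch of declaring trainable parameters, based on the usage in the HotpotQA benchmark code in this change set; the exact constructor arguments may differ slightly across versions:
+
+.. code-block:: python
+
+    import adalflow as adal
+
+    # a persistent, trainable prompt parameter (optimized by the text optimizer)
+    task_desc = adal.Parameter(
+        data="Answer questions with short factoid answers.",
+        role_desc="Task description for the language model",
+        requires_opt=True,
+        param_type=adal.ParameterType.PROMPT,
+    )
+
+    # a persistent, trainable demo parameter (optimized by the demo optimizer)
+    few_shot_demos = adal.Parameter(
+        data=None,
+        role_desc="To provide few shot demos to the language model",
+        requires_opt=True,
+        param_type=adal.ParameterType.DEMOS,
+    )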
+
+
+
+**torch.no_grad() vs AdalFlow.GradComponent**
+
+``torch.no_grad()`` is a context manager that disables gradient calculation. It stops:
+(1) tracking the operations that are performed to build the computation graph,
+(2) saving and handling the intermediate values (e.g., activations, inputs) needed for the backward pass, and
+(3) storing the computation graph for later backpropagation.
+In AdalFlow, the equivalent is to use ``call`` (rather than ``forward``) on an ``adal.Component`` or ``adal.GradComponent`` subclass.
+
+In PyTorch, you do this for inference:
+
+.. code-block:: python
+
+ import torch
+
+ model = MyModel()
+ model.eval() # Set model to evaluation mode
+
+ with torch.no_grad(): # Disable gradient tracking
+ output = model(input_data) # Forward pass only
+
+In AdalFlow, you do this for inference:
+
+.. code-block:: python
+
+ import adalflow as adal
+
+ task_pipeline = MyTaskPipeline()
+ task_pipeline.eval() # Set model to evaluation mode
+    task_pipeline(input_data)  # similar to running under torch.no_grad(): forward pass only
+    # task_pipeline.call(input_data)   # sync forward pass only
+    # task_pipeline.acall(input_data)  # async forward pass only
+
+Just like PyTorch has Tensor and Parameter (a special type of tensor), ``GradComponent`` is a special type of ``Component`` capable of auto-text-grad.
+
+**How to connect the output-input between components?**
+
+In PyTorch, this is an easier problem as everything is a matrix inside a tensor.
+But in LLM applications, (1) each component's output can be very different in form.
+For the generator we have ``GeneratorOutput`` and for the retriever we have ``List[RetrieverOutput]``.
+To connect the retriever output to the generator input, we need special handling such as ``"\n\n".join(ro[0].documents)``.
+In LangGraph, this is done inside each manually defined node, and the whole pipeline uses a globally accessible ``GraphState`` to access and store the data.
+(2) We also need robust error handling in our output structure.
+
+
+.. code-block:: python
+
+ class GraphState(BaseModel):
+
+ question: Optional[str] = None
+ generation: Optional[str] = None
+ documents: List[str] = []
+
+ def retriever_node(state: GraphState):
+ new_documents = retriever.invoke(state.question)
+ new_documents = [d.page_content for d in new_documents]
+ state.documents.extend(new_documents)
+ return {"documents": state.documents}
+
+ def generation_node(state: GraphState):
+ generation = rag_chain.invoke({
+ "context": "\n\n".join(state.documents),
+ "question": state.question,
+ })
+ return {"generation": generation}
+
+When we are training, both outputs are Parameters, but the way we connect the data is the same.
+We use a ``successor_map_fn`` of type ``Dict[str, Callable]`` to connect the output of one component to the input of another component.
+The ``str`` key is ``id(successor)``. This is only needed in the forward function of a Component or GradComponent.
+
+Here is our example:
+
+.. code-block:: python
+
+    def forward(self, question: str, id: str = None) -> adal.Parameter:
+ retriever_out = self.retriever.forward(input=question)
+ successor_map_fn = lambda x: (
+ "\n\n".join(x.data[0].documents)
+ if x.data and x.data[0] and x.data[0].documents
+ else ""
+ )
+ retriever_out.add_successor_map_fn(successor=self.llm, map_fn=successor_map_fn)
+ generator_out = self.llm.forward(
+ prompt_kwargs={"question": question, "context": retriever_out}, id=id
+ )
+ return generator_out
+
+.. TODO: save the trace_graph and insert it here as a figure.
+
Textual Gradient Operators
--------------------------
"Textual gradient Operators" are the operators that are capable of backpropagation, this including operator for LLM calls, for evaluate function, and for llm as a judge function.
Think of the LLM calls as model layer in pytorch, such as nn.Linear, nn.Conv2d, or transformer layers.
Think of the evaluation function (normally you have gt) and LLM as judge (normall you have no gt reference but you rely on llm to give an evaluation score) as
a loss function in pytorch, such as nn.CrossEntropyLoss, nn.MSELoss, or nn.BCELoss.
+
+
These operators need to be capable of backpropagation to get "feedback"/"gradients" for the auto-diff optimizer.
-We introduce ``GradFunction`` class which consists of two must-have abstract methods: ``forward`` and ``backward``.
+We introduce ``GradComponent`` class which consists of two must-have abstract methods: ``forward`` and ``backward``.
+``GradComponent`` has a default ``forward`` that wraps the normal ``call`` to return a ``Parameter`` and build the computation graph.
- ``forward``: The forward pass of the operator. It will return a `Prameter` with the backward function set to the backward function of the operator.
- ``backward``: The backward pass of the operator. It will compute the response's predecessor's gradient with regard to the response. (The ``Parameter`` object returned by the ``forward`` method)
We currently have the following operators:
-- ``Generator`` is adapted as a ``GradFunction``.
+- ``Generator`` is adapted as a ``GradComponent``.
+.. TODO:
+ - remove the __call__ and call method, use only forward and backward to simplify the understanding
+ - forward will be able to track the predecessors to form a DAG of parameters, this will always be helpful.
+ - # a forward will
Generator Adaptation
~~~~~~~~~~~~~~~~~~~~~~
In auto-text grad, generator needs to be adapted as an operator that supports backpropagation to get "feedback"/"gradients" for the auto-diff optimizer.
-So, it inherits from ``GradFunction`` class, adding ``forward``, ``backward`` and ``set_backward_engine`` methods.
+So, it inherits from ``GradComponent`` class, adding ``forward``, ``backward`` and ``set_backward_engine`` methods.
Note:
@@ -32,6 +154,36 @@ Note:
Here is one failure example: `data=Error: None, Raw response: Sure, I'm ready to help. What's the reasoning question?`.
+Retriever Adaptation
+~~~~~~~~~~~~~~~~~~~~~~
+For now, we do not set up persistent parameters for the retriever; its role is to relay any intermediate parameters back to its predecessors if they happen to be generators.
+The backward function currently has no effect, but it is a placeholder for a future implementation.
+
+The demo optimizer does not need the whole pipeline to be back-propagatable; a DAG of parameters is enough,
+and the latter is the condition for doing text-grad on any generator in a task pipeline.
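+
+As a minimal sketch of the adaptation (mirroring the ``DspyRetriever`` wrapper used in the HotpotQA benchmark in this change set), subclassing ``Retriever`` and implementing ``call`` is enough; the default ``GradComponent.forward`` handles training mode:
+
+.. code-block:: python
+
+    from typing import List, Optional
+
+    import dspy
+    import adalflow as adal
+    from adalflow.core.types import RetrieverOutput
+
+    class DspyRetriever(adal.Retriever):
+        # assumes dspy.settings.configure(rm=...) has been called, as in the benchmark
+        def __init__(self, top_k: int = 3):
+            super().__init__()
+            self.top_k = top_k
+            self.dspy_retriever = dspy.Retrieve(k=top_k)
+
+        def call(self, input: str, top_k: Optional[int] = None) -> List[RetrieverOutput]:
+            k = top_k or self.top_k
+            output = self.dspy_retriever(query_or_queries=input, k=k)
+            # relay the retrieved passages in AdalFlow's standard output type
+            return [RetrieverOutput(query=input, documents=output.passages, doc_indices=[])]
+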
+..
+ TODO: if we set the top_k as a parameter (hyperparameter along with the data type int)
+    text_grad can be used to optimize the hyperparameter to replace human intelligence.
+ will it work better than hyperparameter sweep? This is a future research project.
+
+To optimize any task pipeline
+------------------------------
+
+For generators, ``prompt_kwargs`` are the leaf nodes to optimize.
+Each value can be a ``str`` or a ``Parameter``.
+
+``GradComponent`` handles the predecessors, which form a DAG of parameters.
+Any argument in the input args that is a ``Parameter`` becomes a predecessor.
+
+Subclassing ``GradComponent`` automatically makes a component trainable (at least for the default behaviors),
+just like subclassing ``nn.Module`` in PyTorch gives you a model you can train.
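+
+Below is a minimal sketch of such a subclass, following the ``VanillaRAG`` example in the benchmarks of this change set (the template here is a simplified stand-in):
+
+.. code-block:: python
+
+    import adalflow as adal
+
+    class QA(adal.GradComponent):
+        def __init__(self, model_client: adal.ModelClient, model_kwargs: dict):
+            super().__init__()
+            self.llm = adal.Generator(
+                model_client=model_client,
+                model_kwargs=model_kwargs,
+                template=r"{{task_desc_str}} Question: {{question}}",
+                prompt_kwargs={
+                    # a trainable leaf node for the text optimizer
+                    "task_desc_str": adal.Parameter(
+                        data="Answer questions with short factoid answers.",
+                        role_desc="Task description for the language model",
+                        requires_opt=True,
+                        param_type=adal.ParameterType.PROMPT,
+                    ),
+                },
+            )
+
+        def call(self, question: str, id: str = None):
+            # inference mode; training mode goes through the default GradComponent.forward
+            return self.llm.call(prompt_kwargs={"question": question}, id=id)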
+
+
+
+
+Question: there might be no need to have the concept of Component at all, so we would have simpler library APIs and one less abstraction layer.
+
+
EvalFunction As Loss
~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -57,3 +209,7 @@ AdalComponent to organize code
Trainer to put all together
----------------------------
+
+..
+ TODO:
+    1. clarity on self.tracing
diff --git a/docs/source/tutorials/evaluation.rst b/docs/source/tutorials/evaluation.rst
index 1f14bf84..8a7318ad 100644
--- a/docs/source/tutorials/evaluation.rst
+++ b/docs/source/tutorials/evaluation.rst
@@ -197,6 +197,15 @@ Labeling, such as creating a reference text, can be quite challenging in many NL
LLM as Judge
^^^^^^^^^^^^^^^^^^^^^^^^^
+Just as LLMs have made AI tasks easier, they have made the evaluation of AI tasks easier too.
+
+The real power of using an LLM as judge is:
+
+1. Its **adaptability**: compared with all the above metrics, it can be adapted to any task out of the box.
+2. Its **flexibility** and **robustness** at measuring: many NLG tasks have multiple references or even countless correct responses, so traditional metrics can be very limiting.
+
+3. **Less training data**: aligning an LLM judge to your task using (question, ground truth, generated text, gt_score) tuples takes less data than finetuning a model like BERTScore.
+
Evaluating an LLM application using an LLM as a judge is similar to building an LLM task pipeline.
Developers need to understand the underlying prompt used by the LLM judge to determine whether the default judge is sufficient or if customization is required.
@@ -544,6 +553,7 @@ There is one new way is to indirectly use the ground truth answers from the gene
Recall = [GT statements that can be attributed to the retrieved context] / [GT statements]
+There are also **Context Relevance** and **Context Precision** metrics in RAGAS.
LLM or model based judge for Retriever Recall
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -577,6 +587,10 @@ See the evaluation on datasets at :doc:`Evaluating a RAG Pipeline <../tutorials/
Additionally, there are more research for RAG evaluation, such as SemScore [13]_, ARES [14]_, RGB [15]_, etc.
+.. note::
+
+   GovTech Singapore provides a well-explained evaluation guideline [22]_ that aligns with ours but offers more theoretical explanation of some metrics.
+
For Contributors
------------------------------------------
@@ -614,6 +628,7 @@ References
.. [19] Liu, Yang, et al. "Datasets for large language models: A comprehensive survey." arXiv preprint arXiv:2402.18041 (2024).
.. [20] ROUGE Deep dive: https://medium.com/nlplanet/two-minutes-nlp-learn-the-rouge-metric-by-examples-f179cc285499
.. [21] Zhu, Kunlun, et al. "RAGEval: Scenario Specific RAG Evaluation Dataset Generation Framework." arXiv preprint arXiv:2408.01262 (2024).
+.. [22] https://playbooks.capdev.govtext.gov.sg/evaluation/
.. admonition:: AdalFlow Eval API Reference
:class: highlight
@@ -632,3 +647,4 @@ References
- `Hugging Face Metrics `_
- `RAGAS `_
- `G-eval `_
+ - `Sklearn `_
diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst
index 8abf7e56..4985f92b 100644
--- a/docs/source/tutorials/index.rst
+++ b/docs/source/tutorials/index.rst
@@ -3,7 +3,7 @@
.. _developer_notes:
-Developer Notes
+Tutorials
=============================
.. *Why and How Each Part works*
@@ -152,6 +152,22 @@ Components work on a sequence of ``Document`` and return a sequence of ``Documen
.. Let us put all of these components together to build a :doc:`rag` (Retrieval Augmented Generation), which requires data processing pipeline along with a task pipeline to run user queries.
+
+
+
+Putting it all together
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :widths: 20 80
+ :header-rows: 1
+
+ * - Part
+ - Description
+ * - :doc:`rag_playbook`
+     - A comprehensive RAG playbook based on state-of-the-art research and industry best practices.
+
+
.. toctree::
:maxdepth: 1
:caption: RAG Essentials
@@ -165,6 +181,7 @@ Components work on a sequence of ``Document`` and return a sequence of ``Documen
retriever
text_splitter
db
+ rag_playbook
@@ -273,7 +290,7 @@ Note: Documentation is work in progress for this section.
* - Part
- Description
* - :doc:`parameter_`
- - The `Parameter` class stores the text, textual gradidents(feedback), and manage the states and applies the backpropagation in auto-diff.
+     - The `Parameter` class stores the text and textual gradients (feedback), manages the states, and applies backpropagation in auto-diff.
* - :doc:`optimizer_`
- The `Optimizer` to define a structure and to manage `propose`, `revert`, and `step` methods. We defined two variants: `DemoOptimizer` and `TextOptimizer` to cover the prompt optimization and the few-shot optimization.
* - :doc:`few_shot_optimizer_`
diff --git a/docs/source/tutorials/parameter.rst b/docs/source/tutorials/parameter.rst
index 3565a563..150d828f 100644
--- a/docs/source/tutorials/parameter.rst
+++ b/docs/source/tutorials/parameter.rst
@@ -3,13 +3,31 @@
Parameter
====================
+.. figure:: /_static/images/tensor_parameter.png
+ :align: center
+ :alt: AdalFlow Tensor and Parameter
+ :width: 620px
+
+
+ AdalFlow Tensor and Parameter
+
+Designing an auto-diff system for an LLM task pipeline is actually quite challenging.
+First, we made the effort to make everything a component, which made the interactions between components easier and more transparent to visualize.
+But every component takes in any type of data as input. Just as in PyTorch, where the inputs to auto-diff computations need to be tensors and trainable parameters are ``Parameter`` (a type of mutable tensor),
+the same idea can be applied to the LLM task pipeline.
+LLM task pipelines and in-context learning add new trainable parameters such as few-shot demos and instructions.
There are two types of parameters:
-* once-off, such as loss, y_pred, intermedia response where each run it will create a new one, and they are temporary and will be released after the run.
+* once-off, such as loss, y_pred, and intermediate responses; each run creates a new one, and they are temporary and released after the run.
* persistent, the parameters that we are optimizing, such as those with an actual type assigned `param_type` in the `Parameter` class.
For the persistent parameters, the data type will be string.
+
+Each parameter has:
+
+- ``id``: a unique identifier
+- ``name``: a human-readable name
+
Intermediate parameters
------------------------
intermediate parameters data = Componnet.call output.
diff --git a/docs/source/tutorials/rag_playbook.rst b/docs/source/tutorials/rag_playbook.rst
new file mode 100644
index 00000000..41b6f097
--- /dev/null
+++ b/docs/source/tutorials/rag_playbook.rst
@@ -0,0 +1,399 @@
+RAG Playbook
+================
+
+.. note::
+ This tutorial is still a work in progress. We will continue updating and improving it.
+ If you have any feedback, feel free to reach out to us in any of the following ways:
+ `Community `_.
+
+In this playbook, we provide a comprehensive RAG guide based on state-of-the-art research and industry best practices.
+The outline of the playbook is as follows:
+
+- RAG Overview
+- From the first RAG papers to today's diverse RAG architectures
+- RAG design and tuning strategies for each component
+
+
+RAG Overview
+----------------
+
+
+
+.. .. figure:: /_static/images/RAG_workflow.png
+.. :align: center
+.. :alt: RAG Workflow
+.. :width: 700px
+
+.. RAG Workflow.
+
+
+.. figure:: /_static/images/RAG_architecture.png
+ :align: center
+ :alt: Three ways retriever interacts with the generator
+ :width: 700px
+
+ Three ways retriever interacts with the generator [4]_.
+
+
+Retrieval-Augmented Generation (RAG) is a paradigm that leverages retrieval to mitigate hallucination and the knowledge cut-off problem of LLMs, and to adapt to any domain-specific knowledge base.
+Moreover, being able to cite the source of knowledge is a big plus for the transparency and interpretability of any AI use case.
+
+.. code-block:: python
+
+ RAG_PROMPT_TEMPLATE = r"""
+ {{task_desc}}
+
+
+ {{input_str}}
+ {{context_str}}
+
+ """
+
+Given a user query, RAG retrieves relevant passages from a large corpus and then generates a response based on the retrieved passages.
+This formulation opens up a wide range of use cases such as conversational search engines, question answering on a customized knowledge base,
+customer support, and fact-checking.
+The template above shows the most commonly used format of RAG, where we pass a task description and concatenate the input string and the retrieved passages (as a context string), which are then passed to an LLM for generation.
+
+But, RAG is way more than that. Let's dive in.
+
+**First RAG Papers**
+
+RAG was introduced in 2020 by Lewis et al. from Meta [1]_; it is an architecture that finetunes both the query encoder (a bi-encoder, like most embedding models) and the generator (LLM) jointly with only final-answer supervision.
+
+
+
+
+
+REALM [7]_, another RAG model introduced the same year by Google, not only finetunes both the retriever and the generator on downstream tasks but also pretrains the two models jointly by randomly masking tokens in a sampled piece of text using masked language modeling (MLM).
+
+
+The initial papers did not mention document chunking, as most of the time their text length was short and fit into the context length of the embedding models.
+As both the embedding models and LLMs scale up in knowledge and parameters (a 400M LLM was used in the paper), RAG can achieve high performance in a few-shot (prompt-engineering) setup without finetuning.
+
+However, the flexibility of RAG also means that it requires careful design and tuning to achieve optimal performance.
+For each use case, we need to consider the following questions:
+
+1. What retrieval method should we use? How many stages should it have? Do we need a reranker or even an LLM to help with the retrieval stages?
+
+2. Which cloud-database can go well with the retrieval strategy and be able to scale?
+
+3. How do I evaluate the performance of the RAG as a whole? And what metrics can help me understand the retrieval stage in particular so that I know it is not hurting the overall performance?
+
+4. Do I need query expansion or any other techniques to improve the retrieval performance? How to avoid the performance degradation due to feeding the LLM irrelevant passages?
+
+5. How do I optimize the RAG hyperparameters such as the number of retrieved passages, the size of the chunk, and the overlap between chunks, or even the chunking strategy?
+
+6. Sometimes you need to even create your own customized/finetuned embedding/retriever models. How do I do that?
+
+7. How do I auto-optimize the RAG pipeline with in-context learning (ICL), using zero-shot and few-shot prompting?
+
+8. What about finetuning? How do I do it, and would it be more token-efficient or more effective?
+
+.. **RAU (Retrieval Augmented Understanding)**
+
+.. There is also RAU.
+
+Designing RAG
+----------------------------------
+
+.. figure:: /_static/images/RAG_Enhancements.png
+ :align: center
+ :alt: RAG Enhancements
+ :width: 700px
+
+ RAG Enhancements from [8]_. Click to view the full image.
+
+======================== ============================== =========================================
+RAG Component Techniques Metrics
+======================== ============================== =========================================
+Data Preparation - Text preprocessing
+ - Chunking Strategy
+
+Data Storage - AdalFlow LocalDB
+ - Cloud Database
+ - Postgres + PgVector
+ - qdrant
+ - ...
+
+Embedding - Embedding Fine-tuning
+
+Indexing -
+
+Retrieval - Retrieval Optimization - HIT@K
+ - Query Enhancement - MRR@K
+ - Reranking - MAP@K
+ - NDCG@K
+ - AdalFlow context recall
+ - Ragas context relevancy, precision, recall
+
+Generator - Manual Prompt Engineering - Ragas answer relevancy
+ - Auto Prompt Engineering - ROUGE
+ - LLM Fine-tuning - BLEU
+ - METEOR
+ - F1 Score
+ - BERTScore
+ - AdalFlow AnswerMatchAcc
+ - AdalFlow LLM judge
+ - AdalFlow G-Eval
+ - UniEval
+======================== ============================== =========================================
+
+TODO: make this a table that i can put in links. so that i can link together other tutorials to form a comprehensive playbook.
+- move this in the tutorial section.
+
+For benchmarking datasets and metrics, please refer to :ref:`Evaluation Guideline `.
+Additionally, FlashRAG [3]_ provides more references to RAG datasets and research.
+
+
+Data Preparation Pipeline
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Document Retrieval & Reranking
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Multi-stage retrieval from the cheapest, fastest, and least accurate to the most expensive, slowest, and most accurate is introduced in :ref:`Retriever `.
+
+RAG optimization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We can either optimize each component separately, such as the retriever or the generator, drawing on research designed for each, or optimize them jointly in the context of RAG.
+Sometimes we can use an agentic approach, such as Self-RAG [11]_.
+
+#TODO: fit hydro
+
+**Retrieval Optimization**
+
+As irrelevant passages, especially those positioned at the top of the context, can degrade the final performance, it is particularly important to optimize the retrieval performance.
+We have the following options:
+
+1. Hyperparameter optimization: optimize the number of retrieved passages, the chunk size, the overlap between chunks, or even the chunking strategy, using retriever evaluation metrics or the final generator performance (see the sketch after this list).
+2. Query expansion: improve the recall by expanding the query.
+3. Adapt the embedder with LLM supervision: improve the retrieval recall and precision by adapting the embedder with LLM supervision.
+4. Reranking: use a reranker as an additional stage to improve the retrieval accuracy.
+5. Use Retrieval Evaluator: use a retrieval evaluator to evaluate the relevance of the retrieved passages.
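+
+As a hypothetical sketch of option 1 (not an AdalFlow API): pick a retrieval metric, sweep the hyperparameter, and keep the best value. ``retrieve_fn`` and ``recall_fn`` are placeholders you supply.
+
+.. code-block:: python
+
+    def sweep_top_k(retrieve_fn, recall_fn, queries, gt_passages, top_k_values=(1, 3, 5, 10)):
+        """retrieve_fn(query, top_k) -> retrieved passages; recall_fn(retrieved, gt) -> float."""
+        scores = {}
+        for k in top_k_values:
+            retrieved = [retrieve_fn(q, top_k=k) for q in queries]
+            scores[k] = recall_fn(retrieved, gt_passages)
+        best_k = max(scores, key=scores.get)  # highest-recall setting
+        return best_k, scores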
+
+
+**Generator Optimization**
+
+Ever since the first RAG papers, many LLMs with far higher parameter counts and performance have been released.
+**In-context learning (ICL) or prompt engineering** has become the first choice over **model finetuning** to optimize the generator's performance on any task.
+You can use any optimization methods designed to improve the reasoning ability of the generator, such as chain-of-thought, reflection, etc.
+
+When the generator is used in the context of RAG, however, we need to consider the relation between the retrieved context, the query, and the generated response.
+And we need to optimize the generator on:
+
+1. How well can it use the relevant context to generate the response? Was it misled by irrelevant passages?
+
+For generator, we have three popular options:
+
+
+1. Prompt engineering: use zero-shot or few-shot learning to optimize the generator, or improve the generator's response via more test-time tokens (e.g., chain-of-thought, reflection).
+
+2. Finetune the generator with instruction tuning.
+3. Finetune the generator specifically on the format of using retrieved context.
+
+We will provide a prompt engineering/ICL playbook in the future, so we skip this part for now.
+
+Retrieval optimization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**Query Transformation**
+
+Query Expansion (QE) [16]_ is a common technique used in search engines to expand the user's search query so that it matches additional relevant documents.
+
+.. TODO: use a diagram where LLM is used between the query and the retrieved documents.
+
+In this new age of LLMs, the query can be rewritten or expanded via an LLM.
+
+**Query Rewriting**
+
+By prompt-engineering the LLM to rewrite the initial query :math:`x` to :math:`x' = LLM(Prompt(x))`, we end up optimizing the retriever performance without retraining the retriever as Lewis et al. [1]_ did.
+By leveraging AdalFlow's in-context trainer, we can auto-optimize the RAG pipeline end to end.
+The only downside is a larger token budget for the LLM, which ends up being more expensive.
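+
+As a minimal sketch (assuming a model client is configured elsewhere, e.g. the ``gpt_3_model`` config used in the benchmarks), a query rewriter is just another ``Generator`` with its own template:
+
+.. code-block:: python
+
+    import adalflow as adal
+
+    query_rewrite_template = r"""
+    Write a simple search query that will help answer a complex question.
+    Question: {{question}}
+    Query:"""
+
+    def build_query_rewriter(model_client: adal.ModelClient, model_kwargs: dict) -> adal.Generator:
+        # the rewritten query is then fed to the retriever in place of the raw question
+        return adal.Generator(
+            model_client=model_client,
+            model_kwargs=model_kwargs,
+            template=query_rewrite_template,
+        )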
+
+Here we summarize a few methods and introduce AdalFlow's API.
+
+The query rewriting paper [17]_ proposes two ways to do the rewriting with an LLM:
+
+* Few-shot prompt: encourage the LLM to "reason" and output none, one, or multiple queries that are relevant to the input query.
+
+* Trainable scheme: use a smaller rewriter model instead of a black-box LLM to rewrite the query, to reduce the cost.
+  The rewriter is trained using the feedback of the generator via reinforcement learning.
+  It has two training stages: a warm-up stage, where a synthetic dataset of :math:`(x, x')` pairs that led to correct generator responses is used to finetune the rewriter;
+  then the rewriter is trained with reinforcement learning to align with the retriever and the generator.
+
+
+
+**Adapt the embedder with LLM supervision**
+
+To improve retrieval recall and precision, we can adapt the embedder with LLM supervision.
+The cheapest solution requires only a linear layer on top of the embedding model, along with a synthesized dataset of query-passage pairs generated from the data source using LLMs.
+This approach also applies to black-box embedding models. AdalFlow may open-source this technique in the future.
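+
+The following is a minimal sketch of the linear-adapter idea, assuming you already have precomputed (frozen, possibly black-box) embeddings for the LLM-synthesized query-passage pairs; it trains a single linear projection with an in-batch contrastive loss. It is not the exact recipe AdalFlow may release.
+
+.. code-block:: python
+
+    import torch
+    import torch.nn as nn
+    import torch.nn.functional as F
+
+    class LinearAdapter(nn.Module):
+        """Maps frozen query embeddings into the passage-embedding space."""
+
+        def __init__(self, dim: int):
+            super().__init__()
+            self.proj = nn.Linear(dim, dim)
+
+        def forward(self, x: torch.Tensor) -> torch.Tensor:
+            return F.normalize(self.proj(x), dim=-1)
+
+    def train_adapter(query_emb: torch.Tensor, pos_passage_emb: torch.Tensor,
+                      epochs: int = 10, lr: float = 1e-3) -> LinearAdapter:
+        # query_emb / pos_passage_emb: (N, dim) embeddings of the synthesized
+        # (query, positive passage) pairs, produced by the frozen embedder.
+        adapter = LinearAdapter(query_emb.shape[1])
+        optimizer = torch.optim.Adam(adapter.parameters(), lr=lr)
+        passages = F.normalize(pos_passage_emb, dim=-1)
+        labels = torch.arange(query_emb.shape[0])  # row i matches passage i (in-batch negatives)
+        for _ in range(epochs):
+            optimizer.zero_grad()
+            sims = adapter(query_emb) @ passages.T       # (N, N) similarity matrix
+            loss = F.cross_entropy(sims / 0.05, labels)  # temperature-scaled InfoNCE
+            loss.backward()
+            optimizer.step()
+        return adapter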
+
+.. # TODO: replug is not as good as the emsemble is a bit hard to do and no source code.
+
+A second approach is to finetune the embedder directly. Replug [6]_ is a good example of this approach.
+Replug can be used with or without finetuning.
+
+.. figure:: /_static/images/replug.png
+ :align: center
+ :alt: Replug inference pipeline
+ :width: 700px
+
+ Replug inference pipeline [6]_.
+
+Replug computes the LLM output for each (query, document) pair separately and in parallel, then ensembles all the outputs to get the final score.
+This is especially helpful for inference speed and for working around the context length limitation of the LLM.
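+
+A small sketch of that ensemble step, under our reading of the paper: each retrieved document is prepended to the query in its own prompt, and the per-prompt next-token (or answer) distributions are summed with weights given by a softmax over the retrieval scores.
+
+.. code-block:: python
+
+    import numpy as np
+
+    def replug_ensemble(token_probs: np.ndarray, retrieval_scores: np.ndarray) -> np.ndarray:
+        """token_probs: (num_docs, vocab_size) next-token probabilities, one row per
+        (document + query) prompt; retrieval_scores: (num_docs,) similarity scores."""
+        weights = np.exp(retrieval_scores - retrieval_scores.max())
+        weights = weights / weights.sum()                      # softmax over retrieved docs
+        return (weights[:, None] * token_probs).sum(axis=0)    # weighted mixture of distributions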
+
+..
+    REPLUG LSR (REPLUG with LM-Supervised Retrieval) adapts the retriever in REPLUG by using the LM itself to provide supervision about which documents should be retrieved.
+    This approach can be seen as adjusting the probabilities of the retrieved documents to match the output-sequence perplexities of the language model.
+    In theory, it aligns the retriever's likelihood on the retrieved passages with the LLM's likelihood on the ground-truth answer via KL-divergence.
+    It uses the `logprobs` of the black-box LLM. Read more in the logprobs cookbook [9]_.
+
+.. REPLUG LSR is not much more effective than REPLUG itself, so it is not worth the hassle of implementing it.
+
+**Reranking**
+
+
+Rerankers are often cross-encoders over the query and the documents. They are computationally more expensive but also more accurate. Cohere and the Transformers library both offer state-of-the-art rerankers.
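+
+A minimal reranking sketch using the ``sentence-transformers`` cross-encoder interface (the checkpoint name is just one publicly available example; a Cohere reranker exposes a similar query-passage scoring interface):
+
+.. code-block:: python
+
+    from sentence_transformers import CrossEncoder
+
+    # Score (query, passage) pairs jointly with a cross-encoder and keep the top-k.
+    reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
+
+    def rerank(query: str, passages: list[str], top_k: int = 3) -> list[str]:
+        scores = reranker.predict([(query, p) for p in passages])  # one relevance score per pair
+        ranked = sorted(zip(passages, scores), key=lambda pair: pair[1], reverse=True)
+        return [p for p, _ in ranked[:top_k]]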
+
+**Use Retrieval Evaluator**
+
+C-RAG [10]_ proposed a lightweight retrieval evaluator finetuned on the training split of the test datasets.
+More expensively, but without training a model, we can use an LLM to classify the relevance of the retrieved passages, using labels such as "correct", "incorrect", and "ambiguous".
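+
+As a sketch of the LLM-as-evaluator option (not a built-in component; the template and the label set are illustrative):
+
+.. code-block:: python
+
+    from adalflow.core import Generator
+    from adalflow.components.model_client import OpenAIClient
+
+    EVALUATOR_TEMPLATE = r"""<SYS>
+    Given a question and a retrieved passage, label the passage as one of:
+    correct, incorrect, ambiguous. Output only the label.
+    </SYS>
+    Question: {{question}}
+    Passage: {{passage}}
+    Label:"""
+
+    evaluator = Generator(
+        model_client=OpenAIClient(),
+        model_kwargs={"model": "gpt-3.5-turbo"},
+        template=EVALUATOR_TEMPLATE,
+    )
+
+    def judge(question: str, passage: str) -> str:
+        output = evaluator(prompt_kwargs={"question": question, "passage": passage})
+        return (output.data or "ambiguous").strip().lower()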
+
+Generator optimization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Besides the three popular options mentioned above, there is a branch of research where the retrieved context is integrated inside the generator (an enhanced generator) as part of the model, instead of simply being concatenated into the prompt.
+
+
+RAG pipeline optimization
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We introduce three popular overall optimization strategies for the RAG pipeline.
+
+Self-RAG
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. figure:: /_static/images/self_rag.png
+ :align: center
+ :alt: Self-RAG architecture
+ :width: 700px
+
+ Self-RAG architecture [11]_.
+
+
+Self-RAG is interesting: it is trained to decide whether retrieval is needed, and it handles the retrieved passages separately in parallel to generate :math:`y_t` for each query :math:`x` and passage :math:`d_t`.
+For each :math:`(x, d_t, y_t)` triple it "reflects" on three metrics:
+
+- ISREL: uses :math:`(x, d_t)` to check whether :math:`d_t` provides useful information to solve :math:`x`, outputting two labels (is_relevant, is_irrelevant).
+- ISSUP: uses :math:`(x, d_t, y_t)` to check whether all of the worthy statements (those answering the question) in :math:`y_t` are supported by :math:`d_t`, outputting three labels (is_supported, partially_supported, not_supported).
+- ISUSE: uses :math:`(x, y_t)` to check whether :math:`y_t` is useful for solving :math:`x`, outputting five labels (5, 4, 3, 2, 1).
+
+It computes a single segment score unifying the three metrics, uses it to rerank the candidate answers, and picks the answer with the highest score as the final answer.
+The paper also describes how to create a synthesized training dataset and train the `critic` and `generator` models.
+A nice property is that Self-RAG can be used with or without finetuning.
+
+Self-RAG can be applied to complicated tasks that require high accuracy, but it is much more complicated than a vanilla RAG.
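+
+A simplified sketch of the reranking step (the paper derives its weights from the normalized probabilities of the reflection tokens; the weights below are illustrative placeholders):
+
+.. code-block:: python
+
+    def segment_score(isrel_prob: float, issup_prob: float, isuse_score: float,
+                      w_rel: float = 1.0, w_sup: float = 1.0, w_use: float = 0.5) -> float:
+        """Combine the three reflection signals into one score for a (passage, answer) candidate.
+        isrel_prob / issup_prob are the probabilities of `is_relevant` / `is_supported`;
+        isuse_score is the 1-5 usefulness rating normalized to [0, 1]."""
+        return w_rel * isrel_prob + w_sup * issup_prob + w_use * isuse_score
+
+    def pick_best(candidates: list[dict]) -> str:
+        # candidates: dicts with keys "answer", "isrel", "issup", "isuse".
+        best = max(candidates, key=lambda c: segment_score(c["isrel"], c["issup"], c["isuse"]))
+        return best["answer"]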
+
+REALM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+REALM [7]_ is quite interesting and it has a clear optimization objective.
+
+.. figure:: /_static/images/REALM_train_architecture.png
+ :align: center
+ :alt: REALM Train Architecture
+ :width: 700px
+
+ REALM [7]_ Framework.
+
+**Retrieve-Then-Predict Process**
+
+REALM models the task as a "retrieve-then-predict" process:
+
+First, the retriever samples documents :math:`z` from a large knowledge corpus :math:`Z` based on the input :math:`x`. This retrieval is modeled by :math:`p(z | x)`, the probability of retrieving document :math:`z` given input :math:`x`.
+
+Then, the model predicts the missing words or answers based on both the input :math:`x` and the retrieved document :math:`z`, modeled as :math:`p(y | z, x)`, where :math:`y` is the prediction (e.g., masked tokens or answers).
+
+**Marginalizing Over All Possible Documents**
+
+The probability of correctly predicting the target output :math:`y` given input :math:`x` is computed by marginalizing over all possible documents in the knowledge corpus :math:`Z`:
+
+.. math::
+
+ p(y | x) = \sum_{z \in Z} p(y | z, x) \cdot p(z | x)
+
+
+This means that the overall probability is a weighted sum of how well each document :math:`z` helps predict :math:`y`, weighted by the retriever’s belief :math:`p(z | x)` in that document.
+
+**Loss Function and Gradient Optimization**
+
+The key to optimizing the retriever is to maximize the likelihood of the correct prediction :math:`y` by adjusting the probability :math:`p(z | x)` of retrieving relevant documents.
+The log-likelihood of the correct prediction :math:`y` is the training objective:
+
+.. math::
+
+ \mathcal{L} = \log p(y | x) = \log \left( \sum_{z \in Z} p(y | z, x) \cdot p(z | x) \right)
+
+**Rewarding Relevant Documents**
+
+To see how the retriever is rewarded or punished, consider the gradient of the log-likelihood with respect to the retriever’s scoring function :math:`f(x, z)` (which measures how relevant document :math:`z` is to input :math:`x`):
+
+.. math::
+
+ \frac{\partial \log p(y | x)}{\partial f(x, z)} = \left[ \frac{p(y | z, x)}{p(y | x)} - 1 \right] p(z | x)
+
+Here’s how this works:
+
+- If the document :math:`z` improves the prediction of :math:`y` (i.e., :math:`p(y | z, x) > p(y | x)`), the gradient is positive, and the retriever is encouraged to increase the score :math:`f(x, z)`, making it more likely to retrieve that document in the future.
+
+- If the document :math:`z` does not help (i.e., :math:`p(y | z, x) < p(y | x)`), the gradient is negative, and the retriever is encouraged to decrease the score :math:`f(x, z)`, making it less likely to retrieve that document.
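+
+For completeness, here is a short derivation of the gradient above, assuming the softmax parameterization :math:`p(z | x) \propto \exp f(x, z)` used in the paper:
+
+.. math::
+
+    p(z | x) = \frac{\exp f(x, z)}{\sum_{z'} \exp f(x, z')},
+    \qquad
+    \frac{\partial p(z' | x)}{\partial f(x, z)} = p(z' | x)\left(\mathbb{1}[z' = z] - p(z | x)\right)
+
+.. math::
+
+    \frac{\partial \log p(y | x)}{\partial f(x, z)}
+    = \frac{1}{p(y | x)} \sum_{z'} p(y | z', x)\, p(z' | x)\left(\mathbb{1}[z' = z] - p(z | x)\right)
+    = \left[ \frac{p(y | z, x)}{p(y | x)} - 1 \right] p(z | x)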
+
+.. FLARE
+.. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+References
+------------------------------------------
+.. [1] Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks: https://arxiv.org/abs/2005.11401
+.. [2] GOVTech Singapore's RAG playbook: https://playbooks.capdev.govtext.gov.sg/improving_rag/
+.. [3] FlashRAG: Python toolkit for the reproduction and development of RAG research: https://github.com/RUC-NLPIR/FlashRAG
+.. [4] RAG and RAU: A Survey on Retrieval-Augmented Language Model in Natural Language Processing: https://github.com/2471023025/RALM_Survey
+.. [5] Ruochen Zhao, Hailin Chen, Weishi Wang, Fangkai Jiao, Xuan Long Do, Chengwei Qin, Bosheng Ding, Xiaobao Guo, Minzhi Li, Xingxuan Li, et al. 2023. Retrieving multimodal information for augmented generation: A survey. arXiv preprint arXiv:2303.10868.
+.. [6] Replug: Retrieval-augmented black-box language models. arXiv preprint arXiv:2301.12652.
+.. [7] REALM: Kelvin Guu, Kenton Lee, Zora Tung, Panupong Pasupat, and Mingwei Chang. 2020. Retrieval augmented language model pre-training. In International conference on machine learning, pages 3929–3938. PMLR.
+.. [8] Retrieval-Augmented Generation for AI-Generated Content: A Survey.
+.. [9] OpenAI logprobs cookbook: https://cookbook.openai.com/examples/using_logprobs
+.. [10] C-RAG: Corrective retrieval augmented generation. arXiv preprint arXiv:2401.15884.
+.. [11] Self-RAG: Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2023. Self-RAG: Learning to retrieve, generate, and critique through self-reflection. CoRR, abs/2310.11511.
+.. [12] Replug implemented: https://github.com/IntelLabs/fastRAG/blob/main/examples/replug_parallel_reader.ipynb
+.. [13] FastRAG: https://github.com/IntelLabs/fastRAG
+.. [14] FLARE: Zhengbao Jiang, Frank F. Xu, Luyu Gao, Zhiqing Sun, Qian Liu, Jane Dwivedi-Yu, Yiming Yang, Jamie Callan, and Graham Neubig. 2023. Active retrieval augmented generation. arXiv preprint arXiv:2305.06983.
+.. [15] Yuning Mao, Pengcheng He, Xiaodong Liu, Yelong Shen, Jianfeng Gao, Jiawei Han, and Weizhu Chen. 2020. Generation-augmented retrieval for open-domain question answering. arXiv preprint arXiv:2009.08553.
+.. [16] Query Expansion: https://en.wikipedia.org/wiki/Query_expansion
+.. [17] Ma, Xinbei, et al. "Query rewriting for retrieval-augmented large language models." arXiv preprint arXiv:2305.14283 (2023).
+..
+ TODO:
+    - Replug generator implementation (fastRAG currently implements it with Haystack)
+    - Self-RAG implementation (raw response; we might need to add an API response to save logprobs and everything so that users can customize)
+    - open-source the embedder finetuning
+    - extend: all of this research can be provided as extensions, and we need to think of a way to organize it
+    - Query expansion (focus on query transformation)
+      - 1. add a summary for each document and save it in meta_data
+      - 2. query transformation
+      - 3. query rewriting to a form that fits a particular database
+    - use HotpotQA, rewrite the query (use a query rewriter out of the box) and then do auto-optimization
diff --git a/docs/source/tutorials/retriever.rst b/docs/source/tutorials/retriever.rst
index 1cd39225..7e1a30dd 100644
--- a/docs/source/tutorials/retriever.rst
+++ b/docs/source/tutorials/retriever.rst
@@ -222,13 +222,8 @@ As an example, :class:`BM25Retriever`.
-Currently only :class:`BM25Retriever` needs to have its own ``save_to_file`` and ``load_from_file`` to avoid recomputation again.
-The ``FAISSRetriever`` will work with a database instead to store the embeddings and it alleviates the need for the retriever to deal with states saving.
-
In this note, we will use the following documents and queries for demonstration:
.. code-block:: python
@@ -257,6 +252,43 @@ In this note, we will use the following documents and queries for demonstration:
The first query should retrieve the first and the last document, and the second query should retrieve the second and the third document.
+Documents filtering
+--------------------
+Before using more advanced retrieval methods, it is common to filter the documents first.
+How you filter depends on your data storage, whether it is in memory, on local disk, or in a cloud database.
+For a cloud database, it depends heavily on the database's search and filter capabilities; SQL-based filtering is common, scalable, and efficient.
+
+If you are using `LocalDB` with `Document` as the data item, you can use the `filter` method to filter the documents.
+
+You can filter the documents before passing them (or the processed document chunks and embeddings) to the retriever.
+
+.. code-block:: python
+
+ from adalflow.core.db import LocalDB
+ from adalflow.core.types import Document
+
+ db = LocalDB()
+ db.connect()
+
+ # Add the documents to the database
+ for doc in documents:
+ db.add_item(Document(**doc))
+
+ # Filter the documents
+ filtered_documents = db.filter(Document, title="Solar Panels")
+
+ print(filtered_documents)
+
+
+Retriever in Action
+--------------------
+All of our retrievers are subclassed from the base retriever and are located in the ``components.retriever`` module.
+You can skim through their implementations here: :ref:`retriever`.
+Currently only :class:`BM25Retriever` needs its own ``save_to_file`` and ``load_from_file`` to avoid recomputation.
+The ``FAISSRetriever`` works with a database to store the embeddings instead, which alleviates the need for the retriever to handle state saving.
+
+
+
FAISSRetriever
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
First, let's do semantic search, here we will use in-memory :class:`FAISSRetriever`.
diff --git a/docs/source/use_cases/build_a_rag.rst b/docs/source/use_cases/build_a_rag.rst
index ce15bb49..69c97b20 100644
--- a/docs/source/use_cases/build_a_rag.rst
+++ b/docs/source/use_cases/build_a_rag.rst
@@ -15,6 +15,14 @@
Designing RAG
================
+
+.. figure:: /_static/images/generator.png
+ :align: center
+ :alt: AdalFlow generator design
+ :width: 700px
+
+ Generator - The Orchestrator for LLM Prediction
+
Retrieval-Augmented Generation (RAG) is a paradigm that combines the strengths of retrieval and generation models.
Given a user query, RAG retrieves relevant passages from a large corpus and then generates a response based on the retrieved passages.
This formulation opens up a wide range of use cases such as conversational search engine, question answering on a customized knowledge base,
@@ -39,3 +47,51 @@ For each use case, we need to answer:
7. How do I auto-optimize the RAG pipeline with In-context learning(ICLs) with zero-shot prompting and few-shot prompting?
8. What about finetuning? How to do it and would it be more token efficient or more effective?
+
+In this tutorial, we will cover:
+
+- Provide resource links to form a comprehensive RAG playbook according to state-of-the-art research and industry best practices.
+- Build a local and model-agnostic RAG pipeline and data processing pipeline with the AdalFlow library.
+- Add query expansion.
+
+First RAG Paper
+------------------
+RAG was introduced in 2020 by Lewis et al. [1]_ as an architecture that finetunes both the query encoder (a bi-encoder, like most embedding models) and the generator (LLM) jointly, with only final-answer supervision.
+It did not mention document chunking, as most of the time the text is short enough to fit into the context length of the embedding models.
+As both the embedding model and the LLM scale up in knowledge and parameters (the paper used a roughly 400M-parameter generator), RAG can achieve high performance in a few-shot (prompt engineering) setup without finetuning.
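+
+Roughly, with a retriever :math:`p_\eta(z | x)` and a generator :math:`p_\theta(y | x, z)`, the paper's RAG-Sequence formulation marginalizes over the top-:math:`k` retrieved passages and trains the query encoder and the generator jointly from the final-answer likelihood alone:
+
+.. math::
+
+    p(y | x) \approx \sum_{z \in \text{top-}k\left(p_\eta(\cdot | x)\right)} p_\eta(z | x) \prod_{i} p_\theta(y_i | x, z, y_{1:i-1})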
+
+RAG Playbook
+------------------
+
+======================== ========================= =========================================
+RAG Pipeline Component Improvement Techniques Evaluation Metric
+======================== ========================= =========================================
+Data Preparation - Text preprocessing -
+ - Chunking Strategy
+
+Embedding - Embedding Fine-tuning -
+
+Indexing - -
+
+Retrieval - Retrieval Optimization - HIT@K
+ - Query Enhancement - MRR@K
+ - Reranking - MAP@K
+ - NDCG@K
+ - Ragas context relevancy, precision, recall
+
+Completion - Prompt Engineering - Ragas answer relevancy
+ - LLM Fine-tuning - AutoAIS
+ - ROUGE
+ - BLEU
+ - METEOR
+ - F1 Score
+ - BERTScore
+ - UniEval
+ - G-Eval
+======================== ========================= =========================================
+
+
+References
+------------------------------------------
+.. [1] Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks: https://arxiv.org/abs/2005.11401
+.. [2] RAG playbook: https://playbooks.capdev.govtext.gov.sg/
diff --git a/docs/source/use_cases/index.rst b/docs/source/use_cases/index.rst
index 419bdf8b..aa6246b8 100644
--- a/docs/source/use_cases/index.rst
+++ b/docs/source/use_cases/index.rst
@@ -17,6 +17,8 @@ We will build use cases end-to-end, ranging from classification (classical NLP t
* - Part
- Description
+ * - :doc:`rag_playbook`
+ - Comprehensive RAG playbook according to the sota research and the best practices in the industry.
* - :doc:`build_a_rag`
- Designing a RAG pipeline, from offline data processing to online inference.
* - :doc:`eval_a_rag`
@@ -27,6 +29,7 @@ We will build use cases end-to-end, ranging from classification (classical NLP t
:caption: RAG vibe
:hidden:
+ rag_playbook
build_a_rag
eval_a_rag
diff --git a/poetry.lock b/poetry.lock
index e2c0cd94..b9ec8081 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -44,7 +44,7 @@ testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized",
[[package]]
name = "adalflow"
-version = "0.2.0.beta.3"
+version = "0.2.2"
description = "The Library to Build and Auto-optimize Any LLM Task Pipeline"
optional = false
python-versions = ">=3.9, <4.0"
@@ -67,6 +67,7 @@ tqdm = "^4.66.4"
[package.extras]
anthropic = ["anthropic (>=0.31.1,<0.32.0)"]
cohere = ["cohere (>=5.5.8,<6.0.0)"]
+datasets = []
faiss-cpu = ["faiss-cpu (>=1.8.0,<2.0.0)"]
google-generativeai = ["google-generativeai (>=0.7.2,<0.8.0)"]
groq = ["groq (>=0.9.0,<0.10.0)"]
@@ -1008,45 +1009,6 @@ files = [
{file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"},
]
-[[package]]
-name = "deepeval"
-version = "1.1.6"
-description = "The open-source evaluation framework for LLMs."
-optional = false
-python-versions = "*"
-files = [
- {file = "deepeval-1.1.6-py3-none-any.whl", hash = "sha256:58d204257645d49b3146d53bfb6dc445021406427c71fe2f7aec8cd7021f9f81"},
- {file = "deepeval-1.1.6.tar.gz", hash = "sha256:7ced9ecfc038eceafd08e2fb041f2520d594b823fec1e5649cc3b5688d1390c0"},
-]
-
-[package.dependencies]
-docx2txt = ">=0.8,<1.0"
-grpcio = ">=1.63.0,<1.64.0"
-importlib-metadata = ">=6.0.2"
-langchain = "*"
-langchain-core = "*"
-langchain-openai = "*"
-opentelemetry-api = ">=1.24.0,<1.25.0"
-opentelemetry-exporter-otlp-proto-grpc = ">=1.24.0,<1.25.0"
-opentelemetry-sdk = ">=1.24.0,<1.25.0"
-portalocker = "*"
-protobuf = "*"
-pydantic = "*"
-pytest = "*"
-pytest-repeat = "*"
-pytest-xdist = "*"
-ragas = "*"
-requests = "*"
-rich = "*"
-sentry-sdk = "*"
-tabulate = "*"
-tenacity = ">=8.4.1,<8.5.0"
-tqdm = "*"
-typer = "*"
-
-[package.extras]
-dev = ["black"]
-
[[package]]
name = "defusedxml"
version = "0.7.1"
@@ -1058,23 +1020,6 @@ files = [
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
]
-[[package]]
-name = "deprecated"
-version = "1.2.14"
-description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
-files = [
- {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"},
- {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"},
-]
-
-[package.dependencies]
-wrapt = ">=1.10,<2"
-
-[package.extras]
-dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"]
-
[[package]]
name = "dill"
version = "0.3.8"
@@ -1123,16 +1068,6 @@ files = [
{file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
]
-[[package]]
-name = "docx2txt"
-version = "0.8"
-description = "A pure python-based utility to extract text and images from docx files."
-optional = false
-python-versions = "*"
-files = [
- {file = "docx2txt-0.8.tar.gz", hash = "sha256:2c06d98d7cfe2d3947e5760a57d924e3ff07745b379c8737723922e7009236e5"},
-]
-
[[package]]
name = "dspy-ai"
version = "2.4.13"
@@ -1173,20 +1108,6 @@ qdrant = ["fastembed", "qdrant-client"]
snowflake = ["snowflake-snowpark-python"]
weaviate = ["weaviate-client (>=4.6.5,<4.7.0)"]
-[[package]]
-name = "execnet"
-version = "2.1.1"
-description = "execnet: rapid multi-Python deployment"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"},
- {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"},
-]
-
-[package.extras]
-testing = ["hatch", "pre-commit", "pytest", "tox"]
-
[[package]]
name = "executing"
version = "2.0.1"
@@ -1539,23 +1460,6 @@ tqdm = "*"
[package.extras]
test = ["build", "mypy", "pytest", "pytest-xdist", "ruff", "twine", "types-requests", "types-setuptools"]
-[[package]]
-name = "googleapis-common-protos"
-version = "1.65.0"
-description = "Common protobufs used in Google APIs"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "googleapis_common_protos-1.65.0-py2.py3-none-any.whl", hash = "sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63"},
- {file = "googleapis_common_protos-1.65.0.tar.gz", hash = "sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0"},
-]
-
-[package.dependencies]
-protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
-
-[package.extras]
-grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
-
[[package]]
name = "graphviz"
version = "0.20.3"
@@ -1846,36 +1750,6 @@ files = [
{file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
]
-[[package]]
-name = "importlib-metadata"
-version = "7.0.0"
-description = "Read metadata from Python packages"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "importlib_metadata-7.0.0-py3-none-any.whl", hash = "sha256:d97503976bb81f40a193d41ee6570868479c69d5068651eb039c40d850c59d67"},
- {file = "importlib_metadata-7.0.0.tar.gz", hash = "sha256:7fc841f8b8332803464e5dc1c63a2e59121f46ca186c0e2e182e80bf8c1319f7"},
-]
-
-[package.dependencies]
-zipp = ">=0.5"
-
-[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
-perf = ["ipython"]
-testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"]
-
-[[package]]
-name = "iniconfig"
-version = "2.0.0"
-description = "brain-dead simple config-ini parsing"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
- {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
-]
-
[[package]]
name = "ipykernel"
version = "6.29.5"
@@ -2771,30 +2645,6 @@ files = [
docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"]
testing = ["coverage", "pyyaml"]
-[[package]]
-name = "markdown-it-py"
-version = "3.0.0"
-description = "Python port of markdown-it. Markdown parsing, done right!"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"},
- {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"},
-]
-
-[package.dependencies]
-mdurl = ">=0.1,<1.0"
-
-[package.extras]
-benchmarking = ["psutil", "pytest", "pytest-benchmark"]
-code-style = ["pre-commit (>=3.0,<4.0)"]
-compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"]
-linkify = ["linkify-it-py (>=1,<3)"]
-plugins = ["mdit-py-plugins"]
-profiling = ["gprof2dot"]
-rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"]
-testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
-
[[package]]
name = "markupsafe"
version = "2.1.5"
@@ -2949,17 +2799,6 @@ files = [
[package.dependencies]
traitlets = "*"
-[[package]]
-name = "mdurl"
-version = "0.1.2"
-description = "Markdown URL utilities"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"},
- {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"},
-]
-
[[package]]
name = "mistune"
version = "3.0.2"
@@ -3534,99 +3373,6 @@ typing-extensions = ">=4.11,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
-[[package]]
-name = "opentelemetry-api"
-version = "1.24.0"
-description = "OpenTelemetry Python API"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "opentelemetry_api-1.24.0-py3-none-any.whl", hash = "sha256:0f2c363d98d10d1ce93330015ca7fd3a65f60be64e05e30f557c61de52c80ca2"},
- {file = "opentelemetry_api-1.24.0.tar.gz", hash = "sha256:42719f10ce7b5a9a73b10a4baf620574fb8ad495a9cbe5c18d76b75d8689c67e"},
-]
-
-[package.dependencies]
-deprecated = ">=1.2.6"
-importlib-metadata = ">=6.0,<=7.0"
-
-[[package]]
-name = "opentelemetry-exporter-otlp-proto-common"
-version = "1.24.0"
-description = "OpenTelemetry Protobuf encoding"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "opentelemetry_exporter_otlp_proto_common-1.24.0-py3-none-any.whl", hash = "sha256:e51f2c9735054d598ad2df5d3eca830fecfb5b0bda0a2fa742c9c7718e12f641"},
- {file = "opentelemetry_exporter_otlp_proto_common-1.24.0.tar.gz", hash = "sha256:5d31fa1ff976cacc38be1ec4e3279a3f88435c75b38b1f7a099a1faffc302461"},
-]
-
-[package.dependencies]
-opentelemetry-proto = "1.24.0"
-
-[[package]]
-name = "opentelemetry-exporter-otlp-proto-grpc"
-version = "1.24.0"
-description = "OpenTelemetry Collector Protobuf over gRPC Exporter"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "opentelemetry_exporter_otlp_proto_grpc-1.24.0-py3-none-any.whl", hash = "sha256:f40d62aa30a0a43cc1657428e59fcf82ad5f7ea8fff75de0f9d9cb6f739e0a3b"},
- {file = "opentelemetry_exporter_otlp_proto_grpc-1.24.0.tar.gz", hash = "sha256:217c6e30634f2c9797999ea9da29f7300479a94a610139b9df17433f915e7baa"},
-]
-
-[package.dependencies]
-deprecated = ">=1.2.6"
-googleapis-common-protos = ">=1.52,<2.0"
-grpcio = ">=1.0.0,<2.0.0"
-opentelemetry-api = ">=1.15,<2.0"
-opentelemetry-exporter-otlp-proto-common = "1.24.0"
-opentelemetry-proto = "1.24.0"
-opentelemetry-sdk = ">=1.24.0,<1.25.0"
-
-[package.extras]
-test = ["pytest-grpc"]
-
-[[package]]
-name = "opentelemetry-proto"
-version = "1.24.0"
-description = "OpenTelemetry Python Proto"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "opentelemetry_proto-1.24.0-py3-none-any.whl", hash = "sha256:bcb80e1e78a003040db71ccf83f2ad2019273d1e0828089d183b18a1476527ce"},
- {file = "opentelemetry_proto-1.24.0.tar.gz", hash = "sha256:ff551b8ad63c6cabb1845ce217a6709358dfaba0f75ea1fa21a61ceddc78cab8"},
-]
-
-[package.dependencies]
-protobuf = ">=3.19,<5.0"
-
-[[package]]
-name = "opentelemetry-sdk"
-version = "1.24.0"
-description = "OpenTelemetry Python SDK"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "opentelemetry_sdk-1.24.0-py3-none-any.whl", hash = "sha256:fa731e24efe832e98bcd90902085b359dcfef7d9c9c00eb5b9a18587dae3eb59"},
- {file = "opentelemetry_sdk-1.24.0.tar.gz", hash = "sha256:75bc0563affffa827700e0f4f4a68e1e257db0df13372344aebc6f8a64cde2e5"},
-]
-
-[package.dependencies]
-opentelemetry-api = "1.24.0"
-opentelemetry-semantic-conventions = "0.45b0"
-typing-extensions = ">=3.7.4"
-
-[[package]]
-name = "opentelemetry-semantic-conventions"
-version = "0.45b0"
-description = "OpenTelemetry Semantic Conventions"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "opentelemetry_semantic_conventions-0.45b0-py3-none-any.whl", hash = "sha256:a4a6fb9a7bacd9167c082aa4681009e9acdbfa28ffb2387af50c2fef3d30c864"},
- {file = "opentelemetry_semantic_conventions-0.45b0.tar.gz", hash = "sha256:7c84215a44ac846bc4b8e32d5e78935c5c43482e491812a0bb8aaf87e4d92118"},
-]
-
[[package]]
name = "optuna"
version = "3.6.1"
@@ -3994,40 +3740,6 @@ docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"]
type = ["mypy (>=1.8)"]
-[[package]]
-name = "pluggy"
-version = "1.5.0"
-description = "plugin and hook calling mechanisms for python"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
- {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
-]
-
-[package.extras]
-dev = ["pre-commit", "tox"]
-testing = ["pytest", "pytest-benchmark"]
-
-[[package]]
-name = "portalocker"
-version = "2.10.1"
-description = "Wraps the portalocker recipe for easy usage"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf"},
- {file = "portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f"},
-]
-
-[package.dependencies]
-pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""}
-
-[package.extras]
-docs = ["sphinx (>=1.7.1)"]
-redis = ["redis"]
-tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"]
-
[[package]]
name = "pre-commit"
version = "3.8.0"
@@ -4413,60 +4125,6 @@ files = [
{file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"},
]
-[[package]]
-name = "pytest"
-version = "8.3.2"
-description = "pytest: simple powerful testing with Python"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"},
- {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"},
-]
-
-[package.dependencies]
-colorama = {version = "*", markers = "sys_platform == \"win32\""}
-iniconfig = "*"
-packaging = "*"
-pluggy = ">=1.5,<2"
-
-[package.extras]
-dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
-
-[[package]]
-name = "pytest-repeat"
-version = "0.9.3"
-description = "pytest plugin for repeating tests"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "pytest_repeat-0.9.3-py3-none-any.whl", hash = "sha256:26ab2df18226af9d5ce441c858f273121e92ff55f5bb311d25755b8d7abdd8ed"},
- {file = "pytest_repeat-0.9.3.tar.gz", hash = "sha256:ffd3836dfcd67bb270bec648b330e20be37d2966448c4148c4092d1e8aba8185"},
-]
-
-[package.dependencies]
-pytest = "*"
-
-[[package]]
-name = "pytest-xdist"
-version = "3.6.1"
-description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"},
- {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"},
-]
-
-[package.dependencies]
-execnet = ">=2.1"
-pytest = ">=7.0.0"
-
-[package.extras]
-psutil = ["psutil (>=3.0)"]
-setproctitle = ["setproctitle"]
-testing = ["filelock"]
-
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
@@ -4973,24 +4631,6 @@ files = [
{file = "rfc3986_validator-0.1.1.tar.gz", hash = "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055"},
]
-[[package]]
-name = "rich"
-version = "13.8.0"
-description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
-optional = false
-python-versions = ">=3.7.0"
-files = [
- {file = "rich-13.8.0-py3-none-any.whl", hash = "sha256:2e85306a063b9492dffc86278197a60cbece75bcb766022f3436f567cae11bdc"},
- {file = "rich-13.8.0.tar.gz", hash = "sha256:a5ac1f1cd448ade0d59cc3356f7db7a7ccda2c8cbae9c7a90c28ff463d3e91f4"},
-]
-
-[package.dependencies]
-markdown-it-py = ">=2.2.0"
-pygments = ">=2.13.0,<3.0.0"
-
-[package.extras]
-jupyter = ["ipywidgets (>=7.5.1,<9)"]
-
[[package]]
name = "rpds-py"
version = "0.20.0"
@@ -5268,57 +4908,6 @@ nativelib = ["pyobjc-framework-Cocoa", "pywin32"]
objc = ["pyobjc-framework-Cocoa"]
win32 = ["pywin32"]
-[[package]]
-name = "sentry-sdk"
-version = "2.13.0"
-description = "Python client for Sentry (https://sentry.io)"
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "sentry_sdk-2.13.0-py2.py3-none-any.whl", hash = "sha256:6beede8fc2ab4043da7f69d95534e320944690680dd9a963178a49de71d726c6"},
- {file = "sentry_sdk-2.13.0.tar.gz", hash = "sha256:8d4a576f7a98eb2fdb40e13106e41f330e5c79d72a68be1316e7852cf4995260"},
-]
-
-[package.dependencies]
-certifi = "*"
-urllib3 = ">=1.26.11"
-
-[package.extras]
-aiohttp = ["aiohttp (>=3.5)"]
-anthropic = ["anthropic (>=0.16)"]
-arq = ["arq (>=0.23)"]
-asyncpg = ["asyncpg (>=0.23)"]
-beam = ["apache-beam (>=2.12)"]
-bottle = ["bottle (>=0.12.13)"]
-celery = ["celery (>=3)"]
-celery-redbeat = ["celery-redbeat (>=2)"]
-chalice = ["chalice (>=1.16.0)"]
-clickhouse-driver = ["clickhouse-driver (>=0.2.0)"]
-django = ["django (>=1.8)"]
-falcon = ["falcon (>=1.4)"]
-fastapi = ["fastapi (>=0.79.0)"]
-flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"]
-grpcio = ["grpcio (>=1.21.1)", "protobuf (>=3.8.0)"]
-httpx = ["httpx (>=0.16.0)"]
-huey = ["huey (>=2)"]
-huggingface-hub = ["huggingface-hub (>=0.22)"]
-langchain = ["langchain (>=0.0.210)"]
-litestar = ["litestar (>=2.0.0)"]
-loguru = ["loguru (>=0.5)"]
-openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"]
-opentelemetry = ["opentelemetry-distro (>=0.35b0)"]
-opentelemetry-experimental = ["opentelemetry-distro"]
-pure-eval = ["asttokens", "executing", "pure-eval"]
-pymongo = ["pymongo (>=3.1)"]
-pyspark = ["pyspark (>=2.4.4)"]
-quart = ["blinker (>=1.1)", "quart (>=0.16.1)"]
-rq = ["rq (>=0.6)"]
-sanic = ["sanic (>=0.8)"]
-sqlalchemy = ["sqlalchemy (>=1.2)"]
-starlette = ["starlette (>=0.19.1)"]
-starlite = ["starlite (>=1.48)"]
-tornado = ["tornado (>=6)"]
-
[[package]]
name = "setuptools"
version = "72.1.0"
@@ -5335,17 +4924,6 @@ core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.te
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.11.*)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (<0.4)", "pytest-ruff (>=0.2.1)", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-[[package]]
-name = "shellingham"
-version = "1.5.4"
-description = "Tool to Detect Surrounding Shell"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"},
- {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
-]
-
[[package]]
name = "six"
version = "1.16.0"
@@ -5519,20 +5097,6 @@ mpmath = ">=1.1.0,<1.4"
[package.extras]
dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"]
-[[package]]
-name = "tabulate"
-version = "0.9.0"
-description = "Pretty-print tabular data"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
- {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
-]
-
-[package.extras]
-widechars = ["wcwidth"]
-
[[package]]
name = "tenacity"
version = "8.4.2"
@@ -6061,23 +5625,6 @@ build = ["cmake (>=3.20)", "lit"]
tests = ["autopep8", "flake8", "isort", "llnl-hatchet", "numpy", "pytest", "scipy (>=1.7.1)"]
tutorials = ["matplotlib", "pandas", "tabulate"]
-[[package]]
-name = "typer"
-version = "0.12.5"
-description = "Typer, build great CLIs. Easy to code. Based on Python type hints."
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b"},
- {file = "typer-0.12.5.tar.gz", hash = "sha256:f592f089bedcc8ec1b974125d64851029c3b1af145f04aca64d69410f0c9b722"},
-]
-
-[package.dependencies]
-click = ">=8.0.0"
-rich = ">=10.11.0"
-shellingham = ">=1.3.0"
-typing-extensions = ">=3.7.4.3"
-
[[package]]
name = "types-python-dateutil"
version = "2.9.0.20240316"
@@ -6359,85 +5906,6 @@ files = [
{file = "widgetsnbextension-4.0.11.tar.gz", hash = "sha256:8b22a8f1910bfd188e596fe7fc05dcbd87e810c8a4ba010bdb3da86637398474"},
]
-[[package]]
-name = "wrapt"
-version = "1.16.0"
-description = "Module for decorators, wrappers and monkey patching."
-optional = false
-python-versions = ">=3.6"
-files = [
- {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"},
- {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"},
- {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"},
- {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"},
- {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"},
- {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"},
- {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"},
- {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"},
- {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"},
- {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"},
- {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"},
- {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"},
- {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"},
- {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"},
- {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"},
- {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"},
- {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"},
- {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"},
- {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"},
- {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"},
- {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"},
- {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"},
- {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"},
- {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"},
- {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"},
- {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"},
- {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"},
- {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"},
- {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"},
- {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"},
- {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"},
- {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"},
- {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"},
- {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"},
- {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"},
- {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"},
- {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"},
- {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"},
- {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"},
- {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"},
- {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"},
- {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"},
- {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"},
- {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"},
- {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"},
- {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"},
- {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"},
- {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"},
- {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"},
- {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"},
- {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"},
- {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"},
- {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"},
- {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"},
- {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"},
- {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"},
- {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"},
- {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"},
- {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"},
- {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"},
- {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"},
- {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"},
- {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"},
- {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"},
- {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"},
- {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"},
- {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"},
- {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"},
- {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"},
- {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"},
-]
-
[[package]]
name = "xxhash"
version = "3.4.1"
@@ -6658,26 +6126,7 @@ files = [
idna = ">=2.0"
multidict = ">=4.0"
-[[package]]
-name = "zipp"
-version = "3.20.1"
-description = "Backport of pathlib-compatible object wrapper for zip files"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "zipp-3.20.1-py3-none-any.whl", hash = "sha256:9960cd8967c8f85a56f920d5d507274e74f9ff813a0ab8889a5b5be2daf44064"},
- {file = "zipp-3.20.1.tar.gz", hash = "sha256:c22b14cc4763c5a5b04134207736c107db42e9d3ef2d9779d465f5f1bcba572b"},
-]
-
-[package.extras]
-check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
-cover = ["pytest-cov"]
-doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
-enabler = ["pytest-enabler (>=2.2)"]
-test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"]
-type = ["pytest-mypy"]
-
[metadata]
lock-version = "2.0"
python-versions = ">=3.11, <4.0"
-content-hash = "4d44108f296caafc4f938300bcd09141d2bed45c88bfbed06081be67f01ae868"
+content-hash = "8b1723987cb3d2721c57498985eccc459c0bea4450709f3cb119efcb7e80bbf5"
diff --git a/pyproject.toml b/pyproject.toml
index a1334076..e0c5eb94 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,24 +45,10 @@ tensorboard = "^2.17.0"
dspy-ai = "^2.4.13"
-# [tool.poetry.group.doc.dependencies]
-# pydata-sphinx-theme = "^0.15.3"
-# sphinx-design = "^0.6.0"
-# sphinx-copybutton = "^0.5.2"
-# sphinx = "^7.3.7"
-# nbsphinx = "^0.9.4"
-# nbconvert = "^7.16.4"
-# pandoc = "^2.3"
-# readthedocs-sphinx-search = "^0.3.2"
-# sqlalchemy = "^2.0.31"
-# google-generativeai = "^0.7.1"
-# faiss-cpu = "^1.8.0.post1"
-# lightrag = { path = "lightrag", develop = true }
transformers = "^4.44.0"
accelerate = "^0.33.0"
faiss-cpu = "^1.8.0.post1"
nltk = "^3.9.1"
-deepeval = "^1.1.6"
ragas = "^0.1.16"
diff --git a/tutorials/retriever/__init__.py b/tutorials/retriever/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tutorials/retriever/data.py b/tutorials/retriever/data.py
new file mode 100644
index 00000000..aaabafe2
--- /dev/null
+++ b/tutorials/retriever/data.py
@@ -0,0 +1,21 @@
+query_1 = "What are the benefits of renewable energy?" # gt is [0, 3]
+query_2 = "How do solar panels impact the environment?" # gt is [1, 2]
+
+documents = [
+ {
+ "title": "The Impact of Renewable Energy on the Economy",
+ "content": "Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.",
+ },
+ {
+ "title": "Understanding Solar Panels",
+ "content": "Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.",
+ },
+ {
+ "title": "Pros and Cons of Solar Energy",
+ "content": "While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.",
+ },
+ {
+ "title": "Renewable Energy and Its Effects",
+ "content": "Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.",
+ },
+]
diff --git a/tutorials/retriever/local_db.py b/tutorials/retriever/local_db.py
new file mode 100644
index 00000000..27e57a2d
--- /dev/null
+++ b/tutorials/retriever/local_db.py
@@ -0,0 +1 @@
+# show case the get_items and get_transformed_data methods
diff --git a/use_cases/.gitignore b/use_cases/.gitignore
index d7674511..f0e791b7 100644
--- a/use_cases/.gitignore
+++ b/use_cases/.gitignore
@@ -1,2 +1,3 @@
.ipynb_checkpoints/
../index.faiss
+/extend
diff --git a/use_cases/classification/prepare_for_train.py b/use_cases/classification/prepare_for_train.py
index abbdd15e..e24f5c7f 100644
--- a/use_cases/classification/prepare_for_train.py
+++ b/use_cases/classification/prepare_for_train.py
@@ -85,7 +85,7 @@ def diagnose_string_output(
if __name__ == "__main__":
- from LightRAG.use_cases.config import (
+ from use_cases.config import (
gpt_4o_model,
)
diff --git a/use_cases/classification/trec_task_structured_output.py b/use_cases/classification/trec_task_structured_output.py
index 98e3c184..eb5333cd 100644
--- a/use_cases/classification/trec_task_structured_output.py
+++ b/use_cases/classification/trec_task_structured_output.py
@@ -87,12 +87,18 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
use_cache=True,
)
+ # TODO: can automatically convert everything to parameter if it is not already
+ # inside of the forward function instead of doing it here.
+ # and this conversion will give input type automatically
def _prepare_input(self, question: str):
input_data = self.data_class(question=question)
input_str = self.parser.get_input_str(input_data)
prompt_kwargs = {
"input_str": adal.Parameter(
- data=input_str, requires_opt=False, role_desc="input to the LLM"
+ data=input_str,
+ requires_opt=True,
+ role_desc="input to the LLM",
+ param_type=adal.ParameterType.INPUT,
)
}
return prompt_kwargs
diff --git a/use_cases/generator/basic.ipynb b/use_cases/generator/basic.ipynb
index 8c2385ac..117f3919 100644
--- a/use_cases/generator/basic.ipynb
+++ b/use_cases/generator/basic.ipynb
@@ -1,5 +1,12 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Basic Generator Usage"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -17,6 +24,7 @@
"source": [
"In default, the generator uses a default prompt template. It has these varaibles:\n",
"\n",
+ "```\n",
"LIGHTRAG_DEFAULT_PROMPT_ARGS = [\n",
" \"task_desc_str\",\n",
" \"output_format_str\",\n",
@@ -27,17 +35,30 @@
" \"steps_str\",\n",
" \"input_str\",\n",
" \"output_str\",\n",
- "]"
+ "]\n",
+ "```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# first, let's set up the library log just in case, in default at INFO level\n",
- "from utils.logger import get_logger\n",
+ "from adalflow.utils.logger import get_logger\n",
+ "\n",
"get_logger()"
]
},
@@ -50,25 +71,23 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "2024-06-09 22:06:49 - INFO - [prompt_builder.py:82:__init__] - Prompt has variables: ['chat_history_str', 'task_desc_str', 'steps_str', 'examples_str', 'tools_str', 'context_str', 'output_str', 'output_format_str', 'input_str']\n",
- "2024-06-09 22:06:49 - INFO - [generator.py:194:call] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:06:49 - INFO - [generator.py:195:call] - model_kwargs: {}\n",
- "2024-06-09 22:06:49 - INFO - [openai_client.py:122:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': '\\nWhat is the capital of France?\\n'}]}\n",
- "2024-06-09 22:06:49 - INFO - [_client.py:1026:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:06:49 - INFO - [generator.py:203:call] - output: GeneratorOutput(data='The capital of France is Paris.', error=None, raw_response='The capital of France is Paris.')\n",
- "GeneratorOutput(data='The capital of France is Paris.', error=None, raw_response='The capital of France is Paris.')\n"
+ "2024-09-11 06:28:35 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['steps_str', 'context_str', 'output_format_str', 'input_format_str', 'examples_str', 'chat_history_str', 'task_desc_str', 'tools_str', 'input_str']\n",
+ "2024-09-11 06:28:35 - generator - INFO - [generator.py:141:__init__] - Generator Generator initialized.\n",
+ "2024-09-11 06:28:35 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': '\\nYou are a helpful assistant.\\n\\n\\nWhat is the capital of France?\\n'}]}\n",
+ "2024-09-11 06:28:36 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:28:36 - generator - INFO - [generator.py:773:call] - output: GeneratorOutput(id=None, data='The capital of France is Paris.', error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=48, total_tokens=55), raw_response='The capital of France is Paris.', metadata=None)\n",
+ "GeneratorOutput(id=None, data='The capital of France is Paris.', error=None, usage=CompletionUsage(completion_tokens=7, prompt_tokens=48, total_tokens=55), raw_response='The capital of France is Paris.', metadata=None)\n"
]
}
],
"source": [
"from adalflow.core import Generator\n",
"from adalflow.components.model_client import OpenAIClient\n",
- "from adalflow.utils import setup_env # ensure you have .env with OPENAI_API_KEY\n",
+ "from adalflow.utils import setup_env # ensure you have .env with OPENAI_API_KEY\n",
"\n",
+ "setup_env(\".env\")\n",
"query = \"What is the capital of France?\"\n",
- "model_kwargs = {\n",
- " \"model\": \"gpt-3.5-turbo\"\n",
- "}\n",
+ "model_kwargs = {\"model\": \"gpt-3.5-turbo\"}\n",
"generator = Generator(model_client=OpenAIClient(), model_kwargs=model_kwargs)\n",
"prompt_kwargs = {\n",
" \"input_str\": query,\n",
@@ -95,12 +114,25 @@
"output_type": "stream",
"text": [
"Prompt:\n",
- "\n",
- "\n",
+ "______________________\n",
+ "\n",
+ "You are a helpful assistant.\n",
+ "\n",
+ "\n",
"What is the capital of France?\n",
- "\n",
+ "\n",
"\n"
]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'\\nYou are a helpful assistant.\\n\\n\\nWhat is the capital of France?\\n\\n'"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -117,61 +149,57 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "2024-06-09 22:09:43 - INFO - [prompt_builder.py:82:__init__] - Prompt has variables: ['input_str']\n",
- "2024-06-09 22:09:43 - INFO - [generator.py:194:call] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:09:43 - INFO - [generator.py:195:call] - model_kwargs: {}\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2024-06-09 22:09:43 - INFO - [openai_client.py:122:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' Your are an assistant with a great sense of humor. User: What is the capital of France?. You:'}]}\n",
- "2024-06-09 22:09:44 - INFO - [_client.py:1026:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:09:44 - INFO - [generator.py:203:call] - output: GeneratorOutput(data='The capital of France is Paris. It\\'s the city of love, pastries, and baguettes, so pack your beret and get ready to say \"ooh la la!\"', error=None, raw_response='The capital of France is Paris. It\\'s the city of love, pastries, and baguettes, so pack your beret and get ready to say \"ooh la la!\"')\n",
- "GeneratorOutput(data='The capital of France is Paris. It\\'s the city of love, pastries, and baguettes, so pack your beret and get ready to say \"ooh la la!\"', error=None, raw_response='The capital of France is Paris. It\\'s the city of love, pastries, and baguettes, so pack your beret and get ready to say \"ooh la la!\"')\n"
+ "2024-09-11 06:29:25 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n",
+ "2024-09-11 06:29:25 - generator - INFO - [generator.py:141:__init__] - Generator Generator initialized.\n",
+ "2024-09-11 06:29:25 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' Your are an assistant with a great sense of humor. User: What is the capital of France?. You:'}]}\n",
+ "2024-09-11 06:29:26 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:29:26 - generator - INFO - [generator.py:773:call] - output: GeneratorOutput(id=None, data='I\\'m not sure, but I\\'ve heard it\\'s pronounced \"Paris\". ', error=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=34, total_tokens=50), raw_response='I\\'m not sure, but I\\'ve heard it\\'s pronounced \"Paris\". ', metadata=None)\n",
+ "GeneratorOutput(id=None, data='I\\'m not sure, but I\\'ve heard it\\'s pronounced \"Paris\". ', error=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=34, total_tokens=50), raw_response='I\\'m not sure, but I\\'ve heard it\\'s pronounced \"Paris\". ', metadata=None)\n"
]
}
],
"source": [
"template = \"\"\" Your are an assistant with a great sense of humor. User: {{input_str}}. You:\"\"\"\n",
"\n",
- "generator2 = Generator(model_client=OpenAIClient(), model_kwargs=model_kwargs, template=template)\n",
+ "generator2 = Generator(\n",
+ " model_client=OpenAIClient(), model_kwargs=model_kwargs, template=template\n",
+ ")\n",
"response = generator2(prompt_kwargs=prompt_kwargs)\n",
"print(response)"
]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "2024-06-09 22:12:28 - INFO - [prompt_builder.py:82:__init__] - Prompt has variables: ['input_str']\n",
- "2024-06-09 22:12:28 - INFO - [generator.py:194:call] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:12:28 - INFO - [generator.py:195:call] - model_kwargs: {}\n",
- "2024-06-09 22:12:29 - INFO - [_client.py:1026:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:12:29 - INFO - [generator.py:203:call] - output: GeneratorOutput(data='Bonjour! The capital of France is indeed Paris! But did you know that Paris is also the city of love, famous for its Eiffel Tower, Louvre Museum, and, of course, croissants...', error=None, raw_response='Bonjour! The capital of France is indeed Paris! But did you know that Paris is also the city of love, famous for its Eiffel Tower, Louvre Museum, and, of course, croissants...')\n",
- "GeneratorOutput(data='Bonjour! The capital of France is indeed Paris! But did you know that Paris is also the city of love, famous for its Eiffel Tower, Louvre Museum, and, of course, croissants...', error=None, raw_response='Bonjour! The capital of France is indeed Paris! But did you know that Paris is also the city of love, famous for its Eiffel Tower, Louvre Museum, and, of course, croissants...')\n"
+ "2024-09-11 06:29:54 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n",
+ "2024-09-11 06:29:54 - generator - INFO - [generator.py:141:__init__] - Generator Generator initialized.\n",
+ "2024-09-11 06:29:54 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:29:54 - generator - INFO - [generator.py:773:call] - output: GeneratorOutput(id=None, data='Bonjour! But let me guess, you\\'re not asking me because you just want to chat (although, let\\'s be real, I\\'m fabulous at conversation). No, I\\'m guessing you\\'re asking because you need to polish off your geography skills and you\\'ve got a Jeopardy! audition coming up, oui?\\n\\nSo, the capital of France (drumroll, please)... is PARIS! Voilà! You\\'re welcome. Now, if you\\'ll excuse me, I have to go practice my \"Ooh la la\"s in the mirror.', error=None, usage=CompletionUsage(completion_tokens=114, prompt_tokens=37, total_tokens=151), raw_response='Bonjour! But let me guess, you\\'re not asking me because you just want to chat (although, let\\'s be real, I\\'m fabulous at conversation). No, I\\'m guessing you\\'re asking because you need to polish off your geography skills and you\\'ve got a Jeopardy! audition coming up, oui?\\n\\nSo, the capital of France (drumroll, please)... is PARIS! Voilà! You\\'re welcome. Now, if you\\'ll excuse me, I have to go practice my \"Ooh la la\"s in the mirror.', metadata=None)\n",
+ "GeneratorOutput(id=None, data='Bonjour! But let me guess, you\\'re not asking me because you just want to chat (although, let\\'s be real, I\\'m fabulous at conversation). No, I\\'m guessing you\\'re asking because you need to polish off your geography skills and you\\'ve got a Jeopardy! audition coming up, oui?\\n\\nSo, the capital of France (drumroll, please)... is PARIS! Voilà! You\\'re welcome. Now, if you\\'ll excuse me, I have to go practice my \"Ooh la la\"s in the mirror.', error=None, usage=CompletionUsage(completion_tokens=114, prompt_tokens=37, total_tokens=151), raw_response='Bonjour! But let me guess, you\\'re not asking me because you just want to chat (although, let\\'s be real, I\\'m fabulous at conversation). No, I\\'m guessing you\\'re asking because you need to polish off your geography skills and you\\'ve got a Jeopardy! audition coming up, oui?\\n\\nSo, the capital of France (drumroll, please)... is PARIS! Voilà! You\\'re welcome. Now, if you\\'ll excuse me, I have to go practice my \"Ooh la la\"s in the mirror.', metadata=None)\n"
]
}
],
"source": [
"# Let us use llama3 from groq\n",
- "from lightrag.components.model_client import GroqAPIClient\n",
+ "from adalflow.components.model_client import GroqAPIClient\n",
"\n",
"groq_model_kwargs = {\"model\": \"llama3-8b-8192\"}\n",
- "generator3 = Generator(model_client=GroqAPIClient(), model_kwargs=groq_model_kwargs, template=template)\n",
+ "generator3 = Generator(\n",
+ " model_client=GroqAPIClient(), model_kwargs=groq_model_kwargs, template=template\n",
+ ")\n",
"\n",
"response = generator3(prompt_kwargs=prompt_kwargs)\n",
"print(response)"
@@ -179,78 +207,80 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:217:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
- "2024-06-09 22:20:08 - INFO - [generator.py:218:acall] - model_kwargs: {}\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data=\"Bonjour! The capital of France is Paris, of course! But did you know that Paris is actually the City of Love? At least, that's what the tourists keep telling me.\", error=None, raw_response=\"Bonjour! The capital of France is Paris, of course! But did you know that Paris is actually the City of Love? At least, that's what the tourists keep telling me.\")\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data='Bien sûr! The capital of France is... (drumroll, please)... Paris!', error=None, raw_response='Bien sûr! The capital of France is... (drumroll, please)... Paris!')\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data='Bonjour!\\n\\nThe capital of France is none other than Paris! (or as the French like to call it, \"The City of Love and Croissants\").\\n\\nYou know what they say: Paris is always a good idea...', error=None, raw_response='Bonjour!\\n\\nThe capital of France is none other than Paris! (or as the French like to call it, \"The City of Love and Croissants\").\\n\\nYou know what they say: Paris is always a good idea...')\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data=\"Bonjour! The capital of France is Paris, of course! But did you know that it's also the City of Love, where the Eiffel Tower stands tall and the croissants are always fresh?\", error=None, raw_response=\"Bonjour! The capital of France is Paris, of course! But did you know that it's also the City of Love, where the Eiffel Tower stands tall and the croissants are always fresh?\")\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data='Bonjour! The capital of France is Paris, naturally!', error=None, raw_response='Bonjour! The capital of France is Paris, naturally!')\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data='Bonjour! The capital of France is... (drumroll please)... Paris! And if you\\'re feeling fancy, you can also say \"Je suis à Paris, comment allez-vous?\" which roughly translates to \"I\\'m in Paris, how are you?\" but actually means \"I\\'m in Paris, drop everything and let\\'s go party!\"', error=None, raw_response='Bonjour! The capital of France is... (drumroll please)... Paris! And if you\\'re feeling fancy, you can also say \"Je suis à Paris, comment allez-vous?\" which roughly translates to \"I\\'m in Paris, how are you?\" but actually means \"I\\'m in Paris, drop everything and let\\'s go party!\"')\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data=\"Bonjour! The capital of France is... (drumroll please)... PARIS! Nope, just kidding, that would be too easy! Seriously though, the capital of France is indeed Paris! But let's keep the suspense going, shall we?\", error=None, raw_response=\"Bonjour! The capital of France is... (drumroll please)... PARIS! Nope, just kidding, that would be too easy! Seriously though, the capital of France is indeed Paris! But let's keep the suspense going, shall we?\")\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data=\"Bonjour! The capital of France is... (drumroll please)... Paris! But seriously, you can't even imagine how much cheese goes into making decisions in that city.\", error=None, raw_response=\"Bonjour! The capital of France is... (drumroll please)... Paris! But seriously, you can't even imagine how much cheese goes into making decisions in that city.\")\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data='Bonjour! The capital of France is Paris, bien sûr! But did you know that Paris is often referred to as the \"City of Love\"? Maybe it\\'s because the Eiffel Tower is the most romantic landmark in the world... or maybe it\\'s because the French have a certain... je ne sais quoi when it comes to romance.', error=None, raw_response='Bonjour! The capital of France is Paris, bien sûr! But did you know that Paris is often referred to as the \"City of Love\"? Maybe it\\'s because the Eiffel Tower is the most romantic landmark in the world... or maybe it\\'s because the French have a certain... je ne sais quoi when it comes to romance.')\n",
- "2024-06-09 22:20:09 - INFO - [_client.py:1773:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2024-06-09 22:20:09 - INFO - [generator.py:225:acall] - output: GeneratorOutput(data='Bonjour! The capital of France is Paris, darling! But did you know that Paris is also the city of love, art, and croissants?', error=None, raw_response='Bonjour! The capital of France is Paris, darling! But did you know that Paris is also the city of love, art, and croissants?')\n",
- "Time taken for 10 async calls: 0.8704450130462646\n",
- "[GeneratorOutput(data='Bonjour! The capital of France is Paris, darling! But did you know that Paris is also the city of love, art, and croissants?', error=None, raw_response='Bonjour! The capital of France is Paris, darling! But did you know that Paris is also the city of love, art, and croissants?'), GeneratorOutput(data=\"Bonjour! The capital of France is... (drumroll please)... PARIS! Nope, just kidding, that would be too easy! Seriously though, the capital of France is indeed Paris! But let's keep the suspense going, shall we?\", error=None, raw_response=\"Bonjour! The capital of France is... (drumroll please)... PARIS! Nope, just kidding, that would be too easy! Seriously though, the capital of France is indeed Paris! But let's keep the suspense going, shall we?\"), GeneratorOutput(data='Bien sûr! The capital of France is... (drumroll, please)... Paris!', error=None, raw_response='Bien sûr! The capital of France is... (drumroll, please)... Paris!'), GeneratorOutput(data=\"Bonjour! The capital of France is Paris, of course! But did you know that Paris is actually the City of Love? At least, that's what the tourists keep telling me.\", error=None, raw_response=\"Bonjour! The capital of France is Paris, of course! But did you know that Paris is actually the City of Love? At least, that's what the tourists keep telling me.\"), GeneratorOutput(data=\"Bonjour! The capital of France is... (drumroll please)... Paris! But seriously, you can't even imagine how much cheese goes into making decisions in that city.\", error=None, raw_response=\"Bonjour! The capital of France is... (drumroll please)... Paris! But seriously, you can't even imagine how much cheese goes into making decisions in that city.\"), GeneratorOutput(data='Bonjour! The capital of France is Paris, naturally!', error=None, raw_response='Bonjour! The capital of France is Paris, naturally!'), GeneratorOutput(data=\"Bonjour! The capital of France is Paris, of course! But did you know that it's also the City of Love, where the Eiffel Tower stands tall and the croissants are always fresh?\", error=None, raw_response=\"Bonjour! The capital of France is Paris, of course! But did you know that it's also the City of Love, where the Eiffel Tower stands tall and the croissants are always fresh?\"), GeneratorOutput(data='Bonjour! The capital of France is... (drumroll please)... Paris! And if you\\'re feeling fancy, you can also say \"Je suis à Paris, comment allez-vous?\" which roughly translates to \"I\\'m in Paris, how are you?\" but actually means \"I\\'m in Paris, drop everything and let\\'s go party!\"', error=None, raw_response='Bonjour! The capital of France is... (drumroll please)... Paris! And if you\\'re feeling fancy, you can also say \"Je suis à Paris, comment allez-vous?\" which roughly translates to \"I\\'m in Paris, how are you?\" but actually means \"I\\'m in Paris, drop everything and let\\'s go party!\"'), GeneratorOutput(data='Bonjour! The capital of France is Paris, bien sûr! But did you know that Paris is often referred to as the \"City of Love\"? Maybe it\\'s because the Eiffel Tower is the most romantic landmark in the world... or maybe it\\'s because the French have a certain... je ne sais quoi when it comes to romance.', error=None, raw_response='Bonjour! The capital of France is Paris, bien sûr! But did you know that Paris is often referred to as the \"City of Love\"? Maybe it\\'s because the Eiffel Tower is the most romantic landmark in the world... or maybe it\\'s because the French have a certain... 
je ne sais quoi when it comes to romance.'), GeneratorOutput(data='Bonjour!\\n\\nThe capital of France is none other than Paris! (or as the French like to call it, \"The City of Love and Croissants\").\\n\\nYou know what they say: Paris is always a good idea...', error=None, raw_response='Bonjour!\\n\\nThe capital of France is none other than Paris! (or as the French like to call it, \"The City of Love and Croissants\").\\n\\nYou know what they say: Paris is always a good idea...')]\n"
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:789:acall] - prompt_kwargs: {'input_str': 'What is the capital of France?'}\n",
+ "2024-09-11 06:30:13 - generator - INFO - [generator.py:790:acall] - model_kwargs: {}\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data='Bonjour!', error=None, usage=CompletionUsage(completion_tokens=3, prompt_tokens=37, total_tokens=40), raw_response='Bonjour!', metadata=None)\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data=\"Bonjour! *sips imaginary café au lait* The capital of France, bien sûr! *dramatic flair* It's PARIS, darling! Where the Eiffel Tower is always twinkling, croissants are always flaky, and the fashion is always tres chic!\", error=None, usage=CompletionUsage(completion_tokens=60, prompt_tokens=37, total_tokens=97), raw_response=\"Bonjour! *sips imaginary café au lait* The capital of France, bien sûr! *dramatic flair* It's PARIS, darling! Where the Eiffel Tower is always twinkling, croissants are always flaky, and the fashion is always tres chic!\", metadata=None)\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data=\"Bonjour! You want to know the answer to this one, don't you? Well, let me tell you, it's not Paris... just kidding, it's actually Paris! But seriously, if you want to impress your French friends with some high-stakes trivia, just remember that the capital of France is indeed the City of Love, the City of Lights, and the City of Wining and Dining (and maybe a few croissants).\", error=None, usage=CompletionUsage(completion_tokens=91, prompt_tokens=37, total_tokens=128), raw_response=\"Bonjour! You want to know the answer to this one, don't you? Well, let me tell you, it's not Paris... just kidding, it's actually Paris! But seriously, if you want to impress your French friends with some high-stakes trivia, just remember that the capital of France is indeed the City of Love, the City of Lights, and the City of Wining and Dining (and maybe a few croissants).\", metadata=None)\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data='Bonjour! The capital of France is... (drumroll) Paris! And if you\\'re wondering, the correct pronunciation is \"Ah-reees,\" not \"Purdie-air-iss.\" Don\\'t worry, I won\\'t make fun of you... unless you ask me to.', error=None, usage=CompletionUsage(completion_tokens=59, prompt_tokens=37, total_tokens=96), raw_response='Bonjour! The capital of France is... (drumroll) Paris! And if you\\'re wondering, the correct pronunciation is \"Ah-reees,\" not \"Purdie-air-iss.\" Don\\'t worry, I won\\'t make fun of you... unless you ask me to.', metadata=None)\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data=\"Mon ami, you want to know the capital of France? Well, let me tell you, it's not Rome, it's not Berlin, it's not even Paris (okay, it's actually Paris, don't be smarty pants), it's... (drumroll please)... PARIS! But seriously, if you don't know that by now, we should probably have a chat about your geography skills (just kidding, it's a tough question, I've been there too). But in all seriousness, the answer is indeed Paris! Vive la France!\", error=None, usage=CompletionUsage(completion_tokens=114, prompt_tokens=37, total_tokens=151), raw_response=\"Mon ami, you want to know the capital of France? Well, let me tell you, it's not Rome, it's not Berlin, it's not even Paris (okay, it's actually Paris, don't be smarty pants), it's... (drumroll please)... PARIS! But seriously, if you don't know that by now, we should probably have a chat about your geography skills (just kidding, it's a tough question, I've been there too). But in all seriousness, the answer is indeed Paris! Vive la France!\", metadata=None)\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data='Bonjour! The capital of France is, of course, Paris! But don\\'t worry if you forgot, it\\'s not like you\\'re a \"fowl\" in the sense that you\\'re not aware of it... okay, I\\'ll stop with the bird puns now.', error=None, usage=CompletionUsage(completion_tokens=56, prompt_tokens=37, total_tokens=93), raw_response='Bonjour! The capital of France is, of course, Paris! But don\\'t worry if you forgot, it\\'s not like you\\'re a \"fowl\" in the sense that you\\'re not aware of it... okay, I\\'ll stop with the bird puns now.', metadata=None)\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data=\"Bonjour!\\n\\nOf course, the capital of France is... (drumroll please)... Paris! But let me add, the fashion capital of France is actually Chanel, the cuisine capital is McDonald's, and the procrastination capital is, um, never leaving the Louvre museum.\", error=None, usage=CompletionUsage(completion_tokens=57, prompt_tokens=37, total_tokens=94), raw_response=\"Bonjour!\\n\\nOf course, the capital of France is... (drumroll please)... Paris! But let me add, the fashion capital of France is actually Chanel, the cuisine capital is McDonald's, and the procrastination capital is, um, never leaving the Louvre museum.\", metadata=None)\n",
+ "2024-09-11 06:30:14 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:14 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data='Bonjour!', error=None, usage=CompletionUsage(completion_tokens=3, prompt_tokens=37, total_tokens=40), raw_response='Bonjour!', metadata=None)\n",
+ "2024-09-11 06:30:15 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:15 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data=\"Bonjour! The capital of France is... (drumroll please)... Paris! Oui, oui, it's a city so charming, you'll feel like you're in a romantic comedy... or at least, that's what the movies would have you believe.\", error=None, usage=CompletionUsage(completion_tokens=54, prompt_tokens=37, total_tokens=91), raw_response=\"Bonjour! The capital of France is... (drumroll please)... Paris! Oui, oui, it's a city so charming, you'll feel like you're in a romantic comedy... or at least, that's what the movies would have you believe.\", metadata=None)\n",
+ "2024-09-11 06:30:15 - _client - INFO - [_client.py:1786:_send_single_request] - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
+ "2024-09-11 06:30:15 - generator - INFO - [generator.py:812:acall] - output: GeneratorOutput(id=None, data='Bonjour! The capital of France is, of course, Paris! But did you know that Paris is so romantic, it\\'ll make you want to croon \"Ooh la la\" and drink all the coffee?', error=None, usage=CompletionUsage(completion_tokens=44, prompt_tokens=37, total_tokens=81), raw_response='Bonjour! The capital of France is, of course, Paris! But did you know that Paris is so romantic, it\\'ll make you want to croon \"Ooh la la\" and drink all the coffee?', metadata=None)\n",
+ "Time taken for 10 async calls: 1.94460129737854\n",
+ "[GeneratorOutput(id=None, data=\"Bonjour! You want to know the answer to this one, don't you? Well, let me tell you, it's not Paris... just kidding, it's actually Paris! But seriously, if you want to impress your French friends with some high-stakes trivia, just remember that the capital of France is indeed the City of Love, the City of Lights, and the City of Wining and Dining (and maybe a few croissants).\", error=None, usage=CompletionUsage(completion_tokens=91, prompt_tokens=37, total_tokens=128), raw_response=\"Bonjour! You want to know the answer to this one, don't you? Well, let me tell you, it's not Paris... just kidding, it's actually Paris! But seriously, if you want to impress your French friends with some high-stakes trivia, just remember that the capital of France is indeed the City of Love, the City of Lights, and the City of Wining and Dining (and maybe a few croissants).\", metadata=None), GeneratorOutput(id=None, data='Bonjour!', error=None, usage=CompletionUsage(completion_tokens=3, prompt_tokens=37, total_tokens=40), raw_response='Bonjour!', metadata=None), GeneratorOutput(id=None, data=\"Bonjour!\\n\\nOf course, the capital of France is... (drumroll please)... Paris! But let me add, the fashion capital of France is actually Chanel, the cuisine capital is McDonald's, and the procrastination capital is, um, never leaving the Louvre museum.\", error=None, usage=CompletionUsage(completion_tokens=57, prompt_tokens=37, total_tokens=94), raw_response=\"Bonjour!\\n\\nOf course, the capital of France is... (drumroll please)... Paris! But let me add, the fashion capital of France is actually Chanel, the cuisine capital is McDonald's, and the procrastination capital is, um, never leaving the Louvre museum.\", metadata=None), GeneratorOutput(id=None, data='Bonjour! The capital of France is, of course, Paris! But don\\'t worry if you forgot, it\\'s not like you\\'re a \"fowl\" in the sense that you\\'re not aware of it... okay, I\\'ll stop with the bird puns now.', error=None, usage=CompletionUsage(completion_tokens=56, prompt_tokens=37, total_tokens=93), raw_response='Bonjour! The capital of France is, of course, Paris! But don\\'t worry if you forgot, it\\'s not like you\\'re a \"fowl\" in the sense that you\\'re not aware of it... okay, I\\'ll stop with the bird puns now.', metadata=None), GeneratorOutput(id=None, data=\"Bonjour! The capital of France is... (drumroll please)... Paris! Oui, oui, it's a city so charming, you'll feel like you're in a romantic comedy... or at least, that's what the movies would have you believe.\", error=None, usage=CompletionUsage(completion_tokens=54, prompt_tokens=37, total_tokens=91), raw_response=\"Bonjour! The capital of France is... (drumroll please)... Paris! Oui, oui, it's a city so charming, you'll feel like you're in a romantic comedy... or at least, that's what the movies would have you believe.\", metadata=None), GeneratorOutput(id=None, data='Bonjour! The capital of France is... (drumroll) Paris! And if you\\'re wondering, the correct pronunciation is \"Ah-reees,\" not \"Purdie-air-iss.\" Don\\'t worry, I won\\'t make fun of you... unless you ask me to.', error=None, usage=CompletionUsage(completion_tokens=59, prompt_tokens=37, total_tokens=96), raw_response='Bonjour! The capital of France is... (drumroll) Paris! And if you\\'re wondering, the correct pronunciation is \"Ah-reees,\" not \"Purdie-air-iss.\" Don\\'t worry, I won\\'t make fun of you... 
unless you ask me to.', metadata=None), GeneratorOutput(id=None, data=\"Mon ami, you want to know the capital of France? Well, let me tell you, it's not Rome, it's not Berlin, it's not even Paris (okay, it's actually Paris, don't be smarty pants), it's... (drumroll please)... PARIS! But seriously, if you don't know that by now, we should probably have a chat about your geography skills (just kidding, it's a tough question, I've been there too). But in all seriousness, the answer is indeed Paris! Vive la France!\", error=None, usage=CompletionUsage(completion_tokens=114, prompt_tokens=37, total_tokens=151), raw_response=\"Mon ami, you want to know the capital of France? Well, let me tell you, it's not Rome, it's not Berlin, it's not even Paris (okay, it's actually Paris, don't be smarty pants), it's... (drumroll please)... PARIS! But seriously, if you don't know that by now, we should probably have a chat about your geography skills (just kidding, it's a tough question, I've been there too). But in all seriousness, the answer is indeed Paris! Vive la France!\", metadata=None), GeneratorOutput(id=None, data='Bonjour!', error=None, usage=CompletionUsage(completion_tokens=3, prompt_tokens=37, total_tokens=40), raw_response='Bonjour!', metadata=None), GeneratorOutput(id=None, data='Bonjour! The capital of France is, of course, Paris! But did you know that Paris is so romantic, it\\'ll make you want to croon \"Ooh la la\" and drink all the coffee?', error=None, usage=CompletionUsage(completion_tokens=44, prompt_tokens=37, total_tokens=81), raw_response='Bonjour! The capital of France is, of course, Paris! But did you know that Paris is so romantic, it\\'ll make you want to croon \"Ooh la la\" and drink all the coffee?', metadata=None), GeneratorOutput(id=None, data=\"Bonjour! *sips imaginary café au lait* The capital of France, bien sûr! *dramatic flair* It's PARIS, darling! Where the Eiffel Tower is always twinkling, croissants are always flaky, and the fashion is always tres chic!\", error=None, usage=CompletionUsage(completion_tokens=60, prompt_tokens=37, total_tokens=97), raw_response=\"Bonjour! *sips imaginary café au lait* The capital of France, bien sûr! *dramatic flair* It's PARIS, darling! Where the Eiffel Tower is always twinkling, croissants are always flaky, and the fashion is always tres chic!\", metadata=None)]\n"
]
}
],
"source": [
"# Lets do 10 async calls at once, lets use GroqAPIClient\n",
- "import nest_asyncio # import asyncio, use nest_asyncio.apply() if you are in jupyter notebook\n",
+ "import nest_asyncio # import asyncio, use nest_asyncio.apply() if you are in jupyter notebook\n",
"import asyncio\n",
+ "\n",
"nest_asyncio.apply()\n",
"\n",
"import time\n",
"from typing import List\n",
"\n",
+ "\n",
"async def make_async_calls(queries: List[str]):\n",
" calls = [generator3.acall(prompt_kwargs={\"input_str\": query}) for query in queries]\n",
" responses = await asyncio.gather(*calls)\n",
" return responses\n",
"\n",
+ "\n",
"queries = [query] * 10\n",
"start = time.time()\n",
"responses = asyncio.run(make_async_calls(queries))\n",
"print(f\"Time taken for 10 async calls: {time.time() - start}\")\n",
- "print(responses)\n",
- "\n"
+ "print(responses)"
]
},
{
@@ -263,9 +293,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "my-project-kernel",
+ "display_name": "openc",
"language": "python",
- "name": "my-project-kernel"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -277,7 +307,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.4"
+ "version": "3.10.14"
}
},
"nbformat": 4,
diff --git a/use_cases/rag/build/rag.py b/use_cases/rag/build/rag.py
index e16acb9b..03e2b58f 100644
--- a/use_cases/rag/build/rag.py
+++ b/use_cases/rag/build/rag.py
@@ -77,6 +77,15 @@ def prepare_database_with_index(
db.save_state(index_path)
+RAG_PROMPT_TEMPLATE = r"""
+{{task_desc}}
+
+
+{{input_str}}
+{{context_str}}
+
+"""
+
rag_prompt_task_desc = r"""
You are a helpful assistant.