Add Prompt Maker run function. #68

Merged · 14 commits · Jan 29, 2024
4 changes: 2 additions & 2 deletions autorag/deploy.py
@@ -9,7 +9,7 @@
from fastapi import FastAPI
from pydantic import BaseModel

-from autorag.schema.module import SUPPORT_MODULES
+from autorag.support import get_support_modules
from autorag.utils.util import load_summary_file

logger = logging.getLogger("AutoRAG")
@@ -131,7 +131,7 @@ def run(self, query: str, result_column: str = "answer"):
module = node['modules'][0]
module_type = module.pop('module_type')
module_params = module
-previous_result = SUPPORT_MODULES[module_type](
+previous_result = get_support_modules(module_type)(
project_dir=self.project_dir,
previous_result=previous_result,
**module_params
2 changes: 1 addition & 1 deletion autorag/nodes/generator/run.py
@@ -69,7 +69,7 @@ def run_generator_node(modules: List[Callable],
selected_result, selected_filename = select_best_average(results, strategies.get('metrics'), filenames)
best_result = pd.concat([previous_result, selected_result], axis=1)

-# add summary.csv 'is_best' column
+# add 'is_best' column at summary file
summary_df['is_best'] = summary_df['filename'] == selected_filename

# save files
164 changes: 164 additions & 0 deletions autorag/nodes/promptmaker/run.py
@@ -0,0 +1,164 @@
import os
import pathlib
from copy import deepcopy
from typing import List, Callable, Dict, Optional

import pandas as pd

from autorag.evaluate import evaluate_generation
from autorag.support import get_support_modules
from autorag.strategy import measure_speed, filter_by_threshold, select_best_average
from autorag.utils import validate_qa_dataset
from autorag.utils.util import make_combinations, explode, make_module_file_name


def run_prompt_maker_node(modules: List[Callable],
module_params: List[Dict],
previous_result: pd.DataFrame,
node_line_dir: str,
strategies: Dict,
) -> pd.DataFrame:
"""
Run prompt maker node.
With this function, you can select the best prompt maker module.
By default, when only one module is given, the evaluation is skipped.
If you want to select the best prompt among multiple modules, you can use strategies.
Evaluation runs generator modules with generator metrics,
so you can pass 'generator_modules' and their parameters in strategies.
It is recommended to use a single generator module and parameter set for evaluation,
but multiple modules and parameter sets are supported.
When you don't set a generator module in strategies, the default generator module is used,
which is llama_index_llm with the OpenAI gpt-3.5-turbo model.

:param modules: Prompt maker modules to run.
:param module_params: Prompt maker module parameters.
:param previous_result: Previous result dataframe.
Could be query expansion's best result or qa data.
:param node_line_dir: This node line's directory.
:param strategies: Strategies for prompt maker node.
:return: The best result dataframe.
It contains the previous result columns and the prompt maker's result column, which is 'prompts'.
"""
if not os.path.exists(node_line_dir):
os.makedirs(node_line_dir)
node_dir = os.path.join(node_line_dir, "prompt_maker")
if not os.path.exists(node_dir):
os.makedirs(node_dir)
project_dir = pathlib.PurePath(node_line_dir).parent.parent

# run modules
results, execution_times = zip(*map(lambda task: measure_speed(
task[0], project_dir=project_dir, previous_result=previous_result, **task[1]), zip(modules, module_params)))
average_times = list(map(lambda x: x / len(results[0]), execution_times))

# save results to folder
pseudo_module_params = deepcopy(module_params)
for i, module_param in enumerate(pseudo_module_params):
module_param['prompt'] = str(i)
filepaths = list(map(lambda x: os.path.join(node_dir, make_module_file_name(x[0].__name__, x[1])),
zip(modules, pseudo_module_params)))
list(map(lambda x: x[0].to_parquet(x[1], index=False), zip(results, filepaths))) # execute save to parquet
filenames = list(map(lambda x: os.path.basename(x), filepaths))

# make summary file
summary_df = pd.DataFrame({
'filename': filenames,
'module_name': list(map(lambda module: module.__name__, modules)),
'module_params': module_params,
'execution_time': average_times,
})

# Run evaluation only when there is more than one module.
if len(modules) > 1:
# pop general keys from strategies (e.g. metrics, speed_threshold)
general_key = ['metrics', 'speed_threshold']
general_strategy = dict(filter(lambda x: x[0] in general_key, strategies.items()))
extra_strategy = dict(filter(lambda x: x[0] not in general_key, strategies.items()))

# first, filter by threshold if it is enabled.
if general_strategy.get('speed_threshold') is not None:
results, filenames = filter_by_threshold(results, average_times, general_strategy['speed_threshold'],
filenames)

# metrics are required for evaluation
if general_strategy.get('metrics') is None:
raise ValueError("You must specify at least one metric for prompt maker evaluation.")

# get generator modules from strategy
generator_callables, generator_params = make_generator_callable_params(extra_strategy)

# get generation_gt
qa_data = pd.read_parquet(os.path.join(project_dir, "data", "qa.parquet"))
validate_qa_dataset(qa_data)
generation_gt = qa_data['generation_gt'].tolist()
generation_gt = list(map(lambda x: x.tolist(), generation_gt))

# run evaluations
evaluation_results = list(map(lambda result: evaluate_one_prompt_maker_node(
generator_callables, generator_params, result['prompts'].tolist(),
generation_gt, general_strategy['metrics'], project_dir), results))

for metric_name in general_strategy['metrics']:
summary_df[f'prompt_maker_{metric_name}'] = list(map(lambda x: x[metric_name].mean(), evaluation_results))

best_result, best_filename = select_best_average(evaluation_results, general_strategy['metrics'], filenames)
# change metric name columns to prompt_maker_metric_name
best_result = best_result.rename(columns={
metric_name: f'prompt_maker_{metric_name}' for metric_name in strategies['metrics']})
best_result = best_result.drop(columns=['generated_texts'])
else:
best_result, best_filename = results[0], filenames[0]

# add 'is_best' column at summary file
summary_df['is_best'] = summary_df['filename'] == best_filename

best_result = pd.concat([previous_result, best_result], axis=1)

# save files
summary_df.to_parquet(os.path.join(node_dir, "summary.parquet"), index=False)
best_result.to_parquet(os.path.join(node_dir, f"best_{os.path.splitext(best_filename)[0]}.parquet"), index=False)

return best_result


def make_generator_callable_params(strategy_dict: Dict):
node_dict = deepcopy(strategy_dict)
generator_module_list: Optional[List[Dict]] = node_dict.pop('generator_modules', None)
if generator_module_list is None:
generator_module_list = [{
'module_type': 'llama_index_llm',
'llm': 'openai',
'model_name': 'gpt-3.5-turbo',
}]
node_params = node_dict
modules = list(map(lambda module_dict: get_support_modules(module_dict.pop('module_type')),
generator_module_list))
param_combinations = list(map(lambda module_dict: make_combinations({**module_dict, **node_params}),
generator_module_list))
return explode(modules, param_combinations)

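# Illustrative sketch of how make_generator_callable_params expands a strategy dict.
# The keys follow the default generator module above; the list-valued 'temperature' and the
# expansion into one (callable, params) pair per combination assume the usual behavior of
# make_combinations/explode:
#
#   extra_strategy = {
#       'generator_modules': [{
#           'module_type': 'llama_index_llm',
#           'llm': 'openai',
#           'temperature': [0.1, 1.0],
#       }],
#   }
#   callables, params = make_generator_callable_params(extra_strategy)
#   # callables -> [llama_index_llm, llama_index_llm]
#   # params    -> [{'llm': 'openai', 'temperature': 0.1}, {'llm': 'openai', 'temperature': 1.0}]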

def evaluate_one_prompt_maker_node(generator_funcs: List[Callable],
generator_params: List[Dict],
prompts: List[str],
generation_gt: List[List[str]],
metrics: List[str],
project_dir) -> pd.DataFrame:
input_df = pd.DataFrame({'prompts': prompts})
generator_results = list(map(lambda x: x[0](project_dir=project_dir, previous_result=input_df, **x[1]),
zip(generator_funcs, generator_params)))
evaluation_results = list(map(lambda x: evaluate_generator_result(x[0], generation_gt, metrics),
zip(generator_results, generator_funcs)))
best_result, _ = select_best_average(evaluation_results, metrics)
best_result = pd.concat([input_df, best_result], axis=1)
return best_result # it has 'generated_texts' column

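# Illustrative shapes for evaluate_one_prompt_maker_node (values are made up):
#   prompts       -> one rendered prompt string per QA row, e.g. "Answer using the passages: ..."
#   generation_gt -> a list of reference answers per QA row
# The returned DataFrame keeps the 'prompts' column and, from the best generator run,
# adds 'generated_texts' plus one column per metric in `metrics`.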

def evaluate_generator_result(result_df: pd.DataFrame,
generation_gt: List[List[str]],
metrics: List[str]) -> pd.DataFrame:
@evaluate_generation(generation_gt=generation_gt, metrics=metrics)
def evaluate(df):
return df['generated_texts'].tolist()

return evaluate(result_df)
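For reference, a minimal sketch of calling the new node runner directly. The prompt templates, file paths, metric names, and the columns assumed in previous_result are illustrative, not taken from this PR, and the project layout assumes data/qa.parquet sits two directories above node_line_dir:

import pandas as pd
from autorag.nodes.promptmaker import fstring
from autorag.nodes.promptmaker.run import run_prompt_maker_node

# previous_result is assumed to carry the columns fstring formats into the prompt,
# e.g. 'query' and 'retrieved_contents'.
previous_result = pd.read_parquet("my_project/retrieval_best_result.parquet")  # hypothetical path

best_df = run_prompt_maker_node(
    modules=[fstring, fstring],  # two prompt candidates, so evaluation is triggered
    module_params=[
        {"prompt": "Read the passages and answer.\n{retrieved_contents}\nQuestion: {query}"},
        {"prompt": "Question: {query}\nPassages: {retrieved_contents}\nAnswer:"},
    ],
    previous_result=previous_result,
    node_line_dir="my_project/0/post_retrieve_node_line",  # hypothetical node line directory
    strategies={
        "metrics": ["bleu", "meteor", "rouge"],  # generator metrics (names illustrative)
        "speed_threshold": 10,                   # optional: drop modules slower than this per row
        "generator_modules": [{                  # optional: defaults to llama_index_llm + gpt-3.5-turbo
            "module_type": "llama_index_llm",
            "llm": "openai",
            "model_name": "gpt-3.5-turbo",
        }],
    },
)
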
13 changes: 2 additions & 11 deletions autorag/schema/module.py
@@ -2,16 +2,7 @@
from dataclasses import dataclass, field
from typing import Callable, Dict

-from autorag.nodes.promptmaker import fstring
-from autorag.nodes.generator import llama_index_llm
-from autorag.nodes.retrieval import bm25, vectordb
-
-SUPPORT_MODULES = {
-    'bm25': bm25,
-    'vectordb': vectordb,
-    'fstring': fstring,
-    'llama_index_llm': llama_index_llm,
-}
+from autorag.support import get_support_modules


@dataclass
@@ -21,7 +12,7 @@ class Module:
module: Callable = field(init=False)

def __post_init__(self):
-self.module = SUPPORT_MODULES.get(self.module_type)
+self.module = get_support_modules(self.module_type)
if self.module is None:
raise ValueError(f"Module type {self.module_type} is not supported.")

9 changes: 2 additions & 7 deletions autorag/schema/node.py
@@ -6,15 +6,10 @@

import pandas as pd

-from autorag.nodes.generator.run import run_generator_node
-from autorag.nodes.retrieval.run import run_retrieval_node
from autorag.schema.module import Module
+from autorag.support import get_support_nodes
from autorag.utils.util import make_combinations, explode

-SUPPORT_NODES = {
-    'retrieval': run_retrieval_node,
-    'generator': run_generator_node,
-}
logger = logging.getLogger("AutoRAG")


@@ -27,7 +22,7 @@ class Node:
run_node: Callable = field(init=False)

def __post_init__(self):
-self.run_node = SUPPORT_NODES.get(self.node_type)
+self.run_node = get_support_nodes(self.node_type)
if self.run_node is None:
raise ValueError(f"Node type {self.node_type} is not supported.")

31 changes: 31 additions & 0 deletions autorag/support.py
@@ -0,0 +1,31 @@
from typing import Callable, Dict
import importlib


def dynamically_find_function(key: str, target_dict: Dict) -> Callable:
if key in target_dict:
module_path, func_name = target_dict[key]
module = importlib.import_module(module_path)
func = getattr(module, func_name)
return func
else:
raise KeyError(f"Key {key} is not supported.")


def get_support_modules(module_name: str) -> Callable:
support_modules = {
'bm25': ('autorag.nodes.retrieval', 'bm25'),
'vectordb': ('autorag.nodes.retrieval', 'vectordb'),
'fstring': ('autorag.nodes.promptmaker', 'fstring'),
'llama_index_llm': ('autorag.nodes.generator', 'llama_index_llm'),
}
return dynamically_find_function(module_name, support_modules)


def get_support_nodes(node_name: str) -> Callable:
support_nodes = {
'retrieval': ('autorag.nodes.retrieval.run', 'run_retrieval_node'),
'generator': ('autorag.nodes.generator.run', 'run_generator_node'),
'prompt_maker': ('autorag.nodes.promptmaker.run', 'run_prompt_maker_node'),
}
return dynamically_find_function(node_name, support_nodes)
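
A quick sketch of how the new lazy lookup behaves; the unknown-key example is illustrative:

from autorag.support import get_support_modules, get_support_nodes

fstring_module = get_support_modules("fstring")        # imports autorag.nodes.promptmaker only now
run_prompt_maker = get_support_nodes("prompt_maker")   # imports autorag.nodes.promptmaker.run only now

try:
    get_support_modules("unknown_module")
except KeyError as err:
    print(err)  # Key unknown_module is not supported.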