diff --git a/autorag/nodes/passageaugmenter/run.py b/autorag/nodes/passageaugmenter/run.py index e971c321a..30e55dac8 100644 --- a/autorag/nodes/passageaugmenter/run.py +++ b/autorag/nodes/passageaugmenter/run.py @@ -8,6 +8,7 @@ from autorag.nodes.retrieval.run import evaluate_retrieval_node from autorag.schema.metricinput import MetricInput from autorag.strategy import measure_speed, filter_by_threshold, select_best +from autorag.utils.util import apply_recursive, to_list logger = logging.getLogger("AutoRAG") @@ -26,13 +27,7 @@ def run_passage_augmenter_node( os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow" ) retrieval_gt = qa_df["retrieval_gt"].tolist() - retrieval_gt = [ - [ - [str(uuid) for uuid in sub_array] if sub_array.size > 0 else [] - for sub_array in inner_array - ] - for inner_array in retrieval_gt - ] + retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt)) results, execution_times = zip( *map( diff --git a/autorag/nodes/passagefilter/run.py b/autorag/nodes/passagefilter/run.py index 922c7bbdd..b55799625 100644 --- a/autorag/nodes/passagefilter/run.py +++ b/autorag/nodes/passagefilter/run.py @@ -7,6 +7,7 @@ from autorag.nodes.retrieval.run import evaluate_retrieval_node from autorag.schema.metricinput import MetricInput from autorag.strategy import measure_speed, filter_by_threshold, select_best +from autorag.utils.util import to_list, apply_recursive def run_passage_filter_node( @@ -37,13 +38,8 @@ def run_passage_filter_node( os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow" ) retrieval_gt = qa_df["retrieval_gt"].tolist() - retrieval_gt = [ - [ - [str(uuid) for uuid in sub_array] if sub_array.size > 0 else [] - for sub_array in inner_array - ] - for inner_array in retrieval_gt - ] + retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt)) + # make rows to metric_inputs metric_inputs = [ MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt) diff --git 
a/autorag/nodes/passagereranker/run.py b/autorag/nodes/passagereranker/run.py index 3bbafa614..385776a79 100644 --- a/autorag/nodes/passagereranker/run.py +++ b/autorag/nodes/passagereranker/run.py @@ -8,6 +8,7 @@ from autorag.nodes.retrieval.run import evaluate_retrieval_node from autorag.schema.metricinput import MetricInput from autorag.strategy import measure_speed, filter_by_threshold, select_best +from autorag.utils.util import apply_recursive, to_list logger = logging.getLogger("AutoRAG") @@ -40,13 +41,8 @@ def run_passage_reranker_node( os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow" ) retrieval_gt = qa_df["retrieval_gt"].tolist() - retrieval_gt = [ - [ - [str(uuid) for uuid in sub_array] if sub_array.size > 0 else [] - for sub_array in inner_array - ] - for inner_array in retrieval_gt - ] + retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt)) + # make rows to metric_inputs metric_inputs = [ MetricInput(retrieval_gt=ret_gt, query=query, generation_gt=gen_gt) diff --git a/autorag/nodes/retrieval/run.py b/autorag/nodes/retrieval/run.py index 8ab85b0b3..152822f62 100644 --- a/autorag/nodes/retrieval/run.py +++ b/autorag/nodes/retrieval/run.py @@ -12,7 +12,7 @@ from autorag.schema.metricinput import MetricInput from autorag.strategy import measure_speed, filter_by_threshold, select_best from autorag.support import get_support_modules -from autorag.utils.util import get_best_row, to_list +from autorag.utils.util import get_best_row, to_list, apply_recursive logger = logging.getLogger("AutoRAG") @@ -47,13 +47,7 @@ def run_retrieval_node( os.path.join(project_dir, "data", "qa.parquet"), engine="pyarrow" ) retrieval_gt = qa_df["retrieval_gt"].tolist() - retrieval_gt = [ - [ - [str(uuid) for uuid in sub_array] if sub_array.size > 0 else [] - for sub_array in inner_array - ] - for inner_array in retrieval_gt - ] + retrieval_gt = apply_recursive(lambda x: str(x), to_list(retrieval_gt)) # make rows to metric_inputs metric_inputs = [ 
def apply_recursive(func: Callable, data):
    """
    Recursively apply a function to every leaf element of a nested container.

    Lists, tuples, sets, ``np.ndarray`` and ``pd.Series`` are treated as
    containers and are always returned as plain lists, so the nesting shape is
    preserved while the container types are normalized.
    Anything else (including ``str`` and ``dict``) is treated as a leaf and is
    passed to ``func`` directly.

    :param func: Function to apply to each leaf element.
    :param data: A leaf value, or an arbitrarily nested list, tuple, set,
        np.ndarray, or pd.Series of leaf values.
        Sets are iterated in their own (unordered) iteration order.
    :return: ``func(data)`` when data is a leaf,
        otherwise a (nested) list with the function applied to each leaf.
    """
    if isinstance(data, np.ndarray) and data.ndim == 0:
        # A 0-d array wraps a single scalar and cannot be iterated
        # ("iteration over a 0-d array"), so unwrap it and treat it as a leaf.
        return func(data[()])
    if isinstance(data, (list, tuple, set, np.ndarray, pd.Series)):
        return [apply_recursive(func, item) for item in data]
    return func(data)
def test_apply_recursive():
    """Check that apply_recursive maps over leaves and normalizes containers to lists."""

    def double(value):
        return value * 2

    # Flat and nested lists keep their nesting shape.
    assert apply_recursive(double, [1, 2, 3, 4]) == [2, 4, 6, 8]
    assert apply_recursive(double, [[1, 2], [3, 4]]) == [[2, 4], [6, 8]]
    assert apply_recursive(double, [[1, [2, 3]], [4, [5, 6]]]) == [
        [2, [4, 6]],
        [8, [10, 12]],
    ]

    # An empty container stays empty, and a bare leaf is passed straight to func.
    assert apply_recursive(double, []) == []
    assert apply_recursive(double, 5) == 10

    # Tuples, arrays and Series are all converted to plain lists.
    mixed = [(4, 5), (6, 7), [5, [6, 7]], np.array([4, 5]), pd.Series([4, 5])]
    assert apply_recursive(double, mixed) == [
        [8, 10],
        [12, 14],
        [10, [12, 14]],
        [8, 10],
        [8, 10],
    ]

    # Sets are supported too; their iteration order is not guaranteed, so sort.
    assert sorted(apply_recursive(double, {1, 2, 3})) == [2, 4, 6]

    # The production use: stringify nested id arrays, including empty ones.
    retrieval_gt = [[np.array(["id-1", "id-2"]), np.array([])], [np.array(["id-3"])]]
    assert apply_recursive(str, retrieval_gt) == [[["id-1", "id-2"], []], [["id-3"]]]