Adding properties to evaluation for UI rendering (#37718)
* Adding properties needed by UI

* Updating tests

* Fixing linting issues

* Fixing formatting issues
singankit authored Oct 7, 2024
1 parent 94396ed commit 886139f
Showing 4 changed files with 21 additions and 4 deletions.
@@ -48,6 +48,13 @@ class DefaultOpenEncoding:
    """SDK Default Encoding when writing a file"""


class EvaluationRunProperties:
    """Defines properties used to identify an evaluation run by UI"""

    RUN_TYPE = "runType"
    EVALUATION_RUN = "_azureml.evaluation_run"


DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT = 4
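
For illustration only (not part of this diff): a minimal sketch of how the new constants can replace the raw strings when assembling the property bag the UI keys off. The build_ui_properties helper is hypothetical.

    from azure.ai.evaluation._constants import EvaluationRunProperties

    # Hypothetical helper: assemble the properties the UI looks for when deciding
    # whether to render a run as an evaluation run.
    def build_ui_properties(parent_marker: str = "azure-ai-generative-parent") -> dict:
        return {
            EvaluationRunProperties.RUN_TYPE: "eval_run",           # key "runType"
            EvaluationRunProperties.EVALUATION_RUN: parent_marker,  # key "_azureml.evaluation_run"
        }

    print(build_ui_properties())
    # {'runType': 'eval_run', '_azureml.evaluation_run': 'azure-ai-generative-parent'}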
@@ -16,6 +16,7 @@
from .._constants import (
    CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
    EvaluationMetrics,
    EvaluationRunProperties,
    Prefixes,
    _InternalEvaluationMetrics,
)
@@ -352,7 +353,7 @@ def _apply_target_to_data(
        flow=target,
        display_name=evaluation_name,
        data=data,
        properties={"runType": "eval_run", "isEvaluatorRun": "true"},
        properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
        stream=True,
        name=_run_name,
    )
@@ -12,7 +12,12 @@

import pandas as pd

from azure.ai.evaluation._constants import DEFAULT_EVALUATION_RESULTS_FILE_NAME, DefaultOpenEncoding, Prefixes
from azure.ai.evaluation._constants import (
    DEFAULT_EVALUATION_RESULTS_FILE_NAME,
    DefaultOpenEncoding,
    Prefixes,
    EvaluationRunProperties,
)
from azure.ai.evaluation._evaluate._eval_run import EvalRun
from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException

@@ -112,7 +117,8 @@ def _log_metrics_and_instance_results(
    if run is None:
        ev_run.write_properties_to_run_history(
            properties={
                "_azureml.evaluation_run": "azure-ai-generative-parent",
                EvaluationRunProperties.RUN_TYPE: "eval_run",
                EvaluationRunProperties.EVALUATION_RUN: "azure-ai-generative-parent",
                "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
                "isEvaluatorRun": "true",
            }
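
As a hedged illustration (not in the diff), the full property bag written to run history for a local-to-cloud evaluation after this change might look like the sketch below; artifact_name is a stand-in for the artifact path the SDK actually uses.

    import json

    artifact_name = "instance_results.jsonl"  # stand-in value for illustration

    # Property bag combining the keys behind the new constants with the existing
    # artifact metadata and evaluator marker.
    properties = {
        "runType": "eval_run",                                    # EvaluationRunProperties.RUN_TYPE
        "_azureml.evaluation_run": "azure-ai-generative-parent",  # EvaluationRunProperties.EVALUATION_RUN
        "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
        "isEvaluatorRun": "true",
    }
    print(json.dumps(properties, indent=2))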
@@ -15,6 +15,7 @@
    F1ScoreEvaluator,
    FluencyEvaluator,
    GroundednessEvaluator,
    evaluate,
)
from azure.ai.evaluation._common.math import list_mean_nan_safe

@@ -401,6 +402,7 @@ def test_evaluate_track_in_cloud(
    # module named test_evaluate and it will be a different module in unit test
    # folder. By keeping function in separate file we guarantee, it will be loaded
    # from there.
    # os.environ["AZURE_TEST_RUN_LIVE"] = "True"
    from .target_fn import target_fn

    f1_score_eval = F1ScoreEvaluator()
@@ -415,7 +417,6 @@
    )
    row_result_df = pd.DataFrame(result["rows"])

    assert "outputs.answer" in row_result_df.columns
    assert "outputs.answer.length" in row_result_df.columns
    assert list(row_result_df["outputs.answer.length"]) == [28, 76, 22]
    assert "outputs.f1.f1_score" in row_result_df.columns
@@ -429,6 +430,7 @@
    assert remote_run is not None
    assert remote_run["runMetadata"]["properties"]["azureml.promptflow.local_to_cloud"] == "true"
    assert remote_run["runMetadata"]["properties"]["runType"] == "eval_run"
    assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "promptflow.BatchRun"
    assert remote_run["runMetadata"]["displayName"] == evaluation_name

@pytest.mark.skipif(in_ci(), reason="This test fails in CI and needs to be investigate. Bug: 3458432")
@@ -472,6 +474,7 @@ def test_evaluate_track_in_cloud_no_target(
    remote_run = _get_run_from_run_history(run_id, azure_ml_client, project_scope)

    assert remote_run is not None
    assert remote_run["runMetadata"]["properties"]["runType"] == "eval_run"
    assert remote_run["runMetadata"]["properties"]["_azureml.evaluation_run"] == "azure-ai-generative-parent"
    assert remote_run["runMetadata"]["displayName"] == evaluation_name

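
To mirror the assertions above, here is a hypothetical helper (not part of this commit) that checks run metadata fetched from run history for the markers the UI relies on; the run_metadata shape follows what the tests index into.

    def is_evaluation_run(run_metadata: dict) -> bool:
        # Look up the property bag the tests read via remote_run["runMetadata"]["properties"].
        properties = run_metadata.get("runMetadata", {}).get("properties", {})
        return (
            properties.get("runType") == "eval_run"
            and "_azureml.evaluation_run" in properties
        )

    remote_run = {
        "runMetadata": {
            "properties": {
                "runType": "eval_run",
                "_azureml.evaluation_run": "azure-ai-generative-parent",
            }
        }
    }
    assert is_evaluation_run(remote_run)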
