Skip to content

Commit

Permalink
Make Credentials Required for Content Safety and Protected Materials …
Browse files Browse the repository at this point in the history
…Evaluators (#37707)

* Make Credentials Required for Content Safety Evaluators

* fix a typo

* lint, fix content safety evaluator

* revert test change

* remove credential from rai_service
  • Loading branch information
needuv authored Oct 4, 2024
1 parent 55632a7 commit ffc1dbd
Show file tree
Hide file tree
Showing 17 changed files with 58 additions and 71 deletions.
1 change: 1 addition & 0 deletions sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
### Breaking Changes

- Removed `numpy` dependency. All NaN values returned by the SDK have been changed from `numpy.nan` to `math.nan`.
- `credential` is now required to be passed in for all content safety evaluators and `ProtectedMaterialsEvaluator`. `DefaultAzureCredential` will no longer be chosen if a credential is not passed.

### Bugs Fixed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from azure.ai.evaluation._http_utils import get_async_http_client
from azure.ai.evaluation._model_configurations import AzureAIProject
from azure.core.credentials import TokenCredential
from azure.identity import DefaultAzureCredential

from .constants import (
CommonConstants,
Expand Down Expand Up @@ -434,10 +433,6 @@ async def evaluate_with_rai_service(
:return: The parsed annotation result.
:rtype: List[List[Dict]]
"""
# Use DefaultAzureCredential if no credential is provided
# This is for the batch run scenario as the credential cannot be serialized by promptflow
if credential is None or credential == {}:
credential = DefaultAzureCredential()

# Get RAI service URL from discovery service and check service availability
token = await fetch_or_reuse_token(credential)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Dict, Optional
from typing_extensions import override

from azure.identity import DefaultAzureCredential
from azure.core.credentials import TokenCredential
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
from azure.ai.evaluation._exceptions import EvaluationException
Expand All @@ -17,32 +17,28 @@ class RaiServiceEvaluatorBase(EvaluatorBase):
This includes content safety evaluators, protected material evaluators, and others. These evaluators
are all assumed to be of the "query and response or conversation" input variety.
param eval_metric: The evaluation metric to be used for evaluation. This is used by the API call logic
to specify which evaluation to perform.
type eval_metric: ~azure.ai.evaluation._common.constants.EvaluationMetrics
param eval_last_turn: If True, only the last turn of the conversation will be evaluated, and no
:param eval_metric: The evaluation metric to be used for evaluation. This is used by the API call logic
to specify which evaluation to perform.
:type eval_metric: ~azure.ai.evaluation._common.constants.EvaluationMetrics
:param eval_last_turn: If True, only the last turn of the conversation will be evaluated, and no
aggregation will be performed. If False, all turns will be evaluated and the numeric results will be
aggregated. Per-turn results are still available in the output via the "evaluation_per_turn" key
when this occurs. Default is False, resulting in full conversation evaluation and aggregation.
type eval_last_turn: bool
:type eval_last_turn: bool
"""

@override
def __init__(
self,
eval_metric: EvaluationMetrics,
azure_ai_project: dict,
credential: Optional[dict] = None,
credential: TokenCredential,
eval_last_turn: bool = False,
):
super().__init__(eval_last_turn=eval_last_turn)
self._eval_metric = eval_metric
self._azure_ai_project = azure_ai_project
if credential is None:
# Use DefaultCredential if no credential is provided
self._credential = DefaultAzureCredential()
else:
self._credential = credential
self._credential = credential

@override
def __call__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ class ContentSafetyEvaluator:
"""
Initialize a content safety evaluator configured to evaluate content safety metrics for QA scenario.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:param credential: The credential for connecting to Azure AI project.
:type credential: ~azure.core.credentials.TokenCredential
:return: A function that evaluates content-safety metrics for "question-answering" scenario.
:rtype: Callable
Expand Down Expand Up @@ -66,13 +66,13 @@ class ContentSafetyEvaluator:
}
"""

def __init__(self, azure_ai_project: dict, parallel: bool = True, credential=None):
def __init__(self, credential, azure_ai_project: dict, parallel: bool = True):
self._parallel = parallel
self._evaluators = [
ViolenceEvaluator(azure_ai_project, credential),
SexualEvaluator(azure_ai_project, credential),
SelfHarmEvaluator(azure_ai_project, credential),
HateUnfairnessEvaluator(azure_ai_project, credential),
ViolenceEvaluator(credential, azure_ai_project),
SexualEvaluator(credential, azure_ai_project),
SelfHarmEvaluator(credential, azure_ai_project),
HateUnfairnessEvaluator(credential, azure_ai_project),
]

def __call__(self, *, query: str, response: str, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ class ContentSafetyChatEvaluator:
"""
Initialize a content safety chat evaluator configured to evaluate content safety metrics for chat scenario.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
Expand All @@ -38,8 +40,6 @@ class ContentSafetyChatEvaluator:
:param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
Default is True.
:type parallel: bool
:param credential: The credential for connecting to Azure AI project.
:type credential: ~azure.core.credentials.TokenCredential
:return: A function that evaluates and generates metrics for "chat" scenario.
:rtype: Callable
Expand Down Expand Up @@ -88,7 +88,7 @@ class ContentSafetyChatEvaluator:
}
"""

def __init__(self, azure_ai_project: dict, eval_last_turn: bool = False, parallel: bool = True, credential=None):
def __init__(self, credential, azure_ai_project: dict, eval_last_turn: bool = False, parallel: bool = True):
self._eval_last_turn = eval_last_turn
self._parallel = parallel
self._evaluators = [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a hate-unfairness evaluator for hate unfairness score.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
**Usage**
Expand Down Expand Up @@ -43,8 +42,8 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a self harm evaluator for self harm score.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
**Usage**
Expand Down Expand Up @@ -43,8 +42,8 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class SexualEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a sexual evaluator for sexual score.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
**Usage**
Expand Down Expand Up @@ -43,8 +42,8 @@ class SexualEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -11,11 +10,11 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase):
"""
Initialize a violence evaluator for violence score.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
**Usage**
Expand Down Expand Up @@ -43,8 +42,8 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -17,11 +16,11 @@ class ECIEvaluator(RaiServiceEvaluatorBase):
"AI-generated content may be incorrect. If you are seeking ECI-related information, please go to Bing Search."
Outputs True or False with AI-generated reasoning.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
:return: Whether or not ECI was found in the response without a disclaimer, with AI-generated reasoning
:rtype: Dict[str, str]
Expand Down Expand Up @@ -50,8 +49,8 @@ class ECIEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -12,11 +11,11 @@ class ProtectedMaterialEvaluator(RaiServiceEvaluatorBase):
Initialize a protected material evaluator to detect whether protected material
is present in your AI system's response. Outputs True or False with AI-generated reasoning.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
:return: Whether or not protected material was found in the response, with AI-generated reasoning.
:rtype: Dict[str, str]
Expand Down Expand Up @@ -45,8 +44,8 @@ class ProtectedMaterialEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ class ProtectedMaterialsEvaluator:
Initialize a protected materials evaluator to detect whether protected material
is present in your AI system's response. Outputs True or False with AI-generated reasoning.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project.
It contains subscription id, resource group, and project name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param credential: The credential for connecting to Azure AI project.
:type credential: ~azure.core.credentials.TokenCredential
:return: Whether or not protected material was found in the response, with AI-generated reasoning.
:rtype: Dict[str, str]
Expand All @@ -84,7 +84,7 @@ class ProtectedMaterialsEvaluator:
}
"""

def __init__(self, azure_ai_project: dict, credential=None):
def __init__(self, credential, azure_ai_project: dict):
self._async_evaluator = _AsyncProtectedMaterialsEvaluator(azure_ai_project, credential)

def __call__(self, *, query: str, response: str, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# ---------------------------------------------------------
import logging

from typing import Optional
from typing_extensions import override
from azure.ai.evaluation._common.constants import EvaluationMetrics
from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
Expand All @@ -17,14 +16,14 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
Detect whether cross domain injected attacks are present in your AI system's response.
:param credential: The credential for connecting to Azure AI project. Required
:type credential: ~azure.core.credentials.TokenCredential
:param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
name.
:type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
:param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
:type eval_last_turn: bool
:param credential: The credential for connecting to Azure AI project.
:type credential: Optional[~azure.core.credentials.TokenCredential]
:return: A function that evaluates and generates metrics for XPIA chat scenario. Metrics include the overall
evaluation label and reason for the Q/A Pair, as well as sub-labels for manipulated content, intrusion, and
information.
Expand Down Expand Up @@ -53,8 +52,8 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
@override
def __init__(
self,
credential,
azure_ai_project: dict,
credential: Optional[dict] = None,
eval_last_turn: bool = False,
):
super().__init__(
Expand Down
3 changes: 2 additions & 1 deletion sdk/evaluation/azure-ai-evaluation/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
"promptflow-devkit>=1.15.0",
"promptflow-core>=1.15.0",
"pyjwt>=2.8.0",
"azure-identity>=1.12.0",
# pickle support for credentials was added to this release
"azure-identity>=1.16.0",
"azure-core>=1.30.2",
"nltk>=3.9.1",
"rouge-score>=0.1.2",
Expand Down
Loading

0 comments on commit ffc1dbd

Please sign in to comment.