Skip to content

Commit

Permalink
Added parameters to expose LLM prompts from eval (#128)
Browse files Browse the repository at this point in the history
* Added parameters to expose LLM prompts from eval

* Fixed wording for consistency metric

* Formatting

* Switch to not use property

---------

Co-authored-by: ethan-tonic <ephilpott@tonic.ai>
  • Loading branch information
Ryan-Rishi and ethan-tonic authored Apr 2, 2024
1 parent 3968afe commit becde2c
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 13 deletions.
3 changes: 2 additions & 1 deletion tonic_validate/metrics/answer_consistency_binary_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
from tonic_validate.metrics.binary_metric import BinaryMetric
from tonic_validate.utils.metrics_util import parse_boolean_response
from tonic_validate.services.openai_service import OpenAIService
from tonic_validate.utils.llm_calls import answer_consistent_with_context_call
from tonic_validate.utils.llm_calls import answer_consistent_with_context_call, context_consistency_prompt

logger = logging.getLogger()


class AnswerConsistencyBinaryMetric(BinaryMetric):
name: str = "answer_consistency_binary"
prompt: str = context_consistency_prompt()

def __init__(self):
"""
Expand Down
9 changes: 9 additions & 0 deletions tonic_validate/metrics/answer_consistency_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,22 @@
from tonic_validate.utils.llm_calls import (
main_points_call,
statement_derived_from_context_call,
statement_derived_from_context_prompt,
main_points_prompt,
)

logger = logging.getLogger()


class AnswerConsistencyMetric(Metric):
name: str = "answer_consistency"
prompt: str = (
"-------------------\n"
f"{main_points_prompt()}\n"
"-------------------\n"
f"{statement_derived_from_context_prompt(statement='EXAMPLE STATEMENT', context_list=[])}\n"
"-------------------\n"
)

def __init__(self):
"""
Expand Down
3 changes: 2 additions & 1 deletion tonic_validate/metrics/answer_similarity_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
from tonic_validate.classes.llm_response import LLMResponse
from tonic_validate.metrics.metric import Metric
from tonic_validate.services.openai_service import OpenAIService
from tonic_validate.utils.llm_calls import similarity_score_call
from tonic_validate.utils.llm_calls import similarity_score_call, similarity_score_prompt

logger = logging.getLogger()


class AnswerSimilarityMetric(Metric):
name: str = "answer_similarity"
prompt: str = similarity_score_prompt()

def __init__(self) -> None:
"""
Expand Down
3 changes: 2 additions & 1 deletion tonic_validate/metrics/augmentation_accuracy_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from tonic_validate.metrics.metric import Metric
from tonic_validate.utils.metrics_util import parse_boolean_response
from tonic_validate.services.openai_service import OpenAIService
from tonic_validate.utils.llm_calls import answer_contains_context_call
from tonic_validate.utils.llm_calls import answer_contains_context_call, answer_contains_context_prompt

logger = logging.getLogger()


class AugmentationAccuracyMetric(Metric):
name: str = "augmentation_accuracy"
prompt: str = answer_contains_context_prompt()

def __init__(self):
"""
Expand Down
3 changes: 2 additions & 1 deletion tonic_validate/metrics/duplication_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from tonic_validate.classes.llm_response import LLMResponse
from tonic_validate.metrics.binary_metric import BinaryMetric
from tonic_validate.services.openai_service import OpenAIService
from tonic_validate.utils.llm_calls import contains_duplicate_information
from tonic_validate.utils.llm_calls import contains_duplicate_information, contains_duplicate_info_prompt
from tonic_validate.utils.metrics_util import parse_boolean_response

logger = logging.getLogger()


class DuplicationMetric(BinaryMetric):
name: str = "duplication_metric"
prompt: str = contains_duplicate_info_prompt()

def __init__(self):
"""
Expand Down
3 changes: 2 additions & 1 deletion tonic_validate/metrics/hate_speech_content_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from tonic_validate.classes.llm_response import LLMResponse
from tonic_validate.metrics.binary_metric import BinaryMetric
from tonic_validate.services.openai_service import OpenAIService
from tonic_validate.utils.llm_calls import contains_hate_speech
from tonic_validate.utils.llm_calls import contains_hate_speech, contains_hate_speech_prompt
from tonic_validate.utils.metrics_util import parse_boolean_response

logger = logging.getLogger()


class HateSpeechContentMetric(BinaryMetric):
name: str = "hate_speech_content"
prompt: str = contains_hate_speech_prompt()

def __init__(self):
"""
Expand Down
6 changes: 6 additions & 0 deletions tonic_validate/metrics/metric.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
from abc import ABC, abstractmethod
from typing import Optional

from tonic_validate.classes.llm_response import LLMResponse
from tonic_validate.services.openai_service import OpenAIService


class Metric(ABC):
"""Abstract class for a metric that can be calculated on an LLM response."""

"""Prompt for the metric. Can be overridden by subclasses if a specific prompt is needed."""
prompt: Optional[str] = None

@property
@abstractmethod
def name(self) -> str:
Expand Down
3 changes: 2 additions & 1 deletion tonic_validate/metrics/retrieval_precision_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from tonic_validate.metrics.metric import Metric
from tonic_validate.utils.metrics_util import parse_boolean_response
from tonic_validate.services.openai_service import OpenAIService
from tonic_validate.utils.llm_calls import context_relevancy_call
from tonic_validate.utils.llm_calls import context_relevancy_call, context_relevancy_prompt

logger = logging.getLogger()


class RetrievalPrecisionMetric(Metric):
name: str = "retrieval_precision"
prompt: str = context_relevancy_prompt()

def __init__(self):
"""
Expand Down
21 changes: 14 additions & 7 deletions tonic_validate/utils/llm_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,12 +360,9 @@ async def statement_derived_from_context_call(
logger.debug(
f"Asking {openai_service.model} whether statement is derived from context"
)
main_message = "Considering the following statement and list of context(s)"
main_message += f"\n\nSTATEMENT:\n{statement}\nEND OF STATEMENT"
for i, context in enumerate(context_list):
main_message += f"\n\nCONTEXT {i}:\n{context}\nEND OF CONTEXT {i}"

main_message = statement_derived_from_context_prompt(main_message)
main_message = statement_derived_from_context_prompt(statement, context_list)

try:
response_message = await openai_service.get_response(main_message)
except ContextLengthException as e:
Expand All @@ -391,18 +388,28 @@ async def statement_derived_from_context_call(
return response_message


def statement_derived_from_context_prompt(main_message):
def statement_derived_from_context_prompt(statement: str, context_list: List[str]):
"""
Parameters
----------
main_message : The main message to which additional instructions will be added.
statement: str
The statement to be checked.
context_list: List[str]
List of retrieved contexts to check the statement against.
Returns
-------
Prompt message for determining whether a statement can be derived from the context.
"""
if not context_list:
context_list = ["EXAMPLE CONTEXT"]

main_message = "Considering the following statement and list of context(s)"
main_message += f"\n\nSTATEMENT:\n{statement}\nEND OF STATEMENT"
for i, context in enumerate(context_list):
main_message += f"\n\nCONTEXT {i}:\n{context}\nEND OF CONTEXT {i}"
main_message += (
"\n\nDetermine whether the listed statement above can be derived from the "
"context listed above. If the statement can "
Expand Down

0 comments on commit becde2c

Please sign in to comment.