copy of ryan's branch (#133)

TonicAI · Apr 5, 2024 · 2306b57 · 2306b57
1 parent ae2edf3
commit 2306b57
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -441,7 +441,9 @@ Tonic Validate collects minimal telemetry to help us figure out what users want
 
 * What metrics were used for a run
 * Number of questions in a run
+* Time taken for a run to be evaluated
 * Number of questions in a benchmark
+* SDK version being used
 
 We do **NOT** track things such as the contents of the questions / answers, your scores, or any other sensitive information. For detecting CI/CD, we only check for common environment variables in different CI/CD environments. We do not log the values of these environment variables.
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "tonic-validate"
-version = "4.0.4"
+version = "4.0.5"
 description = "RAG evaluation metrics."
 authors = ["Joe Ferrara <joeferrara@tonic.ai>", "Ethan Philpott <ephilpott@tonic.ai>", "Adam Kamor <adam@tonic.ai>"]
 readme = "README.md"

diff --git a/tonic_validate/utils/telemetry.py b/tonic_validate/utils/telemetry.py
@@ -66,7 +66,7 @@ def __is_ci(self):
                 return True
         return False
 
-    def log_run(self, num_of_questions: int, metrics: List[str]):
+    def log_run(self, num_of_questions: int, metrics: List[str], run_time: float):
         """
         Logs a run to the Tonic Validate server
 
@@ -76,16 +76,26 @@ def log_run(self, num_of_questions: int, metrics: List[str]):
             The number of questions asked
         metrics: List[str]
             The metrics that were used to evaluate the run
+        run_time: float
+            The time taken to evaluate the run, in seconds
         """
         if self.config.TONIC_VALIDATE_DO_NOT_TRACK:
             return
+        try:
+            from importlib.metadata import version
+            sdk_version = version('tonic-validate')
+        except Exception:
+            sdk_version = "unknown"
+
         user_id = self.get_user()["user_id"]
         self.http_client.http_post(
             "/runs",
             data={
                 "user_id": user_id,
                 "num_of_questions": num_of_questions,
                 "metrics": metrics,
+                "run_time": run_time,
+                "sdk_version": sdk_version,
                 "is_ci": self.__is_ci(),
                 "validate_gh_action": self.config.TONIC_VALIDATE_GITHUB_ACTION,
             },

diff --git a/tonic_validate/validate_scorer.py b/tonic_validate/validate_scorer.py
@@ -165,12 +165,10 @@ async def a_score_responses(
             The Run object containing the scores and other data.
         """
         try:
-            self.telemetry.log_run(
-                len(responses), [metric.name for metric in self.metrics]
-            )
+            start_time = time.time()
         except Exception as _:
-            pass
-
+            start_time = -1
+        
         semaphore = Semaphore(parallelism)
         tasks = [
             self._score_item_rundata(response, semaphore) for response in responses
@@ -196,6 +194,18 @@ async def a_score_responses(
         overall_scores: Dict[str, float] = {
             metric: total / num_scores[metric] for metric, total in total_scores.items()
         }
+        try:
+            end_time = time.time()
+            run_time = end_time - start_time
+        except Exception as _:
+            run_time = -1
+
+        try:
+            self.telemetry.log_run(
+                len(responses), [metric.name for metric in self.metrics], run_time
+            )
+        except Exception as _:
+            pass
 
         return Run(overall_scores=overall_scores, run_data=run_data, llm_evaluator=self.model_evaluator, id=None)