langchain-ai · hinthornw · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
@@ -18,6 +18,7 @@
 import sys
 import threading
 import time
+import typing
 import uuid
 import warnings
 import weakref
@@ -407,6 +408,24 @@ def _as_uuid(value: ID_TYPE, var: Optional[str] = None) -> uuid.UUID:
         ) from e
 
 
+@typing.overload
+def _ensure_uuid(value: Optional[Union[str, uuid.UUID]]) -> uuid.UUID: ...
+
+
+@typing.overload
+def _ensure_uuid(
+    value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = True
+) -> Optional[uuid.UUID]: ...
+
+
+def _ensure_uuid(value: Optional[Union[str, uuid.UUID]], *, accept_null: bool = False):
+    """Return ``value`` as a UUID; ``None`` gives ``None`` or a new ``uuid4``."""
+    if value is not None:
+        return _as_uuid(value)
+    # Missing ID: hand None back only when the caller opted in, else mint one.
+    return None if accept_null else uuid.uuid4()
+
+
 @functools.lru_cache(maxsize=1)
 def _parse_url(url):
     parsed_url = urllib_parse.urlparse(url)
@@ -3138,12 +3157,11 @@ def create_example(
         if created_at:
             data["created_at"] = created_at.isoformat()
         data["id"] = example_id or str(uuid.uuid4())
-        example = ls_schemas.ExampleCreate(**data)
         response = self.request_with_retries(
             "POST",
             "/examples",
             headers={**self._headers, "Content-Type": "application/json"},
-            data=example.json(),
+            data=_dumps_json({k: v for k, v in data.items() if v is not None}),
         )
         ls_utils.raise_for_status_with_text(response)
         result = response.json()
@@ -3275,7 +3293,7 @@ def update_example(
         Dict[str, Any]
             The updated example.
         """
-        example = ls_schemas.ExampleUpdate(
+        example = dict(
             inputs=inputs,
             outputs=outputs,
             dataset_id=dataset_id,
@@ -3286,7 +3304,7 @@ def update_example(
             "PATCH",
             f"/examples/{_as_uuid(example_id, 'example_id')}",
             headers={**self._headers, "Content-Type": "application/json"},
-            data=example.json(exclude_none=True),
+            data=_dumps_json({k: v for k, v in example.items() if v is not None}),
         )
         ls_utils.raise_for_status_with_text(response)
         return response.json()
@@ -3386,7 +3404,7 @@ def _select_eval_results(
                 results_ = cast(List[ls_evaluator.EvaluationResult], results["results"])
             else:
                 results_ = [
-                    ls_evaluator.EvaluationResult(**{"key": fn_name, **results})
+                    ls_evaluator.EvaluationResult(**{"key": fn_name, **results})  # type: ignore[arg-type]
                 ]
         else:
             raise TypeError(
@@ -3631,8 +3649,8 @@ def create_feedback(
                 )
             feedback_source.metadata["__run"] = _run_meta
         feedback = ls_schemas.FeedbackCreate(
-            id=feedback_id or uuid.uuid4(),
-            run_id=run_id,
+            id=_ensure_uuid(feedback_id),
+            run_id=_ensure_uuid(run_id),
             key=key,
             score=score,
             value=value,
@@ -3642,9 +3660,11 @@ def create_feedback(
             created_at=datetime.datetime.now(datetime.timezone.utc),
             modified_at=datetime.datetime.now(datetime.timezone.utc),
             feedback_config=feedback_config,
-            session_id=project_id,
-            comparative_experiment_id=comparative_experiment_id,
-            feedback_group_id=feedback_group_id,
+            session_id=_ensure_uuid(project_id, accept_null=True),
+            comparative_experiment_id=_ensure_uuid(
+                comparative_experiment_id, accept_null=True
+            ),
+            feedback_group_id=_ensure_uuid(feedback_group_id, accept_null=True),
         )
         feedback_block = _dumps_json(feedback.dict(exclude_none=True))
         self.request_with_retries(
@@ -4038,8 +4058,6 @@ def list_annotation_queues(
         ):
             yield ls_schemas.AnnotationQueue(
                 **queue,
-                _host_url=self._host_url,
-                _tenant_id=self._get_optional_tenant_id(),
             )
             if limit is not None and i + 1 >= limit:
                 break
@@ -4078,8 +4096,6 @@ def create_annotation_queue(
         ls_utils.raise_for_status_with_text(response)
         return ls_schemas.AnnotationQueue(
             **response.json(),
-            _host_url=self._host_url,
-            _tenant_id=self._get_optional_tenant_id(),
         )
 
     def read_annotation_queue(self, queue_id: ID_TYPE) -> ls_schemas.AnnotationQueue:

diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py
@@ -22,7 +22,7 @@
 try:
     from pydantic.v1 import BaseModel, Field, ValidationError  # type: ignore[import]
 except ImportError:
-    from pydantic import BaseModel, Field, ValidationError
+    from pydantic import BaseModel, Field, ValidationError  # type: ignore[assignment]
 
 from functools import wraps
 

diff --git a/python/langsmith/evaluation/string_evaluator.py b/python/langsmith/evaluation/string_evaluator.py
@@ -35,4 +35,4 @@ def evaluate_run(
         run_input = run.inputs[self.input_key]
         run_output = run.outputs[self.prediction_key]
         grading_results = self.grading_function(run_input, run_output, answer)
-        return EvaluationResult(key=self.evaluation_name, **grading_results)
+        return EvaluationResult(**{"key": self.evaluation_name, **grading_results})
diff --git a/python/langsmith/run_helpers.py b/python/langsmith/run_helpers.py
@@ -728,14 +728,16 @@ def trace(
     else:
         new_run = run_trees.RunTree(
             name=name,
-            id=run_id or uuid.uuid4(),
-            reference_example_id=reference_example_id,
+            id=ls_client._ensure_uuid(run_id),
+            reference_example_id=ls_client._ensure_uuid(
+                reference_example_id, accept_null=True
+            ),
             run_type=run_type,
             extra=extra_outer,
-            project_name=project_name_,
+            project_name=project_name_,  # type: ignore[arg-type]
             inputs=inputs or {},
             tags=tags_,
-            client=client,
+            client=client,  # type: ignore[arg-type]
         )
     new_run.post()
     _PARENT_RUN_TREE.set(new_run)
@@ -1094,7 +1096,7 @@ def _setup_run(
         )
     else:
         new_run = run_trees.RunTree(
-            id=id_,
+            id=ls_client._ensure_uuid(id_),
             name=name_,
             serialized={
                 "name": name,
@@ -1103,11 +1105,13 @@ def _setup_run(
             },
             inputs=inputs,
             run_type=run_type,
-            reference_example_id=reference_example_id,
-            project_name=selected_project,
+            reference_example_id=ls_client._ensure_uuid(
+                reference_example_id, accept_null=True
+            ),
+            project_name=selected_project,  # type: ignore[arg-type]
             extra=extra_inner,
             tags=tags_,
-            client=client_,
+            client=client_,  # type: ignore
         )
     try:
         new_run.post()

diff --git a/python/langsmith/run_trees.py b/python/langsmith/run_trees.py
@@ -11,14 +11,18 @@
 try:
     from pydantic.v1 import Field, root_validator, validator  # type: ignore[import]
 except ImportError:
-    from pydantic import Field, root_validator, validator
+    from pydantic import (  # type: ignore[assignment, no-redef]
+        Field,
+        root_validator,
+        validator,
+    )
 
 import threading
 import urllib.parse
 
 from langsmith import schemas as ls_schemas
 from langsmith import utils
-from langsmith.client import ID_TYPE, RUN_TYPE_T, Client, _dumps_json
+from langsmith.client import ID_TYPE, RUN_TYPE_T, Client, _dumps_json, _ensure_uuid
 
 logger = logging.getLogger(__name__)
 
@@ -218,7 +222,7 @@ def create_child(
         serialized_ = serialized or {"name": name}
         run = RunTree(
             name=name,
-            id=run_id or uuid4(),
+            id=_ensure_uuid(run_id),
             serialized=serialized_,
             inputs=inputs or {},
             outputs=outputs or {},
@@ -229,7 +233,7 @@ def create_child(
             end_time=end_time,
             extra=extra or {},
             parent_run=self,
-            session_name=self.session_name,
+            project_name=self.session_name,
             client=self.client,
             tags=tags,
         )

diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
@@ -573,6 +573,18 @@ class TracerSessionResult(TracerSession):
     """Feedback stats for the project."""
     run_facets: Optional[List[Dict[str, Any]]]
     """Facets for the runs in the project."""
+    total_cost: Optional[Decimal]
+    """The total estimated LLM cost for the project."""
+    prompt_cost: Optional[Decimal]
+    """The estimated cost associated with the prompt (input) tokens."""
+    completion_cost: Optional[Decimal]
+    """The estimated cost associated with the completion tokens."""
+    first_token_p50: Optional[timedelta]
+    """The median (50th percentile) time to process the first token."""
+    first_token_p99: Optional[timedelta]
+    """The 99th percentile time to process the first token."""
+    error_rate: Optional[float]
+    """The error rate for the project."""
 
 
 @runtime_checkable