replicate · technillogue · May 17, 2024 · May 6, 2024 · May 17, 2024
diff --git a/docs/metrics.md b/docs/metrics.md
@@ -0,0 +1,14 @@
+# Metrics
+
+Prediction objects have a `metrics` field, which normally includes `predict_time` and `total_time`. Official language models also report metrics like `input_token_count`, `output_token_count`, `tokens_per_second`, and `time_to_first_token`. Currently, custom metrics emitted from Cog are ignored when running on Replicate; official Replicate-published models are the only exception. When running outside of Replicate, you can emit custom metrics like this:
+
+
+```python
+import cog
+from cog import BasePredictor, Path
+
+class Predictor(BasePredictor):
+    def predict(self, width: int, height: int) -> Path:
+        """Run a single prediction on the model"""
+        cog.emit_metric("pixel_count", width * height)  # metric name, then its numeric value
+```
diff --git a/python/cog/__init__.py b/python/cog/__init__.py
@@ -1,6 +1,7 @@
 from pydantic import BaseModel
 
 from .predictor import BasePredictor
+from .server.worker import emit_metric
 from .types import AsyncConcatenateIterator, ConcatenateIterator, File, Input, Path
 
 try:
@@ -18,4 +19,5 @@
     "File",
     "Input",
     "Path",
+    "emit_metric",
 ]
diff --git a/python/cog/server/eventtypes.py b/python/cog/server/eventtypes.py
@@ -38,6 +38,12 @@ class Log:
     source: str = field(validator=validators.in_(["stdout", "stderr"]))
 
 
+@define
+class PredictionMetric:  # event carrying one named numeric metric for a prediction
+    name: str  # key the runner uses when storing the value in the prediction's metrics
+    value: "float | int"  # numeric value stored under `name`
+
+
 @define
 class PredictionOutput:
     payload: Any

diff --git a/python/cog/server/runner.py b/python/cog/server/runner.py
@@ -26,6 +26,7 @@
     Heartbeat,
     Log,
     PredictionInput,
+    PredictionMetric,
     PredictionOutput,
     PredictionOutputType,
     PublicEventType,
@@ -438,6 +439,7 @@ def __init__(
         self.logger.info("starting prediction")
         # maybe this should be a deep copy to not share File state with child worker
         self.p = schema.PredictionResponse(**request.dict())
+        self.p.metrics = {}
         self.p.status = schema.Status.PROCESSING
         self.p.output = None
         self.p.logs = ""
@@ -489,9 +491,9 @@ async def succeeded(self) -> None:
         # that...
         assert self.p.completed_at is not None
         assert self.p.started_at is not None
-        self.p.metrics = {
-            "predict_time": (self.p.completed_at - self.p.started_at).total_seconds()
-        }
+        self.p.metrics["predict_time"] = (
+            self.p.completed_at - self.p.started_at
+        ).total_seconds()
         await self._send_webhook(schema.WebhookEvent.COMPLETED)
 
     async def failed(self, error: str) -> None:
@@ -552,6 +554,9 @@ def event_to_handle_future(self, event: PublicEventType) -> Awaitable[None]:
             if self._output_type.multi:
                 return self.set_output([])
             return self.noop()
+        if isinstance(event, PredictionMetric):
+            self.p.metrics[event.name] = event.value
+            return self.noop()
         if isinstance(event, PredictionOutput):
             if self._output_type is None:
                 return self.failed(error="Predictor returned unexpected output")

diff --git a/python/cog/server/worker.py b/python/cog/server/worker.py
@@ -27,6 +27,7 @@
     Heartbeat,
     Log,
     PredictionInput,
+    PredictionMetric,
     PredictionOutput,
     PredictionOutputType,
     PublicEventType,
@@ -85,6 +86,17 @@ async def read(
             raise self.fatal
 
 
+# janky mutable container for a single eventual ChildWorker
+worker_reference: "dict[None, _ChildWorker]" = {}
+
+
+def emit_metric(metric_name: str, metric_value: "float | int") -> None:
+    """Emit a custom metric for the current prediction; raise if no worker is set."""
+    if None not in worker_reference:
+        raise Exception("Attempted to emit metric but worker is not running")
+    worker_reference[None]._emit_metric(metric_name, metric_value)
+
+
 class _ChildWorker(_spawn.Process):  # type: ignore
     def __init__(
         self,
@@ -109,6 +121,7 @@ def run(self) -> None:
         # We use SIGUSR1 to signal an interrupt for cancelation.
         signal.signal(signal.SIGUSR1, self._signal_handler)
 
+        worker_reference[None] = self
         self.prediction_id_context: ContextVar[str] = ContextVar("prediction_context")
 
         # <could be moved into StreamRedirector>
@@ -239,6 +252,12 @@ def _handle_predict_error(self, id: str) -> Iterator[None]:
         self._stream_redirector.drain()
         self._events.send((id, done))
 
+    def _emit_metric(self, name: str, value: "int | float") -> None:  # send one metric event
+        prediction_id = self.prediction_id_context.get(None)  # None if no id is set
+        if prediction_id is None:
+            raise Exception("Tried to emit a metric outside a prediction context")
+        self._events.send((prediction_id, PredictionMetric(name, value)))  # (id, event) pair
+
     def _mk_send(self, id: str) -> Callable[[PublicEventType], None]:
         def send(event: PublicEventType) -> None:
             self._events.send((id, event))

diff --git a/python/tests/server/test_runner.py b/python/tests/server/test_runner.py
@@ -4,8 +4,10 @@
 import time
 from datetime import datetime
 from unittest import mock
+
 import pytest
 import pytest_asyncio
+
 from cog.schema import PredictionRequest, PredictionResponse, Status, WebhookEvent
 from cog.server.clients import ClientManager
 from cog.server.eventtypes import (
@@ -22,7 +24,6 @@
     UnknownPredictionError,
 )
 
-
 # TODO
 # - setup logs
 # - file inputs being converted