Skip to content

Commit

Permalink
Poison model healthcheck on shutdown
Browse files Browse the repository at this point in the history
We have a problem in production where a broken model does not shut down
correctly when requested. As a result, when director comes back up, it
sees a healthy model (status READY/BUSY) and starts sending it new
predictions, even though the model is supposed to be shutting down.

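For now, try to improve the situation by poisoning the model
healthcheck on shutdown: once shutdown has been requested, the
healthcheck reports SHUTTING_DOWN and new predictions are rejected with
a 409. This doesn't solve the underlying problem, but it should stop us
losing more predictions to a known-broken pod.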
  • Loading branch information
nickstenning committed Jul 3, 2024
1 parent 81187f4 commit a4b86cd
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion python/cog/server/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class Health(Enum):
READY = auto()
BUSY = auto()
SETUP_FAILED = auto()
SHUTTING_DOWN = auto()


class MyState:
Expand Down Expand Up @@ -237,7 +238,9 @@ async def root() -> Any:
@app.get("/health-check")
async def healthcheck() -> Any:
await _check_setup_task()
if app.state.health == Health.READY:
if shutdown_event is not None and shutdown_event.is_set():
health = Health.SHUTTING_DOWN
elif app.state.health == Health.READY:
health = Health.BUSY if runner.is_busy() else Health.READY
else:
health = app.state.health
Expand Down Expand Up @@ -272,6 +275,8 @@ async def predict(
"""
Run a single prediction on the model
"""
if shutdown_event is not None and shutdown_event.is_set():
return JSONResponse({"detail": "Model shutting down"}, status_code=409)
if runner.is_busy():
return JSONResponse(
{"detail": "Already running a prediction"}, status_code=409
Expand All @@ -296,6 +301,8 @@ async def predict_idempotent(
"""
Run a single prediction on the model (idempotent creation).
"""
if shutdown_event is not None and shutdown_event.is_set():
return JSONResponse({"detail": "Model shutting down"}, status_code=409)
if request.id is not None and request.id != prediction_id:
err = ValueError("prediction ID must match the ID supplied in the URL")
raise RequestValidationError([ErrorWrapper(err, ("body", "id"))])
Expand Down

0 comments on commit a4b86cd

Please sign in to comment.