Skip to content

Commit

Permalink
feat/build(caddy): remove ratelimit (#178)
Browse files: browse the repository at this point in the history
  • Loading branch information
winstxnhdw authored Jun 8, 2024
1 parent 11d54d1 commit d57ff1f
Show file tree
Hide file tree
Showing 5 changed files with 2 additions and 30 deletions.
9 changes: 0 additions & 9 deletions Caddyfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
order rate_limit before basicauth
order cache before rewrite
cache
}
Expand All @@ -13,14 +12,6 @@
}
}

rate_limit {
zone dynamic_example {
key {remote_host}
events {$EVENTS_PER_WINDOW}
window 60s
}
}

handle_path /api/* {
reverse_proxy http://localhost:{$SERVER_PORT}
}
Expand Down
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@ ENV OMP_NUM_THREADS 1
ENV CT2_USE_EXPERIMENTAL_PACKED_GEMM 1
ENV CT2_FORCE_CPU_ISA AVX512
ENV WORKER_COUNT 2
ENV EVENTS_PER_WINDOW 15

EXPOSE $APP_PORT
5 changes: 1 addition & 4 deletions Dockerfile.build
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ RUN poetry install --without=dev,cuda

FROM caddy:builder-alpine as caddy-builder

RUN xcaddy build master \
--with github.com/caddyserver/cache-handler \
--with github.com/mholt/caddy-ratelimit
RUN xcaddy build master --with github.com/caddyserver/cache-handler


FROM python:slim
Expand All @@ -26,7 +24,6 @@ ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV SERVER_PORT 5000
ENV CACHE_TIMEOUT 10s
ENV EVENTS_PER_WINDOW 100000

RUN useradd -m -u 1000 user

Expand Down
5 changes: 1 addition & 4 deletions Dockerfile.cuda-build
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@ RUN poetry install --without=dev

FROM caddy:2.8.4-builder-alpine as caddy-builder

RUN xcaddy build master \
--with github.com/caddyserver/cache-handler \
--with github.com/mholt/caddy-ratelimit
RUN xcaddy build master --with github.com/caddyserver/cache-handler


FROM python:slim
Expand All @@ -26,7 +24,6 @@ ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV SERVER_PORT 5000
ENV CACHE_TIMEOUT 10s
ENV EVENTS_PER_WINDOW 100000
ENV USE_CUDA True
ENV LD_LIBRARY_PATH /usr/local/lib/python3.12/site-packages/nvidia/cublas/lib

Expand Down
12 changes: 0 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,6 @@ docker run --rm \
ghcr.io/winstxnhdw/nllb-api:main
```

### Rate Limiting

You can set an IP-based rate limit on the number of requests per minute with the following environment variable.

```bash
docker run --rm \
-e APP_PORT=7860 \
-e EVENTS_PER_WINDOW=15 \
-p 7860:7860 \
ghcr.io/winstxnhdw/nllb-api:main
```

### CUDA Support

You can accelerate your inference with CUDA by building and using `Dockerfile.cuda-build` instead.
Expand Down

0 comments on commit d57ff1f

Please sign in to comment.