# LEM/deploy/inference-proxy.conf

# Nginx reverse proxy for OpenAI-compatible inference API.
# Routes /v1/* to the configured upstream (M3 MLX, vLLM, llama.cpp, etc.)
# Set UPSTREAM_URL env var or LEM_INFERENCE_BACKEND in docker-compose.
server {
    listen 8080;
    server_name localhost;

    # Health check endpoint — answered locally, no upstream round-trip.
    location /health {
        # default_type sets the Content-Type of the `return` body.
        # The previous `add_header Content-Type ...` produced a second
        # Content-Type header alongside nginx's default text/plain.
        default_type application/json;
        return 200 '{"status": "ok"}';
    }

    # Proxy all /v1/* requests to the inference backend.
    # ${UPSTREAM_URL} is substituted at container start (envsubst),
    # per the header comment / docker-compose.
    location /v1/ {
        proxy_pass ${UPSTREAM_URL}/v1/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;

        # Long timeouts: LLM generations can run for minutes.
        proxy_read_timeout 300s;
        proxy_send_timeout 300s;

        # Stream tokens to the client as they are produced (SSE);
        # buffering would hold the whole response until completion.
        proxy_buffering off;
        # nginx proxies with HTTP/1.0 by default, which forbids the
        # chunked transfer encoding that streaming responses use.
        # Force HTTP/1.1 and clear Connection so upstream keep-alive works.
        proxy_http_version 1.1;
        proxy_set_header Connection "";
    }

    # Model listing passthrough. Longest-prefix matching means this
    # block (not /v1/) handles /v1/models; keep its forwarded headers
    # consistent with the /v1/ location above.
    location /v1/models {
        proxy_pass ${UPSTREAM_URL}/v1/models;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
    }
}