# Nginx reverse proxy for an OpenAI-compatible inference API.
# Routes /v1/* to the configured upstream (M3 MLX, vLLM, llama.cpp, etc.).
# Set the UPSTREAM_URL env var or LEM_INFERENCE_BACKEND in docker-compose.

server {
    listen 8080;
    server_name localhost;

    # Health check endpoint.
    location /health {
        default_type application/json;
        return 200 '{"status": "ok"}';
    }

    # Proxy all /v1/* requests to the inference backend.
    # Buffering is off so streamed (SSE) completions are flushed to the client as they arrive.
    location /v1/ {
        proxy_pass ${UPSTREAM_URL}/v1/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_read_timeout 300s;
        proxy_send_timeout 300s;
        proxy_buffering off;
    }

    # Model listing passthrough (longest-prefix match, so this block wins over /v1/).
    location /v1/models {
        proxy_pass ${UPSTREAM_URL}/v1/models;
        proxy_set_header Host $host;
    }
}
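
# Example of the rendered proxy block (a sketch; the templating mechanism and
# backend URL below are assumptions, not confirmed by this file): nginx does not
# expand environment variables in its config by itself, so ${UPSTREAM_URL} is
# expected to be substituted before startup, e.g. via the envsubst templating
# built into the official nginx Docker image. With UPSTREAM_URL=http://vllm:8000
# the proxied location would render as:
#
#   location /v1/ {
#       proxy_pass http://vllm:8000/v1/;
#       ...
#   }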