# Nginx reverse proxy for an OpenAI-compatible inference API.
# Routes /v1/* to the configured upstream (M3 MLX, vLLM, llama.cpp, etc.).
# Set the UPSTREAM_URL env var or LEM_INFERENCE_BACKEND in docker-compose.

server {
    listen 8080;
    server_name localhost;

    # Health check endpoint.
    location /health {
        default_type application/json;
        return 200 '{"status": "ok"}';
    }

    # Proxy all /v1/* requests to the inference backend.
    # Buffering is off so streamed (SSE) completions are flushed to the client as they arrive.
    location /v1/ {
        proxy_pass ${UPSTREAM_URL}/v1/;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_read_timeout 300s;
        proxy_send_timeout 300s;
        proxy_buffering off;
    }

    # Model listing passthrough (longest-prefix match, so this block wins over /v1/).
    location /v1/models {
        proxy_pass ${UPSTREAM_URL}/v1/models;
        proxy_set_header Host $host;
    }
}
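
# Example of the rendered proxy block (a sketch; the templating mechanism and
# backend URL below are assumptions, not confirmed by this file): nginx does not
# expand environment variables in its config by itself, so ${UPSTREAM_URL} is
# expected to be substituted before startup, e.g. via the envsubst templating
# built into the official nginx Docker image. With UPSTREAM_URL=http://vllm:8000
# the proxied location would render as:
#
#   location /v1/ {
#       proxy_pass http://vllm:8000/v1/;
#       ...
#   }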