feat(prod): add production infrastructure management

Add `core prod` command with full production infrastructure tooling:

- `core prod status` — parallel SSH health checks across all hosts, Galera cluster state, Redis Sentinel, Docker, LB health
- `core prod setup` — Phase 1 foundation: Hetzner topology discovery, managed LB creation, CloudNS DNS record management
- `core prod dns` — CloudNS record CRUD with idempotent EnsureRecord
- `core prod lb` — Hetzner Cloud LB status and creation
- `core prod ssh <host>` — SSH into hosts defined in infra.yaml

New packages:

- pkg/infra: config parsing, Hetzner Cloud/Robot API, CloudNS DNS API
- infra.yaml: declarative production topology (hosts, LB, DNS, SSL, Galera, Redis, containers, S3, CDN, CI/CD, monitoring, backups)

Docker:

- Dockerfile.app (PHP 8.3-FPM, multi-stage)
- Dockerfile.web (Nginx + security headers)
- docker-compose.prod.yml (app, web, horizon, scheduler, mcp, redis, galera)

Ansible playbooks (runnable via `core deploy ansible`):

- galera-deploy.yml, redis-deploy.yml, galera-backup.yml
- inventory.yml with all production hosts

CI/CD:

- .forgejo/workflows/deploy.yml for Forgejo Actions pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

parent 00c011bd39
commit 349e8daa0b

25 changed files with 3150 additions and 0 deletions

.forgejo/workflows/deploy.yml (new file, 146 lines)
@@ -0,0 +1,146 @@
# Host UK Production Deployment Pipeline
# Runs on Forgejo Actions (gitea.snider.dev)
# Runner: build.de.host.uk.com
#
# Workflow:
#   1. composer install + test
#   2. npm ci + build
#   3. docker build + push
#   4. Coolify deploy webhook (rolling restart)

name: Deploy

on:
  push:
    branches: [main]
  workflow_dispatch:

env:
  REGISTRY: gitea.snider.dev
  IMAGE_APP: host-uk/app
  IMAGE_WEB: host-uk/web
  IMAGE_CORE: host-uk/core

jobs:
  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup PHP
        uses: shivammathur/setup-php@v2
        with:
          php-version: "8.3"
          extensions: bcmath, gd, intl, mbstring, pdo_mysql, redis, zip
          coverage: none

      - name: Install Composer dependencies
        run: composer install --no-interaction --prefer-dist

      - name: Run tests
        run: composer test

      - name: Check code style
        run: ./vendor/bin/pint --test

  build-app:
    name: Build App Image
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"
          cache: "npm"

      - name: Login to registry
        run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login ${{ env.REGISTRY }} -u ${{ secrets.REGISTRY_USER }} --password-stdin

      - name: Build and push app image
        run: |
          SHA=$(git rev-parse --short HEAD)
          docker build \
            -f docker/Dockerfile.app \
            -t ${{ env.REGISTRY }}/${{ env.IMAGE_APP }}:${SHA} \
            -t ${{ env.REGISTRY }}/${{ env.IMAGE_APP }}:latest \
            .
          docker push ${{ env.REGISTRY }}/${{ env.IMAGE_APP }}:${SHA}
          docker push ${{ env.REGISTRY }}/${{ env.IMAGE_APP }}:latest

  build-web:
    name: Build Web Image
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Login to registry
        run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login ${{ env.REGISTRY }} -u ${{ secrets.REGISTRY_USER }} --password-stdin

      - name: Build and push web image
        run: |
          SHA=$(git rev-parse --short HEAD)
          docker build \
            -f docker/Dockerfile.web \
            -t ${{ env.REGISTRY }}/${{ env.IMAGE_WEB }}:${SHA} \
            -t ${{ env.REGISTRY }}/${{ env.IMAGE_WEB }}:latest \
            .
          docker push ${{ env.REGISTRY }}/${{ env.IMAGE_WEB }}:${SHA}
          docker push ${{ env.REGISTRY }}/${{ env.IMAGE_WEB }}:latest

  build-core:
    name: Build Core Image
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version: "1.25"

      - name: Build core binary
        run: |
          go build -ldflags '-s -w' -o bin/core .

      - name: Login to registry
        run: echo "${{ secrets.REGISTRY_TOKEN }}" | docker login ${{ env.REGISTRY }} -u ${{ secrets.REGISTRY_USER }} --password-stdin

      - name: Build and push core image
        run: |
          SHA=$(git rev-parse --short HEAD)
          cat > Dockerfile.core <<'EOF'
          FROM alpine:3.20
          RUN apk add --no-cache ca-certificates
          COPY bin/core /usr/local/bin/core
          ENTRYPOINT ["core"]
          EOF
          docker build \
            -f Dockerfile.core \
            -t ${{ env.REGISTRY }}/${{ env.IMAGE_CORE }}:${SHA} \
            -t ${{ env.REGISTRY }}/${{ env.IMAGE_CORE }}:latest \
            .
          docker push ${{ env.REGISTRY }}/${{ env.IMAGE_CORE }}:${SHA}
          docker push ${{ env.REGISTRY }}/${{ env.IMAGE_CORE }}:latest

  deploy:
    name: Deploy to Production
    needs: [build-app, build-web, build-core]
    runs-on: ubuntu-latest
    steps:
      - name: Trigger Coolify deploy
        run: |
          curl -s -X POST \
            -H "Authorization: Bearer ${{ secrets.COOLIFY_TOKEN }}" \
            "${{ secrets.COOLIFY_URL }}/api/v1/deploy" \
            -H "Content-Type: application/json" \
            -d '{"uuid": "${{ secrets.COOLIFY_APP_UUID }}", "force": false}'

      - name: Wait for deployment
        run: |
          echo "Deployment triggered. Coolify will perform rolling restart."
          echo "Monitor at: ${{ secrets.COOLIFY_URL }}"

docker/Dockerfile.app (new file, 107 lines)
@@ -0,0 +1,107 @@
# Host UK — Laravel Application Container
# PHP 8.3-FPM with all extensions required by the federated monorepo
#
# Build: docker build -f docker/Dockerfile.app -t host-uk/app:latest ..
#        (run from host-uk/ workspace root, not core/)

FROM php:8.3-fpm-alpine AS base

# System dependencies
RUN apk add --no-cache \
    git \
    curl \
    libpng-dev \
    libjpeg-turbo-dev \
    freetype-dev \
    libwebp-dev \
    libzip-dev \
    icu-dev \
    oniguruma-dev \
    libxml2-dev \
    linux-headers \
    $PHPIZE_DEPS

# PHP extensions
RUN docker-php-ext-configure gd \
        --with-freetype \
        --with-jpeg \
        --with-webp \
    && docker-php-ext-install -j$(nproc) \
        bcmath \
        exif \
        gd \
        intl \
        mbstring \
        opcache \
        pcntl \
        pdo_mysql \
        soap \
        xml \
        zip

# Redis extension
RUN pecl install redis && docker-php-ext-enable redis

# Composer
COPY --from=composer:2 /usr/bin/composer /usr/bin/composer

# PHP configuration
RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini"
COPY docker/php/opcache.ini $PHP_INI_DIR/conf.d/opcache.ini
COPY docker/php/php-fpm.conf /usr/local/etc/php-fpm.d/zz-host-uk.conf

# --- Build stage ---
FROM base AS build

WORKDIR /app

# Install dependencies first (cache layer)
COPY composer.json composer.lock ./
RUN composer install \
    --no-dev \
    --no-scripts \
    --no-autoloader \
    --prefer-dist \
    --no-interaction

# Copy application
COPY . .

# Generate autoloader and run post-install
RUN composer dump-autoload --optimize --no-dev \
    && php artisan package:discover --ansi

# Build frontend assets
RUN if [ -f package.json ]; then \
        apk add --no-cache nodejs npm && \
        npm ci --production=false && \
        npm run build && \
        rm -rf node_modules; \
    fi

# --- Production stage ---
FROM base AS production

WORKDIR /app

# Copy built application
COPY --from=build /app /app

# Create storage directories
RUN mkdir -p \
    storage/framework/cache/data \
    storage/framework/sessions \
    storage/framework/views \
    storage/logs \
    bootstrap/cache

# Permissions
RUN chown -R www-data:www-data storage bootstrap/cache

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
    CMD php-fpm-healthcheck || exit 1

USER www-data

EXPOSE 9000

docker/Dockerfile.web (new file, 19 lines)
@@ -0,0 +1,19 @@
# Host UK — Nginx Web Server
# Serves static files and proxies PHP to FPM container
#
# Build: docker build -f docker/Dockerfile.web -t host-uk/web:latest .

FROM nginx:1.27-alpine

# Copy nginx configuration
COPY docker/nginx/default.conf /etc/nginx/conf.d/default.conf
COPY docker/nginx/security-headers.conf /etc/nginx/snippets/security-headers.conf

# Copy static assets from app build
# (In production, these are volume-mounted from the app container)
# COPY --from=host-uk/app:latest /app/public /app/public

HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD wget -qO- http://localhost/health || exit 1

EXPOSE 80

docker/docker-compose.prod.yml (new file, 200 lines)
@@ -0,0 +1,200 @@
# Host UK Production Docker Compose
# Deployed to de.host.uk.com and de2.host.uk.com via Coolify
#
# Container topology per app server:
#   app       - PHP 8.3-FPM (all Laravel modules)
#   web       - Nginx (static files + FastCGI proxy)
#   horizon   - Laravel Horizon (queue worker)
#   scheduler - Laravel scheduler
#   mcp       - Go MCP server
#   redis     - Redis 7 (local cache + sessions)
#   galera    - MariaDB 11 (Galera cluster node)

services:
  app:
    image: ${REGISTRY:-gitea.snider.dev}/host-uk/app:${TAG:-latest}
    restart: unless-stopped
    volumes:
      - app-storage:/app/storage
    environment:
      - APP_ENV=production
      - APP_DEBUG=false
      - APP_URL=${APP_URL:-https://host.uk.com}
      - DB_HOST=galera
      - DB_PORT=3306
      - DB_DATABASE=${DB_DATABASE:-hostuk}
      - DB_USERNAME=${DB_USERNAME:-hostuk}
      - DB_PASSWORD=${DB_PASSWORD}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
      - CACHE_DRIVER=redis
      - SESSION_DRIVER=redis
      - QUEUE_CONNECTION=redis
    depends_on:
      redis:
        condition: service_healthy
      galera:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "php-fpm-healthcheck || exit 1"]
      interval: 30s
      timeout: 3s
      start_period: 10s
      retries: 3
    networks:
      - app-net

  web:
    image: ${REGISTRY:-gitea.snider.dev}/host-uk/web:${TAG:-latest}
    restart: unless-stopped
    ports:
      - "${WEB_PORT:-80}:80"
    volumes:
      - app-storage:/app/storage:ro
    depends_on:
      app:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost/health"]
      interval: 30s
      timeout: 3s
      start_period: 5s
      retries: 3
    networks:
      - app-net

  horizon:
    image: ${REGISTRY:-gitea.snider.dev}/host-uk/app:${TAG:-latest}
    restart: unless-stopped
    command: php artisan horizon
    volumes:
      - app-storage:/app/storage
    environment:
      - APP_ENV=production
      - DB_HOST=galera
      - DB_PORT=3306
      - DB_DATABASE=${DB_DATABASE:-hostuk}
      - DB_USERNAME=${DB_USERNAME:-hostuk}
      - DB_PASSWORD=${DB_PASSWORD}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    depends_on:
      app:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "php artisan horizon:status | grep -q running"]
      interval: 60s
      timeout: 5s
      start_period: 30s
      retries: 3
    networks:
      - app-net

  scheduler:
    image: ${REGISTRY:-gitea.snider.dev}/host-uk/app:${TAG:-latest}
    restart: unless-stopped
    command: php artisan schedule:work
    volumes:
      - app-storage:/app/storage
    environment:
      - APP_ENV=production
      - DB_HOST=galera
      - DB_PORT=3306
      - DB_DATABASE=${DB_DATABASE:-hostuk}
      - DB_USERNAME=${DB_USERNAME:-hostuk}
      - DB_PASSWORD=${DB_PASSWORD}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    depends_on:
      app:
        condition: service_healthy
    networks:
      - app-net

  mcp:
    image: ${REGISTRY:-gitea.snider.dev}/host-uk/core:${TAG:-latest}
    restart: unless-stopped
    command: core mcp serve
    ports:
      - "${MCP_PORT:-9001}:9000"
    environment:
      - MCP_ADDR=:9000
    healthcheck:
      test: ["CMD-SHELL", "nc -z localhost 9000 || exit 1"]
      interval: 30s
      timeout: 3s
      retries: 3
    networks:
      - app-net

  redis:
    image: redis:7-alpine
    restart: unless-stopped
    command: >
      redis-server
      --maxmemory 512mb
      --maxmemory-policy allkeys-lru
      --appendonly yes
      --appendfsync everysec
    volumes:
      - redis-data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 5
    networks:
      - app-net

  galera:
    image: mariadb:11
    restart: unless-stopped
    environment:
      - MARIADB_ROOT_PASSWORD=${DB_ROOT_PASSWORD}
      - MARIADB_DATABASE=${DB_DATABASE:-hostuk}
      - MARIADB_USER=${DB_USERNAME:-hostuk}
      - MARIADB_PASSWORD=${DB_PASSWORD}
      - WSREP_CLUSTER_NAME=hostuk-galera
      - WSREP_CLUSTER_ADDRESS=${GALERA_CLUSTER_ADDRESS:-gcomm://}
      - WSREP_NODE_ADDRESS=${GALERA_NODE_ADDRESS}
      - WSREP_NODE_NAME=${GALERA_NODE_NAME}
      - WSREP_SST_METHOD=mariabackup
    command: >
      --wsrep-on=ON
      --wsrep-provider=/usr/lib/galera/libgalera_smm.so
      --wsrep-cluster-name=hostuk-galera
      --wsrep-cluster-address=${GALERA_CLUSTER_ADDRESS:-gcomm://}
      --wsrep-node-address=${GALERA_NODE_ADDRESS}
      --wsrep-node-name=${GALERA_NODE_NAME}
      --wsrep-sst-method=mariabackup
      --binlog-format=ROW
      --default-storage-engine=InnoDB
      --innodb-autoinc-lock-mode=2
      --innodb-buffer-pool-size=1G
      --innodb-log-file-size=256M
      --character-set-server=utf8mb4
      --collation-server=utf8mb4_unicode_ci
    volumes:
      - galera-data:/var/lib/mysql
    ports:
      - "${GALERA_PORT:-3306}:3306"
      - "4567:4567"
      - "4568:4568"
      - "4444:4444"
    healthcheck:
      test: ["CMD-SHELL", "mariadb -u root -p${DB_ROOT_PASSWORD} -e 'SHOW STATUS LIKE \"wsrep_ready\"' | grep -q ON"]
      interval: 30s
      timeout: 10s
      start_period: 60s
      retries: 5
    networks:
      - app-net

volumes:
  app-storage:
  redis-data:
  galera-data:

networks:
  app-net:
    driver: bridge

docker/nginx/default.conf (new file, 59 lines)
@@ -0,0 +1,59 @@
# Host UK Nginx Configuration
# Proxies PHP to the app (FPM) container, serves static files directly

server {
    listen 80;
    server_name _;

    root /app/public;
    index index.php;

    charset utf-8;

    # Security headers
    include /etc/nginx/snippets/security-headers.conf;

    # Health check endpoint (no logging)
    location = /health {
        access_log off;
        try_files $uri /index.php?$query_string;
    }

    # Static file caching
    location ~* \.(css|js|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot|webp|avif)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
        access_log off;
        try_files $uri =404;
    }

    # Laravel application
    location / {
        try_files $uri $uri/ /index.php?$query_string;
    }

    # PHP-FPM upstream
    location ~ \.php$ {
        fastcgi_pass app:9000;
        fastcgi_param SCRIPT_FILENAME $realpath_root$fastcgi_script_name;
        include fastcgi_params;

        fastcgi_hide_header X-Powered-By;
        fastcgi_buffer_size 32k;
        fastcgi_buffers 16 16k;
        fastcgi_read_timeout 300;

        # Pass real client IP from LB proxy protocol
        fastcgi_param REMOTE_ADDR $http_x_forwarded_for;
    }

    # Block dotfiles (except .well-known)
    location ~ /\.(?!well-known) {
        deny all;
    }

    # Block access to sensitive files
    location ~* \.(env|log|yaml|yml|toml|lock|bak|sql)$ {
        deny all;
    }
}

docker/nginx/security-headers.conf (new file, 6 lines)
@@ -0,0 +1,6 @@
# Security headers for Host UK
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Permissions-Policy "camera=(), microphone=(), geolocation=(), payment=()" always;

docker/php/opcache.ini (new file, 10 lines)
@@ -0,0 +1,10 @@
; OPcache configuration for production
opcache.enable=1
opcache.memory_consumption=256
opcache.interned_strings_buffer=16
opcache.max_accelerated_files=20000
opcache.validate_timestamps=0
opcache.save_comments=1
opcache.fast_shutdown=1
opcache.jit_buffer_size=128M
opcache.jit=1255

docker/php/php-fpm.conf (new file, 22 lines)
@@ -0,0 +1,22 @@
; Host UK PHP-FPM pool configuration
[www]
pm = dynamic
pm.max_children = 50
pm.start_servers = 10
pm.min_spare_servers = 5
pm.max_spare_servers = 20
pm.max_requests = 1000
pm.process_idle_timeout = 10s

; Status page for health checks
pm.status_path = /fpm-status
ping.path = /fpm-ping
ping.response = pong

; Logging
access.log = /proc/self/fd/2
slowlog = /proc/self/fd/2
request_slowlog_timeout = 5s

; Security
security.limit_extensions = .php

infra.yaml (new file, 268 lines)
@@ -0,0 +1,268 @@
# Infrastructure Configuration — Host UK Production
# This file is the source of truth for production topology.
# Used by: core prod status, core prod setup, core deploy ansible

# --- Hosts ---
hosts:
  noc:
    fqdn: noc.host.uk.com
    ip: 77.42.42.205
    private_ip: 10.0.0.4
    type: hcloud
    role: bastion
    ssh:
      user: root
      key: ~/.ssh/hostuk
      port: 22
    services:
      - coolify

  de:
    fqdn: de.host.uk.com
    ip: 116.202.82.115
    type: hrobot
    role: app
    ssh:
      user: root
      key: ~/.ssh/hostuk
      port: 22
    services:
      - traefik
      - app
      - web
      - horizon
      - scheduler
      - mcp
      - redis
      - galera

  de2:
    fqdn: de2.host.uk.com
    ip: 88.99.195.41
    type: hrobot
    role: app
    ssh:
      user: root
      key: ~/.ssh/hostuk
      port: 22
    services:
      - traefik
      - app
      - web
      - horizon
      - scheduler
      - mcp
      - redis
      - galera

  build:
    fqdn: build.de.host.uk.com
    ip: 46.224.93.62
    private_ip: 10.0.0.5
    type: hcloud
    role: builder
    ssh:
      user: root
      key: ~/.ssh/hostuk
      port: 22
    services:
      - forgejo-runner

# --- Load Balancer ---
load_balancer:
  name: hermes
  fqdn: hermes.lb.host.uk.com
  provider: hetzner
  type: lb11
  location: fsn1
  algorithm: round_robin
  backends:
    - host: de
      port: 80
    - host: de2
      port: 80
  health_check:
    protocol: http
    path: /health
    interval: 15
  listeners:
    - frontend: 443
      backend: 80
      protocol: https
      proxy_protocol: true
  ssl:
    certificate: "*.host.uk.com"
    san:
      - host.uk.com

# --- Private Network ---
network:
  cidr: 10.0.0.0/16
  name: host-uk-internal

# --- DNS ---
dns:
  provider: cloudns
  nameservers:
    - ns1.lthn.io
    - ns2.lthn.io
    - ns3.lthn.io
    - ns4.lthn.io
  zones:
    host.uk.com:
      records:
        - name: "@"
          type: A
          value: "{{.lb_ip}}"
          ttl: 300
        - name: "*"
          type: CNAME
          value: hermes.lb.host.uk.com
          ttl: 300
        - name: hermes.lb
          type: A
          value: "{{.lb_ip}}"
          ttl: 300
        - name: noc
          type: A
          value: 77.42.42.205
          ttl: 300
        - name: de
          type: A
          value: 116.202.82.115
          ttl: 300
        - name: de2
          type: A
          value: 88.99.195.41
          ttl: 300
        - name: build.de
          type: A
          value: 46.224.93.62
          ttl: 300

# --- SSL ---
ssl:
  wildcard:
    domains:
      - "*.host.uk.com"
      - host.uk.com
    method: dns-01
    dns_provider: cloudns
    termination: load_balancer

# --- Database ---
database:
  engine: mariadb
  version: "11"
  cluster: galera
  nodes:
    - host: de
      port: 3306
    - host: de2
      port: 3306
  sst_method: mariabackup
  backup:
    schedule: "0 3 * * *"
    destination: s3
    bucket: hostuk
    prefix: backup/galera/

# --- Cache ---
cache:
  engine: redis
  version: "7"
  sentinel: true
  nodes:
    - host: de
      port: 6379
    - host: de2
      port: 6379

# --- Containers (per app server) ---
containers:
  app:
    image: host-uk/app:latest
    port: 9000
    runtime: php-fpm
    replicas: 1

  web:
    image: host-uk/web:latest
    port: 80
    runtime: nginx
    depends_on: [app]

  horizon:
    image: host-uk/app:latest
    command: php artisan horizon
    replicas: 1

  scheduler:
    image: host-uk/app:latest
    command: php artisan schedule:work
    replicas: 1

  mcp:
    image: host-uk/core:latest
    port: 9000
    command: core mcp serve
    replicas: 1

# --- Object Storage ---
s3:
  endpoint: fsn1.your-objectstorage.com
  buckets:
    hostuk:
      purpose: infra
      paths:
        - backup/galera/
        - backup/coolify/
        - backup/certs/
    host-uk:
      purpose: media
      paths:
        - uploads/
        - assets/

# --- CDN ---
cdn:
  provider: bunnycdn
  origin: hermes.lb.host.uk.com
  zones:
    - "*.host.uk.com"

# --- CI/CD ---
cicd:
  provider: forgejo
  url: https://gitea.snider.dev
  runner: build.de
  registry: gitea.snider.dev
  deploy_hook: coolify

# --- Monitoring ---
monitoring:
  health_endpoints:
    - url: https://host.uk.com/health
      interval: 60
    - url: https://bio.host.uk.com/health
      interval: 60
  alerts:
    galera_cluster_size: 2
    redis_sentinel_quorum: 2

# --- Backups ---
backups:
  daily:
    - name: galera
      type: mysqldump
      destination: s3://hostuk/backup/galera/
    - name: coolify
      type: tar
      destination: s3://hostuk/backup/coolify/
    - name: certs
      type: tar
      destination: s3://hostuk/backup/certs/
  weekly:
    - name: snapshot
      type: hcloud-snapshot
      hosts: [noc, build]
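
pkg/infra's config types are referenced throughout the command code below (cfg.Hosts, cfg.LoadBalancer.Health, cfg.DNS.Zones) but their definitions are not part of the visible hunks. For orientation, a minimal sketch of the shapes this YAML implies; field names and YAML tags here are inferred from the call sites, not confirmed by the diff:

package infra

// Sketch only: inferred from how the prod commands consume the config.
// The actual pkg/infra definitions are outside this diff's visible hunks.

type SSHSettings struct {
	User string `yaml:"user"`
	Key  string `yaml:"key"`
	Port int    `yaml:"port"`
}

type Host struct {
	FQDN     string      `yaml:"fqdn"`
	IP       string      `yaml:"ip"`
	Type     string      `yaml:"type"` // hcloud or hrobot
	Role     string      `yaml:"role"` // bastion, app, builder
	SSH      SSHSettings `yaml:"ssh"`
	Services []string    `yaml:"services"`
}

type Backend struct {
	Host string `yaml:"host"`
	Port int    `yaml:"port"`
}

type Listener struct {
	Frontend      int    `yaml:"frontend"`
	Backend       int    `yaml:"backend"`
	Protocol      string `yaml:"protocol"`
	ProxyProtocol bool   `yaml:"proxy_protocol"`
}

type HealthCheck struct {
	Protocol string `yaml:"protocol"`
	Path     string `yaml:"path"`
	Interval int    `yaml:"interval"`
}

type LoadBalancer struct {
	Name      string      `yaml:"name"`
	Type      string      `yaml:"type"`
	Location  string      `yaml:"location"`
	Algorithm string      `yaml:"algorithm"`
	Backends  []Backend   `yaml:"backends"`
	Health    HealthCheck `yaml:"health_check"`
	Listeners []Listener  `yaml:"listeners"`
}

type Record struct {
	Name  string `yaml:"name"`
	Type  string `yaml:"type"`
	Value string `yaml:"value"`
	TTL   int    `yaml:"ttl"`
}

type Zone struct {
	Records []Record `yaml:"records"`
}

type DNSConfig struct {
	Provider string          `yaml:"provider"`
	Zones    map[string]Zone `yaml:"zones"`
}

type Config struct {
	Hosts        map[string]*Host `yaml:"hosts"`
	LoadBalancer LoadBalancer     `yaml:"load_balancer"`
	DNS          DNSConfig        `yaml:"dns"`
}

infra.Load and infra.Discover presumably unmarshal infra.yaml into this Config; treat the sketch as orientation, not the committed API.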

internal/cmd/prod/cmd_commands.go (new file, 15 lines)
@@ -0,0 +1,15 @@
package prod

import (
	"github.com/host-uk/core/pkg/cli"
	"github.com/spf13/cobra"
)

func init() {
	cli.RegisterCommands(AddProdCommands)
}

// AddProdCommands registers the 'prod' command and all subcommands.
func AddProdCommands(root *cobra.Command) {
	root.AddCommand(Cmd)
}

internal/cmd/prod/cmd_dns.go (new file, 129 lines)
@@ -0,0 +1,129 @@
package prod

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/host-uk/core/pkg/cli"
	"github.com/host-uk/core/pkg/infra"
	"github.com/spf13/cobra"
)

var dnsCmd = &cobra.Command{
	Use:   "dns",
	Short: "Manage DNS records via CloudNS",
	Long: `View and manage DNS records for host.uk.com via CloudNS API.

Requires:
  CLOUDNS_AUTH_ID        CloudNS auth ID
  CLOUDNS_AUTH_PASSWORD  CloudNS auth password`,
}

var dnsListCmd = &cobra.Command{
	Use:   "list [zone]",
	Short: "List DNS records",
	Args:  cobra.MaximumNArgs(1),
	RunE:  runDNSList,
}

var dnsSetCmd = &cobra.Command{
	Use:   "set <host> <type> <value>",
	Short: "Create or update a DNS record",
	Long: `Create or update a DNS record. Example:
  core prod dns set hermes.lb A 1.2.3.4
  core prod dns set "*.host.uk.com" CNAME hermes.lb.host.uk.com`,
	Args: cobra.ExactArgs(3),
	RunE: runDNSSet,
}

var (
	dnsZone string
	dnsTTL  int
)

func init() {
	dnsCmd.PersistentFlags().StringVar(&dnsZone, "zone", "host.uk.com", "DNS zone")

	dnsSetCmd.Flags().IntVar(&dnsTTL, "ttl", 300, "Record TTL in seconds")

	dnsCmd.AddCommand(dnsListCmd)
	dnsCmd.AddCommand(dnsSetCmd)
}

func getDNSClient() (*infra.CloudNSClient, error) {
	authID := os.Getenv("CLOUDNS_AUTH_ID")
	authPass := os.Getenv("CLOUDNS_AUTH_PASSWORD")
	if authID == "" || authPass == "" {
		return nil, fmt.Errorf("CLOUDNS_AUTH_ID and CLOUDNS_AUTH_PASSWORD required")
	}
	return infra.NewCloudNSClient(authID, authPass), nil
}

func runDNSList(cmd *cobra.Command, args []string) error {
	dns, err := getDNSClient()
	if err != nil {
		return err
	}

	zone := dnsZone
	if len(args) > 0 {
		zone = args[0]
	}

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	records, err := dns.ListRecords(ctx, zone)
	if err != nil {
		return fmt.Errorf("list records: %w", err)
	}

	cli.Print("%s DNS records for %s\n\n", cli.BoldStyle.Render("▶"), cli.TitleStyle.Render(zone))

	if len(records) == 0 {
		cli.Print(" No records found\n")
		return nil
	}

	for id, r := range records {
		cli.Print(" %s %-6s %-30s %s TTL:%s\n",
			cli.DimStyle.Render(id),
			cli.BoldStyle.Render(r.Type),
			r.Host,
			r.Record,
			r.TTL)
	}

	return nil
}

func runDNSSet(cmd *cobra.Command, args []string) error {
	dns, err := getDNSClient()
	if err != nil {
		return err
	}

	host := args[0]
	recordType := args[1]
	value := args[2]

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	changed, err := dns.EnsureRecord(ctx, dnsZone, host, recordType, value, dnsTTL)
	if err != nil {
		return fmt.Errorf("set record: %w", err)
	}

	if changed {
		cli.Print("%s %s %s %s -> %s\n",
			cli.SuccessStyle.Render("✓"),
			recordType, host, dnsZone, value)
	} else {
		cli.Print("%s Record already correct\n", cli.DimStyle.Render("·"))
	}

	return nil
}

internal/cmd/prod/cmd_lb.go (new file, 113 lines)
@@ -0,0 +1,113 @@
package prod

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/host-uk/core/pkg/cli"
	"github.com/host-uk/core/pkg/infra"
	"github.com/spf13/cobra"
)

var lbCmd = &cobra.Command{
	Use:   "lb",
	Short: "Manage Hetzner load balancer",
	Long: `View and manage the Hetzner Cloud managed load balancer.

Requires: HCLOUD_TOKEN`,
}

var lbStatusCmd = &cobra.Command{
	Use:   "status",
	Short: "Show load balancer status and target health",
	RunE:  runLBStatus,
}

var lbCreateCmd = &cobra.Command{
	Use:   "create",
	Short: "Create load balancer from infra.yaml",
	RunE:  runLBCreate,
}

func init() {
	lbCmd.AddCommand(lbStatusCmd)
	lbCmd.AddCommand(lbCreateCmd)
}

func getHCloudClient() (*infra.HCloudClient, error) {
	token := os.Getenv("HCLOUD_TOKEN")
	if token == "" {
		return nil, fmt.Errorf("HCLOUD_TOKEN environment variable required")
	}
	return infra.NewHCloudClient(token), nil
}

func runLBStatus(cmd *cobra.Command, args []string) error {
	hc, err := getHCloudClient()
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	lbs, err := hc.ListLoadBalancers(ctx)
	if err != nil {
		return fmt.Errorf("list load balancers: %w", err)
	}

	if len(lbs) == 0 {
		cli.Print("No load balancers found\n")
		return nil
	}

	for _, lb := range lbs {
		cli.Print("%s %s\n", cli.BoldStyle.Render("▶"), cli.TitleStyle.Render(lb.Name))
		cli.Print(" ID: %d\n", lb.ID)
		cli.Print(" IP: %s\n", lb.PublicNet.IPv4.IP)
		cli.Print(" Algorithm: %s\n", lb.Algorithm.Type)
		cli.Print(" Location: %s\n", lb.Location.Name)

		if len(lb.Services) > 0 {
			cli.Print("\n Services:\n")
			for _, s := range lb.Services {
				cli.Print(" %s :%d -> :%d proxy_protocol=%v\n",
					s.Protocol, s.ListenPort, s.DestinationPort, s.Proxyprotocol)
			}
		}

		if len(lb.Targets) > 0 {
			cli.Print("\n Targets:\n")
			for _, t := range lb.Targets {
				ip := ""
				if t.IP != nil {
					ip = t.IP.IP
				}
				for _, hs := range t.HealthStatus {
					icon := cli.SuccessStyle.Render("●")
					if hs.Status != "healthy" {
						icon = cli.ErrorStyle.Render("○")
					}
					cli.Print(" %s %s :%d %s\n", icon, ip, hs.ListenPort, hs.Status)
				}
			}
		}
		fmt.Println()
	}

	return nil
}

func runLBCreate(cmd *cobra.Command, args []string) error {
	cfg, _, err := loadConfig()
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	return stepLoadBalancer(ctx, cfg)
}

internal/cmd/prod/cmd_prod.go (new file, 35 lines)
@@ -0,0 +1,35 @@
package prod

import (
	"github.com/spf13/cobra"
)

var (
	infraFile string
)

// Cmd is the root prod command.
var Cmd = &cobra.Command{
	Use:   "prod",
	Short: "Production infrastructure management",
	Long: `Manage the Host UK production infrastructure.

Commands:
  status  Show infrastructure health and connectivity
  setup   Phase 1: discover topology, create LB, configure DNS
  dns     Manage DNS records via CloudNS
  lb      Manage Hetzner load balancer
  ssh     SSH into a production host

Configuration is read from infra.yaml in the project root.`,
}

func init() {
	Cmd.PersistentFlags().StringVar(&infraFile, "config", "", "Path to infra.yaml (auto-discovered if not set)")

	Cmd.AddCommand(statusCmd)
	Cmd.AddCommand(setupCmd)
	Cmd.AddCommand(dnsCmd)
	Cmd.AddCommand(lbCmd)
	Cmd.AddCommand(sshCmd)
}

internal/cmd/prod/cmd_setup.go (new file, 284 lines)
@@ -0,0 +1,284 @@
package prod

import (
	"context"
	"fmt"
	"os"
	"time"

	"github.com/host-uk/core/pkg/cli"
	"github.com/host-uk/core/pkg/infra"
	"github.com/spf13/cobra"
)

var setupCmd = &cobra.Command{
	Use:   "setup",
	Short: "Phase 1: discover topology, create LB, configure DNS",
	Long: `Run the Phase 1 foundation setup:

  1. Discover Hetzner topology (Cloud + Robot servers)
  2. Create Hetzner managed load balancer
  3. Configure DNS records via CloudNS
  4. Verify connectivity to all hosts

Required environment variables:
  HCLOUD_TOKEN           Hetzner Cloud API token
  HETZNER_ROBOT_USER     Hetzner Robot username
  HETZNER_ROBOT_PASS     Hetzner Robot password
  CLOUDNS_AUTH_ID        CloudNS auth ID
  CLOUDNS_AUTH_PASSWORD  CloudNS auth password`,
	RunE: runSetup,
}

var (
	setupDryRun bool
	setupStep   string
)

func init() {
	setupCmd.Flags().BoolVar(&setupDryRun, "dry-run", false, "Show what would be done without making changes")
	setupCmd.Flags().StringVar(&setupStep, "step", "", "Run a specific step only (discover, lb, dns)")
}

func runSetup(cmd *cobra.Command, args []string) error {
	cfg, cfgPath, err := loadConfig()
	if err != nil {
		return err
	}

	cli.Print("%s Production setup from %s\n\n",
		cli.BoldStyle.Render("▶"),
		cli.DimStyle.Render(cfgPath))

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	steps := []struct {
		name string
		fn   func(context.Context, *infra.Config) error
	}{
		{"discover", stepDiscover},
		{"lb", stepLoadBalancer},
		{"dns", stepDNS},
	}

	for _, step := range steps {
		if setupStep != "" && setupStep != step.name {
			continue
		}

		cli.Print("\n%s Step: %s\n", cli.BoldStyle.Render("━━"), cli.TitleStyle.Render(step.name))

		if err := step.fn(ctx, cfg); err != nil {
			cli.Print(" %s %s: %s\n", cli.ErrorStyle.Render("✗"), step.name, err)
			return fmt.Errorf("step %s failed: %w", step.name, err)
		}

		cli.Print(" %s %s complete\n", cli.SuccessStyle.Render("✓"), step.name)
	}

	cli.Print("\n%s Setup complete\n", cli.SuccessStyle.Render("✓"))
	return nil
}

func stepDiscover(ctx context.Context, cfg *infra.Config) error {
	// Discover HCloud servers
	hcloudToken := os.Getenv("HCLOUD_TOKEN")
	if hcloudToken != "" {
		cli.Print(" Discovering Hetzner Cloud servers...\n")

		hc := infra.NewHCloudClient(hcloudToken)
		servers, err := hc.ListServers(ctx)
		if err != nil {
			return fmt.Errorf("list HCloud servers: %w", err)
		}

		for _, s := range servers {
			cli.Print(" %s %s %s %s %s\n",
				cli.SuccessStyle.Render("●"),
				cli.BoldStyle.Render(s.Name),
				s.PublicNet.IPv4.IP,
				s.ServerType.Name,
				cli.DimStyle.Render(s.Datacenter.Name))
		}
	} else {
		cli.Print(" %s HCLOUD_TOKEN not set — skipping Cloud discovery\n",
			cli.WarningStyle.Render("⚠"))
	}

	// Discover Robot servers
	robotUser := os.Getenv("HETZNER_ROBOT_USER")
	robotPass := os.Getenv("HETZNER_ROBOT_PASS")
	if robotUser != "" && robotPass != "" {
		cli.Print(" Discovering Hetzner Robot servers...\n")

		hr := infra.NewHRobotClient(robotUser, robotPass)
		servers, err := hr.ListServers(ctx)
		if err != nil {
			return fmt.Errorf("list Robot servers: %w", err)
		}

		for _, s := range servers {
			status := cli.SuccessStyle.Render("●")
			if s.Status != "ready" {
				status = cli.WarningStyle.Render("○")
			}
			cli.Print(" %s %s %s %s %s\n",
				status,
				cli.BoldStyle.Render(s.ServerName),
				s.ServerIP,
				s.Product,
				cli.DimStyle.Render(s.Datacenter))
		}
	} else {
		cli.Print(" %s HETZNER_ROBOT_USER/PASS not set — skipping Robot discovery\n",
			cli.WarningStyle.Render("⚠"))
	}

	return nil
}

func stepLoadBalancer(ctx context.Context, cfg *infra.Config) error {
	hcloudToken := os.Getenv("HCLOUD_TOKEN")
	if hcloudToken == "" {
		return fmt.Errorf("HCLOUD_TOKEN required for load balancer management")
	}

	hc := infra.NewHCloudClient(hcloudToken)

	// Check if LB already exists
	lbs, err := hc.ListLoadBalancers(ctx)
	if err != nil {
		return fmt.Errorf("list load balancers: %w", err)
	}

	for _, lb := range lbs {
		if lb.Name == cfg.LoadBalancer.Name {
			cli.Print(" Load balancer '%s' already exists (ID: %d, IP: %s)\n",
				lb.Name, lb.ID, lb.PublicNet.IPv4.IP)
			return nil
		}
	}

	if setupDryRun {
		cli.Print(" [dry-run] Would create load balancer '%s' (%s) in %s\n",
			cfg.LoadBalancer.Name, cfg.LoadBalancer.Type, cfg.LoadBalancer.Location)
		for _, b := range cfg.LoadBalancer.Backends {
			if host, ok := cfg.Hosts[b.Host]; ok {
				cli.Print(" [dry-run] Backend: %s (%s:%d)\n", b.Host, host.IP, b.Port)
			}
		}
		return nil
	}

	// Build targets from config
	targets := make([]infra.HCloudLBCreateTarget, 0, len(cfg.LoadBalancer.Backends))
	for _, b := range cfg.LoadBalancer.Backends {
		host, ok := cfg.Hosts[b.Host]
		if !ok {
			return fmt.Errorf("backend host '%s' not found in config", b.Host)
		}
		targets = append(targets, infra.HCloudLBCreateTarget{
			Type: "ip",
			IP:   &infra.HCloudLBTargetIP{IP: host.IP},
		})
	}

	// Build services
	services := make([]infra.HCloudLBService, 0, len(cfg.LoadBalancer.Listeners))
	for _, l := range cfg.LoadBalancer.Listeners {
		svc := infra.HCloudLBService{
			Protocol:        l.Protocol,
			ListenPort:      l.Frontend,
			DestinationPort: l.Backend,
			Proxyprotocol:   l.ProxyProtocol,
			HealthCheck: &infra.HCloudLBHealthCheck{
				Protocol: cfg.LoadBalancer.Health.Protocol,
				Port:     l.Backend,
				Interval: cfg.LoadBalancer.Health.Interval,
				Timeout:  10,
				Retries:  3,
				HTTP: &infra.HCloudLBHCHTTP{
					Path:       cfg.LoadBalancer.Health.Path,
					StatusCode: "2??",
				},
			},
		}
		services = append(services, svc)
	}

	req := infra.HCloudLBCreateRequest{
		Name:             cfg.LoadBalancer.Name,
		LoadBalancerType: cfg.LoadBalancer.Type,
		Location:         cfg.LoadBalancer.Location,
		Algorithm:        infra.HCloudLBAlgorithm{Type: cfg.LoadBalancer.Algorithm},
		Services:         services,
		Targets:          targets,
		Labels: map[string]string{
			"project": "host-uk",
			"managed": "core-cli",
		},
	}

	cli.Print(" Creating load balancer '%s'...\n", cfg.LoadBalancer.Name)

	lb, err := hc.CreateLoadBalancer(ctx, req)
	if err != nil {
		return fmt.Errorf("create load balancer: %w", err)
	}

	cli.Print(" Created: %s (ID: %d, IP: %s)\n",
		cli.BoldStyle.Render(lb.Name), lb.ID, lb.PublicNet.IPv4.IP)

	return nil
}

func stepDNS(ctx context.Context, cfg *infra.Config) error {
	authID := os.Getenv("CLOUDNS_AUTH_ID")
	authPass := os.Getenv("CLOUDNS_AUTH_PASSWORD")
	if authID == "" || authPass == "" {
		return fmt.Errorf("CLOUDNS_AUTH_ID and CLOUDNS_AUTH_PASSWORD required")
	}

	dns := infra.NewCloudNSClient(authID, authPass)

	for zoneName, zone := range cfg.DNS.Zones {
		cli.Print(" Zone: %s\n", cli.BoldStyle.Render(zoneName))

		for _, rec := range zone.Records {
			value := rec.Value
			// Skip templated values (need LB IP first)
			if value == "{{.lb_ip}}" {
				cli.Print(" %s %s %s %s — %s\n",
					cli.WarningStyle.Render("⚠"),
					rec.Name, rec.Type, value,
					cli.DimStyle.Render("needs LB IP (run setup --step=lb first)"))
				continue
			}

			if setupDryRun {
				cli.Print(" [dry-run] %s %s -> %s (TTL: %d)\n",
					rec.Type, rec.Name, value, rec.TTL)
				continue
			}

			changed, err := dns.EnsureRecord(ctx, zoneName, rec.Name, rec.Type, value, rec.TTL)
			if err != nil {
				cli.Print(" %s %s %s: %s\n", cli.ErrorStyle.Render("✗"), rec.Type, rec.Name, err)
				continue
			}

			if changed {
				cli.Print(" %s %s %s -> %s\n",
					cli.SuccessStyle.Render("✓"),
					rec.Type, rec.Name, value)
			} else {
				cli.Print(" %s %s %s (no change)\n",
					cli.DimStyle.Render("·"),
					rec.Type, rec.Name)
			}
		}
	}

	return nil
}

internal/cmd/prod/cmd_ssh.go (new file, 64 lines)
@@ -0,0 +1,64 @@
package prod

import (
	"fmt"
	"os"
	"os/exec"
	"syscall"

	"github.com/host-uk/core/pkg/cli"
	"github.com/spf13/cobra"
)

var sshCmd = &cobra.Command{
	Use:   "ssh <host>",
	Short: "SSH into a production host",
	Long: `Open an SSH session to a production host defined in infra.yaml.

Examples:
  core prod ssh noc
  core prod ssh de
  core prod ssh de2
  core prod ssh build`,
	Args: cobra.ExactArgs(1),
	RunE: runSSH,
}

func runSSH(cmd *cobra.Command, args []string) error {
	cfg, _, err := loadConfig()
	if err != nil {
		return err
	}

	name := args[0]
	host, ok := cfg.Hosts[name]
	if !ok {
		// List available hosts
		cli.Print("Unknown host '%s'. Available:\n", name)
		for n, h := range cfg.Hosts {
			cli.Print(" %s %s (%s)\n", cli.BoldStyle.Render(n), h.IP, h.Role)
		}
		return fmt.Errorf("host '%s' not found in infra.yaml", name)
	}

	sshArgs := []string{
		"ssh",
		"-i", host.SSH.Key,
		"-p", fmt.Sprintf("%d", host.SSH.Port),
		"-o", "StrictHostKeyChecking=accept-new",
		fmt.Sprintf("%s@%s", host.SSH.User, host.IP),
	}

	cli.Print("%s %s@%s (%s)\n",
		cli.BoldStyle.Render("▶"),
		host.SSH.User, host.FQDN,
		cli.DimStyle.Render(host.IP))

	sshPath, err := exec.LookPath("ssh")
	if err != nil {
		return fmt.Errorf("ssh not found: %w", err)
	}

	// Replace current process with SSH
	return syscall.Exec(sshPath, sshArgs, os.Environ())
}

internal/cmd/prod/cmd_status.go (new file, 325 lines)
@@ -0,0 +1,325 @@
package prod

import (
	"context"
	"fmt"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/host-uk/core/pkg/ansible"
	"github.com/host-uk/core/pkg/cli"
	"github.com/host-uk/core/pkg/infra"
	"github.com/spf13/cobra"
)

var statusCmd = &cobra.Command{
	Use:   "status",
	Short: "Show production infrastructure health",
	Long: `Check connectivity, services, and cluster health across all production hosts.

Tests:
  - SSH connectivity to all hosts
  - Docker daemon status
  - Coolify controller (noc)
  - Galera cluster state (de, de2)
  - Redis Sentinel status (de, de2)
  - Load balancer health (if HCLOUD_TOKEN set)`,
	RunE: runStatus,
}

type hostStatus struct {
	Name      string
	Host      *infra.Host
	Connected bool
	ConnTime  time.Duration
	OS        string
	Docker    string
	Services  map[string]string
	Error     error
}

func runStatus(cmd *cobra.Command, args []string) error {
	cfg, cfgPath, err := loadConfig()
	if err != nil {
		return err
	}

	cli.Print("%s Infrastructure status from %s\n\n",
		cli.BoldStyle.Render("▶"),
		cli.DimStyle.Render(cfgPath))

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	// Check all hosts in parallel
	var (
		wg       sync.WaitGroup
		mu       sync.Mutex
		statuses []hostStatus
	)

	for name, host := range cfg.Hosts {
		wg.Add(1)
		go func(name string, host *infra.Host) {
			defer wg.Done()
			s := checkHost(ctx, name, host)
			mu.Lock()
			statuses = append(statuses, s)
			mu.Unlock()
		}(name, host)
	}

	wg.Wait()

	// Print results in consistent order
	order := []string{"noc", "de", "de2", "build"}
	for _, name := range order {
		for _, s := range statuses {
			if s.Name == name {
				printHostStatus(s)
				break
			}
		}
	}

	// Check LB if token available
	if token := os.Getenv("HCLOUD_TOKEN"); token != "" {
		fmt.Println()
		checkLoadBalancer(ctx, token)
	} else {
		fmt.Println()
		cli.Print("%s Load balancer: %s\n",
			cli.DimStyle.Render(" ○"),
			cli.DimStyle.Render("HCLOUD_TOKEN not set (skipped)"))
	}

	return nil
}

func checkHost(ctx context.Context, name string, host *infra.Host) hostStatus {
	s := hostStatus{
		Name:     name,
		Host:     host,
		Services: make(map[string]string),
	}

	sshCfg := ansible.SSHConfig{
		Host:    host.IP,
		Port:    host.SSH.Port,
		User:    host.SSH.User,
		KeyFile: host.SSH.Key,
		Timeout: 15 * time.Second,
	}

	client, err := ansible.NewSSHClient(sshCfg)
	if err != nil {
		s.Error = fmt.Errorf("create SSH client: %w", err)
		return s
	}
	defer func() { _ = client.Close() }()

	start := time.Now()
	if err := client.Connect(ctx); err != nil {
		s.Error = fmt.Errorf("SSH connect: %w", err)
		return s
	}
	s.Connected = true
	s.ConnTime = time.Since(start)

	// OS info
	stdout, _, _, _ := client.Run(ctx, "cat /etc/os-release 2>/dev/null | grep PRETTY_NAME | cut -d'\"' -f2")
	s.OS = strings.TrimSpace(stdout)

	// Docker
	stdout, _, _, err = client.Run(ctx, "docker --version 2>/dev/null | head -1")
	if err == nil && stdout != "" {
		s.Docker = strings.TrimSpace(stdout)
	}

	// Check each expected service
	for _, svc := range host.Services {
		status := checkService(ctx, client, svc)
		s.Services[svc] = status
	}

	return s
}

func checkService(ctx context.Context, client *ansible.SSHClient, service string) string {
	switch service {
	case "coolify":
		stdout, _, _, _ := client.Run(ctx, "docker ps --format '{{.Names}}' 2>/dev/null | grep -c coolify")
		if strings.TrimSpace(stdout) != "0" && strings.TrimSpace(stdout) != "" {
			return "running"
		}
		return "not running"

	case "traefik":
		stdout, _, _, _ := client.Run(ctx, "docker ps --format '{{.Names}}' 2>/dev/null | grep -c traefik")
		if strings.TrimSpace(stdout) != "0" && strings.TrimSpace(stdout) != "" {
			return "running"
		}
		return "not running"

	case "galera":
		// Check Galera cluster state
		stdout, _, _, _ := client.Run(ctx,
			"docker exec $(docker ps -q --filter name=mariadb 2>/dev/null || echo none) "+
				"mariadb -u root -e \"SHOW STATUS LIKE 'wsrep_cluster_size'\" --skip-column-names 2>/dev/null | awk '{print $2}'")
		size := strings.TrimSpace(stdout)
		if size != "" && size != "0" {
			return fmt.Sprintf("cluster_size=%s", size)
		}
		// Try non-Docker
		stdout, _, _, _ = client.Run(ctx,
			"mariadb -u root -e \"SHOW STATUS LIKE 'wsrep_cluster_size'\" --skip-column-names 2>/dev/null | awk '{print $2}'")
		size = strings.TrimSpace(stdout)
		if size != "" && size != "0" {
			return fmt.Sprintf("cluster_size=%s", size)
		}
		return "not running"

	case "redis":
		stdout, _, _, _ := client.Run(ctx,
			"docker exec $(docker ps -q --filter name=redis 2>/dev/null || echo none) "+
				"redis-cli ping 2>/dev/null")
		if strings.TrimSpace(stdout) == "PONG" {
			return "running"
		}
		stdout, _, _, _ = client.Run(ctx, "redis-cli ping 2>/dev/null")
		if strings.TrimSpace(stdout) == "PONG" {
			return "running"
		}
		return "not running"

	case "forgejo-runner":
		stdout, _, _, _ := client.Run(ctx, "systemctl is-active forgejo-runner 2>/dev/null || docker ps --format '{{.Names}}' 2>/dev/null | grep -c runner")
		val := strings.TrimSpace(stdout)
		if val == "active" || (val != "0" && val != "") {
			return "running"
		}
		return "not running"

	default:
		// Generic docker container check
		stdout, _, _, _ := client.Run(ctx,
			fmt.Sprintf("docker ps --format '{{.Names}}' 2>/dev/null | grep -c %s", service))
		if strings.TrimSpace(stdout) != "0" && strings.TrimSpace(stdout) != "" {
			return "running"
		}
		return "not running"
	}
}

func printHostStatus(s hostStatus) {
	// Host header
	roleStyle := cli.DimStyle
	switch s.Host.Role {
	case "app":
		roleStyle = cli.SuccessStyle
	case "bastion":
		roleStyle = cli.WarningStyle
	case "builder":
		roleStyle = cli.InfoStyle
	}

	cli.Print(" %s %s %s %s\n",
		cli.BoldStyle.Render(s.Name),
		cli.DimStyle.Render(s.Host.IP),
		roleStyle.Render(s.Host.Role),
		cli.DimStyle.Render(s.Host.FQDN))

	if s.Error != nil {
		cli.Print(" %s %s\n", cli.ErrorStyle.Render("✗"), s.Error)
		return
	}

	if !s.Connected {
		cli.Print(" %s SSH unreachable\n", cli.ErrorStyle.Render("✗"))
		return
	}

	// Connection info
	cli.Print(" %s SSH %s",
		cli.SuccessStyle.Render("✓"),
		cli.DimStyle.Render(s.ConnTime.Round(time.Millisecond).String()))
	if s.OS != "" {
		cli.Print(" %s", cli.DimStyle.Render(s.OS))
	}
	fmt.Println()

	if s.Docker != "" {
		cli.Print(" %s %s\n", cli.SuccessStyle.Render("✓"), cli.DimStyle.Render(s.Docker))
	}

	// Services
	for _, svc := range s.Host.Services {
		status, ok := s.Services[svc]
		if !ok {
			continue
		}

		icon := cli.SuccessStyle.Render("●")
		style := cli.SuccessStyle
		if status == "not running" {
			icon = cli.ErrorStyle.Render("○")
			style = cli.ErrorStyle
		}

		cli.Print(" %s %s %s\n", icon, svc, style.Render(status))
	}

	fmt.Println()
}

func checkLoadBalancer(ctx context.Context, token string) {
	hc := infra.NewHCloudClient(token)
	lbs, err := hc.ListLoadBalancers(ctx)
	if err != nil {
		cli.Print(" %s Load balancer: %s\n", cli.ErrorStyle.Render("✗"), err)
		return
	}

	if len(lbs) == 0 {
		cli.Print(" %s No load balancers found\n", cli.DimStyle.Render("○"))
		return
	}

	for _, lb := range lbs {
		cli.Print(" %s LB: %s IP: %s Targets: %d\n",
			cli.SuccessStyle.Render("●"),
			cli.BoldStyle.Render(lb.Name),
			lb.PublicNet.IPv4.IP,
			len(lb.Targets))

		for _, t := range lb.Targets {
			for _, hs := range t.HealthStatus {
				icon := cli.SuccessStyle.Render("●")
				if hs.Status != "healthy" {
					icon = cli.ErrorStyle.Render("○")
				}
				ip := ""
				if t.IP != nil {
					ip = t.IP.IP
				}
				cli.Print(" %s :%d %s %s\n", icon, hs.ListenPort, hs.Status, cli.DimStyle.Render(ip))
			}
		}
	}
}

func loadConfig() (*infra.Config, string, error) {
	if infraFile != "" {
		cfg, err := infra.Load(infraFile)
		return cfg, infraFile, err
	}

	cwd, err := os.Getwd()
	if err != nil {
		return nil, "", err
	}

	return infra.Discover(cwd)
}

(modified file, path not shown in this view)
@@ -22,6 +22,7 @@
 // - monitor: Security monitoring aggregation
 // - gitea: Gitea instance management (repos, issues, PRs, mirrors)
 // - unifi: UniFi network management (sites, devices, clients)
+// - prod: Production infrastructure management

 package variants

@@ -45,6 +46,7 @@ import (
 	_ "github.com/host-uk/core/internal/cmd/php"
 	_ "github.com/host-uk/core/internal/cmd/pkgcmd"
 	_ "github.com/host-uk/core/internal/cmd/plugin"
+	_ "github.com/host-uk/core/internal/cmd/prod"
 	_ "github.com/host-uk/core/internal/cmd/qa"
 	_ "github.com/host-uk/core/internal/cmd/sdk"
 	_ "github.com/host-uk/core/internal/cmd/security"
272
pkg/infra/cloudns.go
Normal file
272
pkg/infra/cloudns.go
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
package infra

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strconv"
	"time"
)

const cloudnsBaseURL = "https://api.cloudns.net"

// CloudNSClient is an HTTP client for the CloudNS DNS API.
type CloudNSClient struct {
	authID   string
	password string
	client   *http.Client
}

// NewCloudNSClient creates a new CloudNS API client.
// Uses sub-auth-user (auth-id) authentication.
func NewCloudNSClient(authID, password string) *CloudNSClient {
	return &CloudNSClient{
		authID:   authID,
		password: password,
		client: &http.Client{
			Timeout: 30 * time.Second,
		},
	}
}

// CloudNSZone represents a DNS zone.
type CloudNSZone struct {
	Name   string `json:"name"`
	Type   string `json:"type"`
	Zone   string `json:"zone"`
	Status string `json:"status"`
}

// CloudNSRecord represents a DNS record.
type CloudNSRecord struct {
	ID       string `json:"id"`
	Type     string `json:"type"`
	Host     string `json:"host"`
	Record   string `json:"record"`
	TTL      string `json:"ttl"`
	Priority string `json:"priority,omitempty"`
	Status   int    `json:"status"`
}

// ListZones returns all DNS zones.
func (c *CloudNSClient) ListZones(ctx context.Context) ([]CloudNSZone, error) {
	params := c.authParams()
	params.Set("page", "1")
	params.Set("rows-per-page", "100")
	params.Set("search", "")

	data, err := c.get(ctx, "/dns/list-zones.json", params)
	if err != nil {
		return nil, err
	}

	var zones []CloudNSZone
	if err := json.Unmarshal(data, &zones); err != nil {
		// CloudNS returns an empty object {} for no results instead of []
		return nil, nil
	}
	return zones, nil
}

// ListRecords returns all DNS records for a zone.
func (c *CloudNSClient) ListRecords(ctx context.Context, domain string) (map[string]CloudNSRecord, error) {
	params := c.authParams()
	params.Set("domain-name", domain)

	data, err := c.get(ctx, "/dns/records.json", params)
	if err != nil {
		return nil, err
	}

	var records map[string]CloudNSRecord
	if err := json.Unmarshal(data, &records); err != nil {
		return nil, fmt.Errorf("parse records: %w", err)
	}
	return records, nil
}

// CreateRecord creates a DNS record. Returns the record ID.
func (c *CloudNSClient) CreateRecord(ctx context.Context, domain, host, recordType, value string, ttl int) (string, error) {
	params := c.authParams()
	params.Set("domain-name", domain)
	params.Set("host", host)
	params.Set("record-type", recordType)
	params.Set("record", value)
	params.Set("ttl", strconv.Itoa(ttl))

	data, err := c.post(ctx, "/dns/add-record.json", params)
	if err != nil {
		return "", err
	}

	var result struct {
		Status            string `json:"status"`
		StatusDescription string `json:"statusDescription"`
		Data              struct {
			ID int `json:"id"`
		} `json:"data"`
	}
	if err := json.Unmarshal(data, &result); err != nil {
		return "", fmt.Errorf("parse response: %w", err)
	}

	if result.Status != "Success" {
		return "", fmt.Errorf("cloudns: %s", result.StatusDescription)
	}

	return strconv.Itoa(result.Data.ID), nil
}

// UpdateRecord updates an existing DNS record.
func (c *CloudNSClient) UpdateRecord(ctx context.Context, domain, recordID, host, recordType, value string, ttl int) error {
	params := c.authParams()
	params.Set("domain-name", domain)
	params.Set("record-id", recordID)
	params.Set("host", host)
	params.Set("record-type", recordType)
	params.Set("record", value)
	params.Set("ttl", strconv.Itoa(ttl))

	data, err := c.post(ctx, "/dns/mod-record.json", params)
	if err != nil {
		return err
	}

	var result struct {
		Status            string `json:"status"`
		StatusDescription string `json:"statusDescription"`
	}
	if err := json.Unmarshal(data, &result); err != nil {
		return fmt.Errorf("parse response: %w", err)
	}

	if result.Status != "Success" {
		return fmt.Errorf("cloudns: %s", result.StatusDescription)
	}

	return nil
}

// DeleteRecord deletes a DNS record by ID.
func (c *CloudNSClient) DeleteRecord(ctx context.Context, domain, recordID string) error {
	params := c.authParams()
	params.Set("domain-name", domain)
	params.Set("record-id", recordID)

	data, err := c.post(ctx, "/dns/delete-record.json", params)
	if err != nil {
		return err
	}

	var result struct {
		Status            string `json:"status"`
		StatusDescription string `json:"statusDescription"`
	}
	if err := json.Unmarshal(data, &result); err != nil {
		return fmt.Errorf("parse response: %w", err)
	}

	if result.Status != "Success" {
		return fmt.Errorf("cloudns: %s", result.StatusDescription)
	}

	return nil
}

// EnsureRecord creates or updates a DNS record to match the desired state.
// Returns true if a change was made.
func (c *CloudNSClient) EnsureRecord(ctx context.Context, domain, host, recordType, value string, ttl int) (bool, error) {
	records, err := c.ListRecords(ctx, domain)
	if err != nil {
		return false, fmt.Errorf("list records: %w", err)
	}

	// Check if record already exists
	for id, r := range records {
		if r.Host == host && r.Type == recordType {
			if r.Record == value {
				return false, nil // Already correct
			}
			// Update existing record
			if err := c.UpdateRecord(ctx, domain, id, host, recordType, value, ttl); err != nil {
				return false, fmt.Errorf("update record: %w", err)
			}
			return true, nil
		}
	}

	// Create new record
	if _, err := c.CreateRecord(ctx, domain, host, recordType, value, ttl); err != nil {
		return false, fmt.Errorf("create record: %w", err)
	}
	return true, nil
}

// SetACMEChallenge creates a DNS-01 ACME challenge TXT record.
func (c *CloudNSClient) SetACMEChallenge(ctx context.Context, domain, value string) (string, error) {
	return c.CreateRecord(ctx, domain, "_acme-challenge", "TXT", value, 60)
}

// ClearACMEChallenge removes the DNS-01 ACME challenge TXT record.
func (c *CloudNSClient) ClearACMEChallenge(ctx context.Context, domain string) error {
	records, err := c.ListRecords(ctx, domain)
	if err != nil {
		return err
	}

	for id, r := range records {
		if r.Host == "_acme-challenge" && r.Type == "TXT" {
			if err := c.DeleteRecord(ctx, domain, id); err != nil {
				return err
			}
		}
	}
	return nil
}

func (c *CloudNSClient) authParams() url.Values {
	params := url.Values{}
	params.Set("auth-id", c.authID)
	params.Set("auth-password", c.password)
	return params
}

func (c *CloudNSClient) get(ctx context.Context, path string, params url.Values) ([]byte, error) {
	u := cloudnsBaseURL + path + "?" + params.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		return nil, err
	}
	return c.doRaw(req)
}

func (c *CloudNSClient) post(ctx context.Context, path string, params url.Values) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, cloudnsBaseURL+path, nil)
	if err != nil {
		return nil, err
	}
	req.URL.RawQuery = params.Encode()
	return c.doRaw(req)
}

func (c *CloudNSClient) doRaw(req *http.Request) ([]byte, error) {
	resp, err := c.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("cloudns API: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response: %w", err)
	}

	if resp.StatusCode >= 400 {
		return nil, fmt.Errorf("cloudns API %d: %s", resp.StatusCode, string(data))
	}

	return data, nil
}
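For context, a minimal sketch of how the idempotent EnsureRecord flow above might be driven from a caller. The credentials, zone, and record values here are placeholders, not anything from this commit:

// Sketch: drive EnsureRecord from a throwaway main.
// Auth ID, password, zone, and record values are hypothetical placeholders.
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/host-uk/core/pkg/infra"
)

func main() {
	ctx := context.Background()
	dns := infra.NewCloudNSClient("sub-auth-id", "password") // placeholder credentials
	changed, err := dns.EnsureRecord(ctx, "example.com", "www", "A", "192.0.2.10", 300)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("changed:", changed) // false on a second run: the record already matches
}

Running it twice shows the idempotency: the first call creates or updates the record, the second sees a matching record and reports no change.
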
300
pkg/infra/config.go
Normal file

@@ -0,0 +1,300 @@
// Package infra provides infrastructure configuration and API clients
// for managing the Host UK production environment.
package infra

import (
	"fmt"
	"os"
	"path/filepath"

	"gopkg.in/yaml.v3"
)

// Config is the top-level infrastructure configuration parsed from infra.yaml.
type Config struct {
	Hosts        map[string]*Host      `yaml:"hosts"`
	LoadBalancer LoadBalancer          `yaml:"load_balancer"`
	Network      Network               `yaml:"network"`
	DNS          DNS                   `yaml:"dns"`
	SSL          SSL                   `yaml:"ssl"`
	Database     Database              `yaml:"database"`
	Cache        Cache                 `yaml:"cache"`
	Containers   map[string]*Container `yaml:"containers"`
	S3           S3Config              `yaml:"s3"`
	CDN          CDN                   `yaml:"cdn"`
	CICD         CICD                  `yaml:"cicd"`
	Monitoring   Monitoring            `yaml:"monitoring"`
	Backups      Backups               `yaml:"backups"`
}

// Host represents a server in the infrastructure.
type Host struct {
	FQDN      string   `yaml:"fqdn"`
	IP        string   `yaml:"ip"`
	PrivateIP string   `yaml:"private_ip,omitempty"`
	Type      string   `yaml:"type"` // hcloud, hrobot
	Role      string   `yaml:"role"` // bastion, app, builder
	SSH       SSHConf  `yaml:"ssh"`
	Services  []string `yaml:"services"`
}

// SSHConf holds SSH connection details for a host.
type SSHConf struct {
	User string `yaml:"user"`
	Key  string `yaml:"key"`
	Port int    `yaml:"port"`
}

// LoadBalancer represents a Hetzner managed load balancer.
type LoadBalancer struct {
	Name      string      `yaml:"name"`
	FQDN      string      `yaml:"fqdn"`
	Provider  string      `yaml:"provider"`
	Type      string      `yaml:"type"`
	Location  string      `yaml:"location"`
	Algorithm string      `yaml:"algorithm"`
	Backends  []Backend   `yaml:"backends"`
	Health    HealthCheck `yaml:"health_check"`
	Listeners []Listener  `yaml:"listeners"`
	SSL       LBCert      `yaml:"ssl"`
}

// Backend is a load balancer backend target.
type Backend struct {
	Host string `yaml:"host"`
	Port int    `yaml:"port"`
}

// HealthCheck configures load balancer health checking.
type HealthCheck struct {
	Protocol string `yaml:"protocol"`
	Path     string `yaml:"path"`
	Interval int    `yaml:"interval"`
}

// Listener maps a frontend port to a backend port.
type Listener struct {
	Frontend      int    `yaml:"frontend"`
	Backend       int    `yaml:"backend"`
	Protocol      string `yaml:"protocol"`
	ProxyProtocol bool   `yaml:"proxy_protocol"`
}

// LBCert holds the SSL certificate configuration for the load balancer.
type LBCert struct {
	Certificate string   `yaml:"certificate"`
	SAN         []string `yaml:"san"`
}

// Network describes the private network.
type Network struct {
	CIDR string `yaml:"cidr"`
	Name string `yaml:"name"`
}

// DNS holds DNS provider configuration and zone records.
type DNS struct {
	Provider    string           `yaml:"provider"`
	Nameservers []string         `yaml:"nameservers"`
	Zones       map[string]*Zone `yaml:"zones"`
}

// Zone is a DNS zone with its records.
type Zone struct {
	Records []DNSRecord `yaml:"records"`
}

// DNSRecord is a single DNS record.
type DNSRecord struct {
	Name  string `yaml:"name"`
	Type  string `yaml:"type"`
	Value string `yaml:"value"`
	TTL   int    `yaml:"ttl"`
}

// SSL holds SSL certificate configuration.
type SSL struct {
	Wildcard WildcardCert `yaml:"wildcard"`
}

// WildcardCert describes a wildcard SSL certificate.
type WildcardCert struct {
	Domains     []string `yaml:"domains"`
	Method      string   `yaml:"method"`
	DNSProvider string   `yaml:"dns_provider"`
	Termination string   `yaml:"termination"`
}

// Database describes the database cluster.
type Database struct {
	Engine    string       `yaml:"engine"`
	Version   string       `yaml:"version"`
	Cluster   string       `yaml:"cluster"`
	Nodes     []DBNode     `yaml:"nodes"`
	SSTMethod string       `yaml:"sst_method"`
	Backup    BackupConfig `yaml:"backup"`
}

// DBNode is a database cluster node.
type DBNode struct {
	Host string `yaml:"host"`
	Port int    `yaml:"port"`
}

// BackupConfig describes automated backup settings.
type BackupConfig struct {
	Schedule    string `yaml:"schedule"`
	Destination string `yaml:"destination"`
	Bucket      string `yaml:"bucket"`
	Prefix      string `yaml:"prefix"`
}

// Cache describes the cache/session cluster.
type Cache struct {
	Engine   string      `yaml:"engine"`
	Version  string      `yaml:"version"`
	Sentinel bool        `yaml:"sentinel"`
	Nodes    []CacheNode `yaml:"nodes"`
}

// CacheNode is a cache cluster node.
type CacheNode struct {
	Host string `yaml:"host"`
	Port int    `yaml:"port"`
}

// Container describes a container deployment.
type Container struct {
	Image     string   `yaml:"image"`
	Port      int      `yaml:"port,omitempty"`
	Runtime   string   `yaml:"runtime,omitempty"`
	Command   string   `yaml:"command,omitempty"`
	Replicas  int      `yaml:"replicas,omitempty"`
	DependsOn []string `yaml:"depends_on,omitempty"`
}

// S3Config describes object storage.
type S3Config struct {
	Endpoint string               `yaml:"endpoint"`
	Buckets  map[string]*S3Bucket `yaml:"buckets"`
}

// S3Bucket is an S3 bucket configuration.
type S3Bucket struct {
	Purpose string   `yaml:"purpose"`
	Paths   []string `yaml:"paths"`
}

// CDN describes CDN configuration.
type CDN struct {
	Provider string   `yaml:"provider"`
	Origin   string   `yaml:"origin"`
	Zones    []string `yaml:"zones"`
}

// CICD describes CI/CD configuration.
type CICD struct {
	Provider   string `yaml:"provider"`
	URL        string `yaml:"url"`
	Runner     string `yaml:"runner"`
	Registry   string `yaml:"registry"`
	DeployHook string `yaml:"deploy_hook"`
}

// Monitoring describes monitoring configuration.
type Monitoring struct {
	HealthEndpoints []HealthEndpoint `yaml:"health_endpoints"`
	Alerts          map[string]int   `yaml:"alerts"`
}

// HealthEndpoint is a URL to monitor.
type HealthEndpoint struct {
	URL      string `yaml:"url"`
	Interval int    `yaml:"interval"`
}

// Backups describes backup schedules.
type Backups struct {
	Daily  []BackupJob `yaml:"daily"`
	Weekly []BackupJob `yaml:"weekly"`
}

// BackupJob is a scheduled backup task.
type BackupJob struct {
	Name        string   `yaml:"name"`
	Type        string   `yaml:"type"`
	Destination string   `yaml:"destination,omitempty"`
	Hosts       []string `yaml:"hosts,omitempty"`
}

// Load reads and parses an infra.yaml file.
func Load(path string) (*Config, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("read infra config: %w", err)
	}

	var cfg Config
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, fmt.Errorf("parse infra config: %w", err)
	}

	// Expand SSH key paths
	for _, h := range cfg.Hosts {
		if h.SSH.Key != "" {
			h.SSH.Key = expandPath(h.SSH.Key)
		}
		if h.SSH.Port == 0 {
			h.SSH.Port = 22
		}
	}

	return &cfg, nil
}

// Discover searches for infra.yaml in the given directory and parent directories.
func Discover(startDir string) (*Config, string, error) {
	dir := startDir
	for {
		path := filepath.Join(dir, "infra.yaml")
		if _, err := os.Stat(path); err == nil {
			cfg, err := Load(path)
			return cfg, path, err
		}

		parent := filepath.Dir(dir)
		if parent == dir {
			break
		}
		dir = parent
	}
	return nil, "", fmt.Errorf("infra.yaml not found (searched from %s)", startDir)
}

// HostsByRole returns all hosts matching the given role.
func (c *Config) HostsByRole(role string) map[string]*Host {
	result := make(map[string]*Host)
	for name, h := range c.Hosts {
		if h.Role == role {
			result[name] = h
		}
	}
	return result
}

// AppServers returns hosts with role "app".
func (c *Config) AppServers() map[string]*Host {
	return c.HostsByRole("app")
}

// expandPath expands ~ to home directory.
func expandPath(path string) string {
	if len(path) > 0 && path[0] == '~' {
		home, err := os.UserHomeDir()
		if err != nil {
			return path
		}
		return filepath.Join(home, path[1:])
	}
	return path
}
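A short sketch of the discovery flow these helpers give callers: walk up from the working directory until infra.yaml is found, then filter hosts by role. The output formatting is illustrative only:

// Sketch: load the topology via Discover and list app servers.
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/host-uk/core/pkg/infra"
)

func main() {
	cwd, err := os.Getwd()
	if err != nil {
		log.Fatal(err)
	}

	cfg, path, err := infra.Discover(cwd) // walks parent dirs until infra.yaml is found
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("loaded", path)

	for name, h := range cfg.AppServers() { // hosts with role "app"
		fmt.Printf("%s -> %s (ssh port %d)\n", name, h.IP, h.SSH.Port)
	}
}

This is the same fallback path `core prod` commands take when no --infra flag is given, so the CLI works from any subdirectory of the repo.
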
100
pkg/infra/config_test.go
Normal file

@@ -0,0 +1,100 @@
package infra

import (
	"os"
	"path/filepath"
	"testing"
)

func TestLoad_Good(t *testing.T) {
	// Find infra.yaml relative to test
	// Walk up from test dir to find it
	dir, err := os.Getwd()
	if err != nil {
		t.Fatal(err)
	}

	cfg, path, err := Discover(dir)
	if err != nil {
		t.Skipf("infra.yaml not found from %s: %v", dir, err)
	}

	t.Logf("Loaded %s", path)

	if len(cfg.Hosts) == 0 {
		t.Error("expected at least one host")
	}

	// Check required hosts exist
	for _, name := range []string{"noc", "de", "de2", "build"} {
		if _, ok := cfg.Hosts[name]; !ok {
			t.Errorf("expected host %q in config", name)
		}
	}

	// Check de host details
	de := cfg.Hosts["de"]
	if de.IP != "116.202.82.115" {
		t.Errorf("de IP = %q, want 116.202.82.115", de.IP)
	}
	if de.Role != "app" {
		t.Errorf("de role = %q, want app", de.Role)
	}

	// Check LB config
	if cfg.LoadBalancer.Name != "hermes" {
		t.Errorf("LB name = %q, want hermes", cfg.LoadBalancer.Name)
	}
	if cfg.LoadBalancer.Type != "lb11" {
		t.Errorf("LB type = %q, want lb11", cfg.LoadBalancer.Type)
	}
	if len(cfg.LoadBalancer.Backends) != 2 {
		t.Errorf("LB backends = %d, want 2", len(cfg.LoadBalancer.Backends))
	}

	// Check app servers helper
	apps := cfg.AppServers()
	if len(apps) != 2 {
		t.Errorf("AppServers() = %d, want 2", len(apps))
	}
}

func TestLoad_Bad(t *testing.T) {
	_, err := Load("/nonexistent/infra.yaml")
	if err == nil {
		t.Error("expected error for nonexistent file")
	}
}

func TestLoad_Ugly(t *testing.T) {
	// Invalid YAML
	tmp := filepath.Join(t.TempDir(), "infra.yaml")
	if err := os.WriteFile(tmp, []byte("{{invalid yaml"), 0644); err != nil {
		t.Fatal(err)
	}

	_, err := Load(tmp)
	if err == nil {
		t.Error("expected error for invalid YAML")
	}
}

func TestExpandPath(t *testing.T) {
	home, _ := os.UserHomeDir()

	tests := []struct {
		input string
		want  string
	}{
		{"~/.ssh/id_rsa", filepath.Join(home, ".ssh/id_rsa")},
		{"/absolute/path", "/absolute/path"},
		{"relative/path", "relative/path"},
	}

	for _, tt := range tests {
		got := expandPath(tt.input)
		if got != tt.want {
			t.Errorf("expandPath(%q) = %q, want %q", tt.input, got, tt.want)
		}
	}
}
381
pkg/infra/hetzner.go
Normal file

@@ -0,0 +1,381 @@
package infra

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"
)

const (
	hcloudBaseURL = "https://api.hetzner.cloud/v1"
	hrobotBaseURL = "https://robot-ws.your-server.de"
)

// HCloudClient is an HTTP client for the Hetzner Cloud API.
type HCloudClient struct {
	token  string
	client *http.Client
}

// NewHCloudClient creates a new Hetzner Cloud API client.
func NewHCloudClient(token string) *HCloudClient {
	return &HCloudClient{
		token: token,
		client: &http.Client{
			Timeout: 30 * time.Second,
		},
	}
}

// HCloudServer represents a Hetzner Cloud server.
type HCloudServer struct {
	ID         int                `json:"id"`
	Name       string             `json:"name"`
	Status     string             `json:"status"`
	PublicNet  HCloudPublicNet    `json:"public_net"`
	PrivateNet []HCloudPrivateNet `json:"private_net"`
	ServerType HCloudServerType   `json:"server_type"`
	Datacenter HCloudDatacenter   `json:"datacenter"`
	Labels     map[string]string  `json:"labels"`
}

// HCloudPublicNet holds public network info.
type HCloudPublicNet struct {
	IPv4 HCloudIPv4 `json:"ipv4"`
}

// HCloudIPv4 holds an IPv4 address.
type HCloudIPv4 struct {
	IP string `json:"ip"`
}

// HCloudPrivateNet holds private network info.
type HCloudPrivateNet struct {
	IP      string `json:"ip"`
	Network int    `json:"network"`
}

// HCloudServerType holds server type info.
type HCloudServerType struct {
	Name        string  `json:"name"`
	Description string  `json:"description"`
	Cores       int     `json:"cores"`
	Memory      float64 `json:"memory"`
	Disk        int     `json:"disk"`
}

// HCloudDatacenter holds datacenter info.
type HCloudDatacenter struct {
	Name        string `json:"name"`
	Description string `json:"description"`
}

// HCloudLoadBalancer represents a Hetzner Cloud load balancer.
type HCloudLoadBalancer struct {
	ID        int               `json:"id"`
	Name      string            `json:"name"`
	PublicNet HCloudLBPublicNet `json:"public_net"`
	Algorithm HCloudLBAlgorithm `json:"algorithm"`
	Services  []HCloudLBService `json:"services"`
	Targets   []HCloudLBTarget  `json:"targets"`
	Location  HCloudDatacenter  `json:"location"`
	Labels    map[string]string `json:"labels"`
}

// HCloudLBPublicNet holds LB public network info.
type HCloudLBPublicNet struct {
	Enabled bool       `json:"enabled"`
	IPv4    HCloudIPv4 `json:"ipv4"`
}

// HCloudLBAlgorithm holds the LB algorithm.
type HCloudLBAlgorithm struct {
	Type string `json:"type"`
}

// HCloudLBService describes an LB listener.
type HCloudLBService struct {
	Protocol        string               `json:"protocol"`
	ListenPort      int                  `json:"listen_port"`
	DestinationPort int                  `json:"destination_port"`
	Proxyprotocol   bool                 `json:"proxyprotocol"`
	HTTP            *HCloudLBHTTP        `json:"http,omitempty"`
	HealthCheck     *HCloudLBHealthCheck `json:"health_check,omitempty"`
}

// HCloudLBHTTP holds HTTP-specific LB options.
type HCloudLBHTTP struct {
	RedirectHTTP bool `json:"redirect_http"`
}

// HCloudLBHealthCheck holds LB health check config.
type HCloudLBHealthCheck struct {
	Protocol string          `json:"protocol"`
	Port     int             `json:"port"`
	Interval int             `json:"interval"`
	Timeout  int             `json:"timeout"`
	Retries  int             `json:"retries"`
	HTTP     *HCloudLBHCHTTP `json:"http,omitempty"`
}

// HCloudLBHCHTTP holds HTTP health check options.
type HCloudLBHCHTTP struct {
	Path       string `json:"path"`
	StatusCode string `json:"status_codes"`
}

// HCloudLBTarget is a load balancer backend target.
type HCloudLBTarget struct {
	Type         string                 `json:"type"`
	IP           *HCloudLBTargetIP      `json:"ip,omitempty"`
	Server       *HCloudLBTargetServer  `json:"server,omitempty"`
	HealthStatus []HCloudLBHealthStatus `json:"health_status"`
}

// HCloudLBTargetIP is an IP-based LB target.
type HCloudLBTargetIP struct {
	IP string `json:"ip"`
}

// HCloudLBTargetServer is a server-based LB target.
type HCloudLBTargetServer struct {
	ID int `json:"id"`
}

// HCloudLBHealthStatus holds target health info.
type HCloudLBHealthStatus struct {
	ListenPort int    `json:"listen_port"`
	Status     string `json:"status"`
}

// HCloudLBCreateRequest holds load balancer creation params.
type HCloudLBCreateRequest struct {
	Name             string                 `json:"name"`
	LoadBalancerType string                 `json:"load_balancer_type"`
	Location         string                 `json:"location"`
	Algorithm        HCloudLBAlgorithm      `json:"algorithm"`
	Services         []HCloudLBService      `json:"services"`
	Targets          []HCloudLBCreateTarget `json:"targets"`
	Labels           map[string]string      `json:"labels"`
}

// HCloudLBCreateTarget is a target for LB creation.
type HCloudLBCreateTarget struct {
	Type string            `json:"type"`
	IP   *HCloudLBTargetIP `json:"ip,omitempty"`
}

// ListServers returns all Hetzner Cloud servers.
func (c *HCloudClient) ListServers(ctx context.Context) ([]HCloudServer, error) {
	var result struct {
		Servers []HCloudServer `json:"servers"`
	}
	if err := c.get(ctx, "/servers", &result); err != nil {
		return nil, err
	}
	return result.Servers, nil
}

// ListLoadBalancers returns all load balancers.
func (c *HCloudClient) ListLoadBalancers(ctx context.Context) ([]HCloudLoadBalancer, error) {
	var result struct {
		LoadBalancers []HCloudLoadBalancer `json:"load_balancers"`
	}
	if err := c.get(ctx, "/load_balancers", &result); err != nil {
		return nil, err
	}
	return result.LoadBalancers, nil
}

// GetLoadBalancer returns a load balancer by ID.
func (c *HCloudClient) GetLoadBalancer(ctx context.Context, id int) (*HCloudLoadBalancer, error) {
	var result struct {
		LoadBalancer HCloudLoadBalancer `json:"load_balancer"`
	}
	if err := c.get(ctx, fmt.Sprintf("/load_balancers/%d", id), &result); err != nil {
		return nil, err
	}
	return &result.LoadBalancer, nil
}

// CreateLoadBalancer creates a new load balancer.
func (c *HCloudClient) CreateLoadBalancer(ctx context.Context, req HCloudLBCreateRequest) (*HCloudLoadBalancer, error) {
	body, err := json.Marshal(req)
	if err != nil {
		return nil, fmt.Errorf("marshal request: %w", err)
	}

	var result struct {
		LoadBalancer HCloudLoadBalancer `json:"load_balancer"`
	}
	if err := c.post(ctx, "/load_balancers", body, &result); err != nil {
		return nil, err
	}
	return &result.LoadBalancer, nil
}

// DeleteLoadBalancer deletes a load balancer by ID.
func (c *HCloudClient) DeleteLoadBalancer(ctx context.Context, id int) error {
	return c.delete(ctx, fmt.Sprintf("/load_balancers/%d", id))
}

// CreateSnapshot creates a server snapshot.
func (c *HCloudClient) CreateSnapshot(ctx context.Context, serverID int, description string) error {
	body, _ := json.Marshal(map[string]string{
		"description": description,
		"type":        "snapshot",
	})
	return c.post(ctx, fmt.Sprintf("/servers/%d/actions/create_image", serverID), body, nil)
}

func (c *HCloudClient) get(ctx context.Context, path string, result any) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, hcloudBaseURL+path, nil)
	if err != nil {
		return err
	}
	return c.do(req, result)
}

func (c *HCloudClient) post(ctx context.Context, path string, body []byte, result any) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, hcloudBaseURL+path, strings.NewReader(string(body)))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	return c.do(req, result)
}

func (c *HCloudClient) delete(ctx context.Context, path string) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodDelete, hcloudBaseURL+path, nil)
	if err != nil {
		return err
	}
	return c.do(req, nil)
}

func (c *HCloudClient) do(req *http.Request, result any) error {
	req.Header.Set("Authorization", "Bearer "+c.token)

	resp, err := c.client.Do(req)
	if err != nil {
		return fmt.Errorf("hcloud API: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}

	if resp.StatusCode >= 400 {
		var apiErr struct {
			Error struct {
				Code    string `json:"code"`
				Message string `json:"message"`
			} `json:"error"`
		}
		if json.Unmarshal(data, &apiErr) == nil && apiErr.Error.Message != "" {
			return fmt.Errorf("hcloud API %d: %s — %s", resp.StatusCode, apiErr.Error.Code, apiErr.Error.Message)
		}
		return fmt.Errorf("hcloud API %d: %s", resp.StatusCode, string(data))
	}

	if result != nil {
		if err := json.Unmarshal(data, result); err != nil {
			return fmt.Errorf("decode response: %w", err)
		}
	}
	return nil
}

// --- Hetzner Robot API ---

// HRobotClient is an HTTP client for the Hetzner Robot API.
type HRobotClient struct {
	user     string
	password string
	client   *http.Client
}

// NewHRobotClient creates a new Hetzner Robot API client.
func NewHRobotClient(user, password string) *HRobotClient {
	return &HRobotClient{
		user:     user,
		password: password,
		client: &http.Client{
			Timeout: 30 * time.Second,
		},
	}
}

// HRobotServer represents a Hetzner Robot dedicated server.
type HRobotServer struct {
	ServerIP   string `json:"server_ip"`
	ServerName string `json:"server_name"`
	Product    string `json:"product"`
	Datacenter string `json:"dc"`
	Status     string `json:"status"`
	Cancelled  bool   `json:"cancelled"`
	PaidUntil  string `json:"paid_until"`
}

// ListServers returns all Robot dedicated servers.
func (c *HRobotClient) ListServers(ctx context.Context) ([]HRobotServer, error) {
	var raw []struct {
		Server HRobotServer `json:"server"`
	}
	if err := c.get(ctx, "/server", &raw); err != nil {
		return nil, err
	}

	servers := make([]HRobotServer, len(raw))
	for i, s := range raw {
		servers[i] = s.Server
	}
	return servers, nil
}

// GetServer returns a Robot server by IP.
func (c *HRobotClient) GetServer(ctx context.Context, ip string) (*HRobotServer, error) {
	var raw struct {
		Server HRobotServer `json:"server"`
	}
	if err := c.get(ctx, "/server/"+ip, &raw); err != nil {
		return nil, err
	}
	return &raw.Server, nil
}

func (c *HRobotClient) get(ctx context.Context, path string, result any) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, hrobotBaseURL+path, nil)
	if err != nil {
		return err
	}
	req.SetBasicAuth(c.user, c.password)

	resp, err := c.client.Do(req)
	if err != nil {
		return fmt.Errorf("hrobot API: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("read response: %w", err)
	}

	if resp.StatusCode >= 400 {
		return fmt.Errorf("hrobot API %d: %s", resp.StatusCode, string(data))
	}

	if result != nil {
		if err := json.Unmarshal(data, result); err != nil {
			return fmt.Errorf("decode response: %w", err)
		}
	}
	return nil
}
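To show how the request types above compose, a hedged sketch of creating an LB roughly matching the topology this commit describes. The token env var name and location are assumptions; the LB name, type, and backend IPs mirror the tests and inventory, but infra.yaml remains authoritative:

// Sketch: create a round-robin LB with two IP targets.
// HCLOUD_TOKEN and the "fsn1" location are placeholders/assumptions.
package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/host-uk/core/pkg/infra"
)

func main() {
	req := infra.HCloudLBCreateRequest{
		Name:             "hermes",
		LoadBalancerType: "lb11",
		Location:         "fsn1", // assumed; infra.yaml is authoritative
		Algorithm:        infra.HCloudLBAlgorithm{Type: "round_robin"},
		Services: []infra.HCloudLBService{{
			Protocol:        "tcp",
			ListenPort:      443,
			DestinationPort: 443,
		}},
		Targets: []infra.HCloudLBCreateTarget{
			{Type: "ip", IP: &infra.HCloudLBTargetIP{IP: "116.202.82.115"}},
			{Type: "ip", IP: &infra.HCloudLBTargetIP{IP: "88.99.195.41"}},
		},
	}

	hc := infra.NewHCloudClient(os.Getenv("HCLOUD_TOKEN"))
	lb, err := hc.CreateLoadBalancer(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("created LB", lb.ID, lb.PublicNet.IPv4.IP)
}
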
63
playbooks/galera-backup.yml
Normal file

@@ -0,0 +1,63 @@
# Galera Database Backup
# Dumps the database and uploads to Hetzner S3
#
# Usage:
#   core deploy ansible playbooks/galera-backup.yml -i playbooks/inventory.yml -l de
---
- name: Backup Galera Database to S3
  hosts: app_servers
  become: true
  vars:
    db_root_password: "{{ lookup('env', 'DB_ROOT_PASSWORD') }}"
    s3_endpoint: "{{ lookup('env', 'HETZNER_S3_ENDPOINT') | default('fsn1.your-objectstorage.com', true) }}"
    s3_bucket: "{{ lookup('env', 'HETZNER_S3_BUCKET') | default('hostuk', true) }}"
    s3_access_key: "{{ lookup('env', 'HETZNER_S3_ACCESS_KEY') }}"
    s3_secret_key: "{{ lookup('env', 'HETZNER_S3_SECRET_KEY') }}"
    backup_prefix: backup/galera
    backup_retain_days: 30

  tasks:
    - name: Create backup directory
      file:
        path: /opt/backup
        state: directory
        mode: "0700"

    - name: Dump database
      shell: |
        TIMESTAMP=$(date +%Y%m%d-%H%M%S)
        DUMP_FILE="/opt/backup/hostuk-${TIMESTAMP}-{{ galera_node_name }}.sql.gz"
        docker exec galera mariadb-dump \
          -u root -p{{ db_root_password }} \
          --all-databases \
          --single-transaction \
          --routines \
          --triggers \
          --events \
          | gzip > "${DUMP_FILE}"
        echo "${DUMP_FILE}"
      register: dump_result

    - name: Install s3cmd if missing
      shell: |
        which s3cmd 2>/dev/null || pip3 install s3cmd
      changed_when: false

    - name: Upload to S3
      shell: |
        s3cmd put {{ dump_result.stdout | trim }} \
          s3://{{ s3_bucket }}/{{ backup_prefix }}/$(basename {{ dump_result.stdout | trim }}) \
          --host={{ s3_endpoint }} \
          --host-bucket='%(bucket)s.{{ s3_endpoint }}' \
          --access_key={{ s3_access_key }} \
          --secret_key={{ s3_secret_key }}
      when: s3_access_key != ""

    - name: Clean old local backups
      shell: |
        find /opt/backup -name "hostuk-*.sql.gz" -mtime +{{ backup_retain_days }} -delete
      changed_when: false

    - name: Show backup result
      debug:
        msg: "Backup completed: {{ dump_result.stdout | trim }}"
96
playbooks/galera-deploy.yml
Normal file

@@ -0,0 +1,96 @@
# MariaDB Galera Cluster Deployment
# Deploys a 2-node Galera cluster on de + de2
#
# Usage:
#   core deploy ansible playbooks/galera-deploy.yml -i playbooks/inventory.yml
#   core deploy ansible playbooks/galera-deploy.yml -i playbooks/inventory.yml -l de   # Single node
#
# First-time bootstrap:
#   Set galera_bootstrap=true for the first node:
#   core deploy ansible playbooks/galera-deploy.yml -i playbooks/inventory.yml -l de -e galera_bootstrap=true
---
- name: Deploy MariaDB Galera Cluster
  hosts: app_servers
  become: true
  vars:
    mariadb_version: "11"
    galera_cluster_address: "gcomm://116.202.82.115,88.99.195.41"
    galera_bootstrap: false
    db_root_password: "{{ lookup('env', 'DB_ROOT_PASSWORD') }}"
    db_password: "{{ lookup('env', 'DB_PASSWORD') }}"

  tasks:
    - name: Create MariaDB data directory
      file:
        path: /opt/galera/data
        state: directory
        mode: "0755"

    - name: Create MariaDB config directory
      file:
        path: /opt/galera/conf.d
        state: directory
        mode: "0755"

    - name: Write Galera configuration
      copy:
        dest: /opt/galera/conf.d/galera.cnf
        content: |
          [mysqld]
          wsrep_on=ON
          wsrep_provider=/usr/lib/galera/libgalera_smm.so
          wsrep_cluster_name={{ galera_cluster_name }}
          wsrep_cluster_address={{ 'gcomm://' if galera_bootstrap else galera_cluster_address }}
          wsrep_node_address={{ galera_node_address }}
          wsrep_node_name={{ galera_node_name }}
          wsrep_sst_method={{ galera_sst_method }}
          binlog_format=ROW
          default_storage_engine=InnoDB
          innodb_autoinc_lock_mode=2
          innodb_buffer_pool_size=1G
          innodb_log_file_size=256M
          character_set_server=utf8mb4
          collation_server=utf8mb4_unicode_ci

    - name: Stop existing MariaDB container
      shell: docker stop galera 2>/dev/null || true
      changed_when: false

    - name: Remove existing MariaDB container
      shell: docker rm galera 2>/dev/null || true
      changed_when: false

    - name: Start MariaDB Galera container
      shell: |
        docker run -d \
          --name galera \
          --restart unless-stopped \
          --network host \
          -v /opt/galera/data:/var/lib/mysql \
          -v /opt/galera/conf.d:/etc/mysql/conf.d \
          -e MARIADB_ROOT_PASSWORD={{ db_root_password }} \
          -e MARIADB_DATABASE={{ db_name }} \
          -e MARIADB_USER={{ db_user }} \
          -e MARIADB_PASSWORD={{ db_password }} \
          mariadb:{{ mariadb_version }}

    - name: Wait for MariaDB to be ready
      shell: |
        for i in $(seq 1 60); do
          docker exec galera mariadb -u root -p{{ db_root_password }} -e "SELECT 1" 2>/dev/null && exit 0
          sleep 2
        done
        exit 1
      changed_when: false

    - name: Check Galera cluster status
      shell: |
        docker exec galera mariadb -u root -p{{ db_root_password }} \
          -e "SHOW STATUS WHERE Variable_name IN ('wsrep_cluster_size','wsrep_ready','wsrep_cluster_status')" \
          --skip-column-names
      register: galera_status
      changed_when: false

    - name: Display cluster status
      debug:
        var: galera_status.stdout_lines
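The playbooks are run through `core deploy ansible` rather than ansible-playbook directly. The wrapper's internals are not part of this diff; as a rough, hypothetical sketch, it amounts to shelling out with the inventory, limit, and extra-vars flags passed through:

// Hypothetical sketch of what a `core deploy ansible` wrapper boils down to;
// not the actual implementation from this commit.
package main

import (
	"os"
	"os/exec"
)

func main() {
	// Equivalent of: core deploy ansible playbooks/galera-deploy.yml \
	//   -i playbooks/inventory.yml -l de -e galera_bootstrap=true
	cmd := exec.Command("ansible-playbook",
		"playbooks/galera-deploy.yml",
		"-i", "playbooks/inventory.yml",
		"-l", "de", // first node only
		"-e", "galera_bootstrap=true", // wsrep_cluster_address becomes gcomm://
	)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		os.Exit(1)
	}
}

Once the first node is up, the same play runs against de2 without the bootstrap flag so it joins through the full gcomm:// address.
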
36
playbooks/inventory.yml
Normal file

@@ -0,0 +1,36 @@
# Ansible inventory for Host UK production
# Used by: core deploy ansible <playbook> -i playbooks/inventory.yml
all:
  vars:
    ansible_user: root
    ansible_ssh_private_key_file: ~/.ssh/hostuk

  children:
    bastion:
      hosts:
        noc:
          ansible_host: 77.42.42.205
          private_ip: 10.0.0.4

    app_servers:
      hosts:
        de:
          ansible_host: 116.202.82.115
          galera_node_name: de
          galera_node_address: 116.202.82.115
        de2:
          ansible_host: 88.99.195.41
          galera_node_name: de2
          galera_node_address: 88.99.195.41
      vars:
        galera_cluster_name: hostuk-galera
        galera_sst_method: mariabackup
        db_name: hostuk
        db_user: hostuk
        redis_maxmemory: 512mb

    builders:
      hosts:
        build:
          ansible_host: 46.224.93.62
          private_ip: 10.0.0.5
98
playbooks/redis-deploy.yml
Normal file

@@ -0,0 +1,98 @@
# Redis Sentinel Deployment
# Deploys Redis with Sentinel on de + de2
#
# Usage:
#   core deploy ansible playbooks/redis-deploy.yml -i playbooks/inventory.yml
---
- name: Deploy Redis with Sentinel
  hosts: app_servers
  become: true
  vars:
    redis_version: "7"
    redis_password: "{{ lookup('env', 'REDIS_PASSWORD') | default('', true) }}"

  tasks:
    - name: Create Redis data directory
      file:
        path: /opt/redis/data
        state: directory
        mode: "0755"

    - name: Create Redis config directory
      file:
        path: /opt/redis/conf
        state: directory
        mode: "0755"

    - name: Write Redis configuration
      copy:
        dest: /opt/redis/conf/redis.conf
        content: |
          maxmemory {{ redis_maxmemory }}
          maxmemory-policy allkeys-lru
          appendonly yes
          appendfsync everysec
          tcp-keepalive 300
          timeout 0
          {% if redis_password %}
          requirepass {{ redis_password }}
          masterauth {{ redis_password }}
          {% endif %}

    - name: Write Sentinel configuration
      copy:
        dest: /opt/redis/conf/sentinel.conf
        content: |
          port 26379
          sentinel monitor hostuk-redis 116.202.82.115 6379 2
          sentinel down-after-milliseconds hostuk-redis 5000
          sentinel failover-timeout hostuk-redis 60000
          sentinel parallel-syncs hostuk-redis 1
          {% if redis_password %}
          sentinel auth-pass hostuk-redis {{ redis_password }}
          {% endif %}

    - name: Stop existing Redis containers
      shell: |
        docker stop redis redis-sentinel 2>/dev/null || true
        docker rm redis redis-sentinel 2>/dev/null || true
      changed_when: false

    - name: Start Redis container
      shell: |
        docker run -d \
          --name redis \
          --restart unless-stopped \
          --network host \
          -v /opt/redis/data:/data \
          -v /opt/redis/conf/redis.conf:/usr/local/etc/redis/redis.conf \
          redis:{{ redis_version }}-alpine \
          redis-server /usr/local/etc/redis/redis.conf

    - name: Start Redis Sentinel container
      shell: |
        docker run -d \
          --name redis-sentinel \
          --restart unless-stopped \
          --network host \
          -v /opt/redis/conf/sentinel.conf:/usr/local/etc/redis/sentinel.conf \
          redis:{{ redis_version }}-alpine \
          redis-sentinel /usr/local/etc/redis/sentinel.conf

    - name: Wait for Redis to be ready
      shell: |
        for i in $(seq 1 30); do
          docker exec redis redis-cli ping 2>/dev/null | grep -q PONG && exit 0
          sleep 1
        done
        exit 1
      changed_when: false

    - name: Check Redis info
      shell: docker exec redis redis-cli info replication | head -10
      register: redis_info
      changed_when: false

    - name: Display Redis info
      debug:
        var: redis_info.stdout_lines
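A client consuming this topology would connect through Sentinel rather than to a fixed node. A minimal sketch using go-redis, which is an assumption here (the Laravel app in this stack uses its own client); the master name and Sentinel addresses mirror sentinel.conf:

// Sketch: resolve the current master via Sentinel with go-redis.
// Library choice and password handling are assumptions, not part of this commit.
package main

import (
	"context"
	"fmt"
	"os"

	"github.com/redis/go-redis/v9"
)

func main() {
	rdb := redis.NewFailoverClient(&redis.FailoverOptions{
		MasterName:    "hostuk-redis", // matches the sentinel monitor name
		SentinelAddrs: []string{"116.202.82.115:26379", "88.99.195.41:26379"},
		Password:      os.Getenv("REDIS_PASSWORD"),
	})
	fmt.Println(rdb.Ping(context.Background()).Err()) // nil once a master is resolved
}

If Sentinel fails the master over, the failover client re-resolves the new master transparently on the next connection.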