Cluster-Problem

This commit is contained in:
2026-01-09 18:53:26 +01:00
parent 79c2309d5b
commit cfd1099405
2 changed files with 187 additions and 117 deletions

View File

@@ -1,97 +1,192 @@
#!/usr/bin/env bash
# Proxmox VE container-provisioning helpers: logging, precondition checks,
# CT template handling, cluster-wide CTID allocation, and CT create/start/exec.
# set -E propagates the ERR trap into functions; -e/-u/-o pipefail = strict mode.
set -Eeuo pipefail
########################################
# Logging / Errors
########################################
# -------------------------
# Logging helpers (stderr!)
# -------------------------
# Report whether stderr (fd 2) is attached to a terminal.
# NOTE(review): not referenced in the visible part of the file — confirm callers.
_is_tty() { test -t 2; }
# Timestamp for log lines, e.g. "2026-01-09 18:53:26".
_ts() { date '+%Y-%m-%d %H:%M:%S'; }
# Emit a timestamped message on stderr, so stdout stays clean for functions
# that return values via command substitution (this variant wrote to stdout,
# which corrupted captured output, e.g. pve_build_net0's result).
log() { echo "[$(_ts)] $*" >&2; }
# Print one timestamped line on stderr (stdout stays free for return values).
log() {
  local line
  line="[$(_ts)] $*"
  printf '%s\n' "$line" >&2
}
# Severity-tagged wrappers around log(); die() additionally aborts the script
# with exit status 1.
info() { log "INFO: $*"; }
warn() { log "WARN: $*"; }
die() {
  log "ERROR: $*"
  exit 1
}
# ERR-trap handler: report the failing line, command and exit status via die().
on_error() {
  local line="$1" command="$2" status="$3"
  die "Failed at line ${line}: ${command} (exit=${status})"
}
# Install the ERR trap; with `set -E` above it also fires inside functions.
setup_traps() {
  trap 'on_error "$LINENO" "$BASH_COMMAND" "$?"' ERR
}
########################################
# Preconditions
########################################
# Verify that every named command is on PATH; abort via die() on the first
# one that is missing.
need_cmd() {
  local cmd
  for cmd in "$@"; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
      die "Missing command: $cmd"
    fi
  done
}
########################################
# Handler wired to the ERR trap: forwards line number, failing command and
# exit status to die(), which logs and exits 1.
on_error() {
  die "Failed at line $1: $2 (exit=$3)"
}
# Arm the ERR trap so any failing command is reported with context.
setup_traps() { trap 'on_error "${LINENO}" "${BASH_COMMAND}" "$?"' ERR; }
# -------------------------
# Proxmox helpers
########################################
# -------------------------
# True if the Proxmox storage with the given id is known to pvesm;
# status output is discarded, only the exit code matters.
pve_storage_exists() {
  pvesm status --storage "$1" >/dev/null 2>&1
}
# True if the given interface exists and is a Linux bridge
# (a bridge device exposes /sys/class/net/<if>/bridge).
# The diff merge had left a dead duplicate test of "$1" before the real one.
pve_bridge_exists() {
  local br="$1"
  [[ -d "/sys/class/net/${br}/bridge" ]]
}
pve_storage_exists() {
  # Duplicate of the earlier definition (diff-merge residue), kept for
  # compatibility; its closing brace was missing, which swallowed every
  # following function into this one and broke parsing of the file.
  pvesm status --storage "$1" >/dev/null 2>&1
}

# Return a list of all VMIDs in the cluster (VM + CT).
# Output: one vmid per line on stdout, numerically sorted, deduplicated.
# Fix: the original used `python3 - <<'PY'`, whose heredoc replaces the
# pipe as stdin — json.load(sys.stdin) then read EOF. Pass the program
# via -c so the piped JSON stays on stdin.
pve_cluster_vmids() {
  need_cmd pvesh python3
  pvesh get /cluster/resources --type vm --output-format json \
    | python3 -c '
import json, sys
data = json.load(sys.stdin)
ids = sorted({int(x["vmid"]) for x in data if "vmid" in x})
for i in ids:
    print(i)
'
}
########################################
# Template handling
########################################
# Pick a free CTID cluster-wide. Default start=2000 to avoid collisions with
# "classic" ranges.
#
# $1 - first candidate id (default 2000)
# $2 - last candidate id  (default 9999)
# Prints the first unused vmid on stdout; returns 1 when the range is full.
#
# Fix: the original interpolated $used/$start/$end straight into Python
# source inside a heredoc (breaks on unexpected characters, bare `except`,
# and forks python3 for a trivial scan). Pure bash does the same job.
pve_next_free_ctid() {
  local start="${1:-2000}"
  local end="${2:-9999}"
  local used line vmid
  used="$(pve_cluster_vmids || true)"
  # Index the used ids once; non-numeric lines are ignored, as before.
  local -A taken=()
  while IFS= read -r line; do
    if [[ "$line" =~ ^[0-9]+$ ]]; then
      taken["$line"]=1
    fi
  done <<<"$used"
  for (( vmid = start; vmid <= end; vmid++ )); do
    if [[ -z "${taken[$vmid]:-}" ]]; then
      echo "$vmid"
      return 0
    fi
  done
  return 1
}
# Ensure the Debian 12 CT template is available.
# $1 - preferred storage id; falls back to "local" when that storage cannot
#      hold templates.
# Prints a template ref usable by `pct create` on stdout, e.g.:
#   local:vztmpl/debian-12-standard_12.12-1_amd64.tar.zst
#
# Fix: the diff merge had duplicated the local declarations and download
# logic and dropped a `fi`, leaving an unterminated `if` (syntax error) and
# a double echo of the template ref. This is the coherent "new" side.
pve_template_ensure_debian12() {
  need_cmd pveam awk grep
  local store="$1"
  local tpl="debian-12-standard_12.12-1_amd64.tar.zst"
  # pveam templates are usually on "local" (dir storage). If the chosen
  # storage doesn't support templates -> fall back.
  if ! pveam list "$store" >/dev/null 2>&1; then
    warn "pveam storage '$store' not available for templates; falling back to 'local'"
    store="local"
  fi
  # Refresh the template index (quietly; failure here is non-fatal).
  pveam update >/dev/null 2>&1 || true
  # Download only when the template is not already present.
  # NOTE(review): assumes the template name is column 2 of `pveam list`;
  # if it is column 1 (the volid), this match never hits and the template
  # is re-downloaded on every run — confirm against real pveam output.
  if ! pveam list "$store" 2>/dev/null | awk '{print $2}' | grep -qx "$tpl"; then
    info "Downloading CT template to ${store}: ${tpl}"
    pveam download "$store" "$tpl" >/dev/null
  fi
  # Print template ref for pct create (stdout only).
  echo "${store}:vztmpl/${tpl}"
}
########################################
# Cluster-wide CTID
########################################
# Find the lowest unused VMID in 100..9999 across the whole cluster.
# Prints it on stdout; die()s when the range is exhausted.
# NOTE(review): parses the human-readable `pvesh` table — assumes vmids are
# in column 1 after a single header line; confirm against real pvesh output.
#
# Fix: replaced `for id in $(seq 100 9999)` + one grep per candidate
# (O(candidates x used)) with a single hash of the used ids; also localized
# the loop variable.
pve_next_free_ctid() {
  local used line id
  used="$(pvesh get /cluster/resources --type vm | awk 'NR>1 {print $1}' | sort -n)"
  local -A taken=()
  while IFS= read -r line; do
    if [[ "$line" =~ ^[0-9]+$ ]]; then
      taken["$line"]=1
    fi
  done <<<"$used"
  for (( id = 100; id <= 9999; id++ )); do
    if [[ -z "${taken[$id]:-}" ]]; then
      echo "$id"
      return 0
    fi
  done
  die "No free CTID found"
}
########################################
# Networking
########################################
# Build the --net0 argument for `pct create`.
# $1 - bridge name
# $2 - either "dhcp" or a CIDR, e.g. 192.168.45.171/24
#      (no gateway here; can be set later if needed)
# Prints the net0 config string on stdout.
# Fix: the diff merge left a duplicated echo in each branch, emitting the
# line twice and producing an invalid pct argument.
pve_build_net0() {
  local bridge="$1"
  local ip="$2"
  if [[ "$ip" == "dhcp" ]]; then
    echo "name=eth0,bridge=${bridge},ip=dhcp"
  else
    echo "name=eth0,bridge=${bridge},ip=${ip}"
  fi
}
# Create (but do not start) an LXC container.
# Positional args:
#   1 ctid      2 template ref   3 hostname
#   4 cores     5 memory         6 swap
#   7 storage   8 disk size      9 bridge
#  10 ip ("dhcp" or CIDR)       11 unprivileged flag (0/1)
pve_create_ct() {
  need_cmd pct
  local ctid="$1" template="$2" hostname="$3"
  local cores="$4" memory="$5" swap="$6"
  local storage="$7" disk="$8"
  local bridge="$9" ip="${10}" unpriv="${11}"
  local net0
  net0="$(pve_build_net0 "$bridge" "$ip")"
  info "Creating CT ${ctid} (${hostname}) from ${template}"
  # Collect the pct options in an array so the call stays readable.
  local -a opts=(
    --hostname "$hostname"
    --cores "$cores"
    --memory "$memory"
    --swap "$swap"
    --net0 "$net0"
    --rootfs "${storage}:${disk}"
    --unprivileged "$unpriv"
    --features "nesting=1,keyctl=1,fuse=1"
    --start 0
  )
  pct create "$ctid" "$template" "${opts[@]}"
}
# Boot the given container; pct's stdout is discarded, failures surface
# through the shell's error handling.
pve_start_ct() {
  need_cmd pct
  local id="$1"
  info "Starting CT ${id}"
  pct start "$id" >/dev/null
}
# Wait until the container reports an address via `hostname -I`.
# $1 - ctid
# $2 - timeout in seconds (default 120; polled every 2s)
# Prints the first reported address on stdout and returns 0, or returns 1
# on timeout.
pve_wait_ct_ip() {
  need_cmd pct awk
  local ctid="$1"
  local timeout="${2:-120}"
  local waited=0 addr
  until (( waited >= timeout )); do
    # hostname -I may list several addresses; awk picks the first one.
    addr="$(pct exec "$ctid" -- bash -lc "hostname -I 2>/dev/null | awk '{print \$1}'" || true)"
    # Strip any stray CR/LF from the captured output.
    addr="${addr//[$'\r\n']/}"
    if [[ -n "$addr" ]]; then
      echo "$addr"
      return 0
    fi
    sleep 2
    waited=$(( waited + 2 ))
  done
  return 1
}
# Run a command line inside the container through a login shell.
# $1 - ctid; the remaining args are joined and handed to `bash -lc`.
ct_exec() {
  need_cmd pct
  local id="$1"
  shift
  pct exec "$id" -- bash -lc "$*"
}
# Copy a local file into the container via `pct push`.
# $1 - ctid, $2 - local source path, $3 - destination path inside the CT.
ct_push() {
  need_cmd pct
  pct push "$1" "$2" "$3"
}

View File

@@ -1,69 +1,39 @@
services:
  # --- n8n DB (separate, cleanly isolated) ---
  n8n-db:
    image: postgres:15-alpine
    container_name: n8n-db
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${N8N_DB_USER}
      POSTGRES_PASSWORD: ${N8N_DB_PASS}
      # POSTGRES_DB appeared twice in the merged diff; kept once.
      POSTGRES_DB: ${N8N_DB_NAME}
    volumes:
      - ./volumes/n8n-db:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${N8N_DB_USER} -d ${N8N_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 15
    networks: [custnet]

  # --- Vector store DB (pgvector) ---
  # NOTE(review): the merged diff contained two conflicting "supabase-db"
  # definitions (old: published port 5432 + ./init dir mount; new: no
  # published port + single SQL init file). The "new" side is kept; the
  # healthcheck is retained because postgrest below depends on
  # condition: service_healthy.
  supabase-db:
    image: pgvector/pgvector:pg15
    container_name: supabase-db
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${SB_DB_USER}
      POSTGRES_PASSWORD: ${SB_DB_PASS}
      POSTGRES_DB: ${SB_DB_NAME}
    volumes:
      - ./volumes/supabase-db:/var/lib/postgresql/data
      - ./sql/init_pgvector.sql:/docker-entrypoint-initdb.d/init_pgvector.sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${SB_DB_USER} -d ${SB_DB_NAME}"]
      interval: 10s
      timeout: 5s
      retries: 15
    networks: [custnet]

  # Optional: REST API in front of the vector DB (only if needed).
  # To keep the DB unreachable from outside: block this in the reverse
  # proxy later, or remove the published port.
  postgrest:
    image: postgrest/postgrest:v12.2.8
    container_name: postgrest
    restart: unless-stopped
    depends_on:
      supabase-db:
        condition: service_healthy
    environment:
      PGRST_DB_URI: postgres://${SB_DB_USER}:${SB_DB_PASS}@supabase-db:5432/${SB_DB_NAME}
      PGRST_DB_SCHEMA: public
      PGRST_DB_ANON_ROLE: anon
      PGRST_SERVER_PORT: 3000
    ports:
      - "3000:3000"
    networks: [custnet]

  # --- n8n ---
  n8n:
    image: docker.n8n.io/n8nio/n8n:latest
    container_name: n8n
    restart: unless-stopped
    # The merge mixed the list form ("- n8n-db") with the mapping form;
    # the mapping form with a health condition is kept.
    depends_on:
      n8n-db:
        condition: service_healthy
    ports:
      - "5678:5678"
    environment:
      # n8n DB connection
      DB_TYPE: postgresdb
      DB_POSTGRESDB_HOST: n8n-db
      DB_POSTGRESDB_PORT: 5432
      # NOTE(review): DB_POSTGRESDB_DATABASE is expected here but fell
      # between diff hunks — confirm it is set (likely ${N8N_DB_NAME}).
      DB_POSTGRESDB_USER: ${N8N_DB_USER}
      DB_POSTGRESDB_PASSWORD: ${N8N_DB_PASS}
      # n8n base
      N8N_ENCRYPTION_KEY: ${N8N_ENCRYPTION_KEY}
      GENERIC_TIMEZONE: Europe/Berlin
      NODE_ENV: production
      N8N_PORT: 5678
      # External URL (reverse proxy target)
      N8N_HOST: ${N8N_HOST}
      N8N_PROTOCOL: https
      N8N_EDITOR_BASE_URL: ${N8N_EDITOR_BASE_URL}
      # NOTE(review): old side used ${N8N_WEBHOOK_URL}; new side uses
      # ${WEBHOOK_URL} — kept the new name; make sure .env matches.
      WEBHOOK_URL: ${WEBHOOK_URL}
      # As long as no TLS/proxy is active (avoids the "secure cookie" error):
      N8N_SECURE_COOKIE: "false"
      # Optional: enable if community nodes are needed later
      N8N_COMMUNITY_PACKAGES_ENABLED: "true"
      # Optional basic auth
      N8N_BASIC_AUTH_ACTIVE: "true"
      N8N_BASIC_AUTH_USER: ${N8N_BASIC_AUTH_USER}
      # NOTE(review): old side read ${N8N_BASIC_AUTH_PASS}; new name kept.
      N8N_BASIC_AUTH_PASSWORD: ${N8N_BASIC_AUTH_PASSWORD}
    volumes:
      - ./volumes/n8n-data:/home/node/.n8n
    networks: [custnet]

networks:
  custnet:
    driver: bridge