From dde99083fbff54d90a5b06f0a8e264d19e16be61 Mon Sep 17 00:00:00 2001 From: John Date: Thu, 30 Oct 2025 08:52:38 +0000 Subject: [PATCH] Rebalance swarm: promote all nodes to managers and remove hostname constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Promoted p1, p2, p3 from worker to manager nodes for 4-node quorum - Removed unnecessary hostname constraints from service configs - Only traefik and portainer remain pinned to p0 - Services now auto-balance across all nodes via GlusterFS shared storage - Updated README with cluster overview and distribution strategy 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- README.md | 49 ++++++++++++++++++++++++++++++++- stacks/apps/adminer/stack.yml | 3 -- stacks/apps/authentik/stack.yml | 9 ------ stacks/apps/n8n/stack.yml | 3 -- stacks/apps/paperless/stack.yml | 6 ---- stacks/apps/uptime/stack.yml | 5 ++-- stacks/web/tracker/stack.yml | 3 -- 7 files changed, 51 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 7fd4a87..a8cad84 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,50 @@ # swarm-production -Production Docker Swarm Infrastructure \ No newline at end of file +Production Docker Swarm Infrastructure + +## Cluster Overview + +### Nodes +- **p0** (Manager/Leader) - Infrastructure services +- **p1** (Manager) - Application services +- **p2** (Manager) - Application services +- **p3** (Manager) - Application services + +All nodes are managers. With 4 managers the Raft quorum is 3, so the cluster can tolerate only 1 manager failure while maintaining quorum (the same as with 3 managers; an odd number of managers is recommended).
+ +### Storage +- **GlusterFS** mounted at `/home/doc/swarm-data/` on all nodes +- Shared storage enables services to run on any node without storage constraints + +## Service Distribution Strategy + +### Pinned Services +Services that must run on specific nodes: + +- **traefik** (p0) - Published ports 80/443, needs stable IP for DNS +- **portainer** (p0) - Management UI, stays with leader for convenience +- **rsync** (manager constraint) - Backup service, needs manager access + +### Floating Services +Services that can run on any node (swarm auto-balances): + +- adminer +- authentik (server, worker, redis) +- n8n +- paperless (webserver, redis) +- tracker-nginx +- uptime-kuma + +## Recent Changes (2025-10-30) + +### Swarm Rebalancing +- Promoted p1, p2, p3 from workers to managers +- Removed unnecessary hostname constraints from service configs +- Force-redeployed services to redistribute across all nodes +- Verified GlusterFS accessibility on all nodes + +### Results +- Achieved balanced workload distribution across all 4 nodes +- Improved high availability with 4 managers (Raft quorum of 3, tolerates 1 manager failure) +- Services are now rescheduled automatically onto healthy nodes when a node fails; a recovered node only receives tasks again after a forced service update +- Fixed Portainer agent connectivity by restarting agents after manager promotion \ No newline at end of file diff --git a/stacks/apps/adminer/stack.yml b/stacks/apps/adminer/stack.yml index 1e7b023..25134e0 100644 --- a/stacks/apps/adminer/stack.yml +++ b/stacks/apps/adminer/stack.yml @@ -10,9 +10,6 @@ services: - ADMINER_DESIGN=nette deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 networks: homelab: external: true diff --git a/stacks/apps/authentik/stack.yml b/stacks/apps/authentik/stack.yml index f56aae4..ec587ea 100644 --- a/stacks/apps/authentik/stack.yml +++ b/stacks/apps/authentik/stack.yml @@ -10,9 +10,6 @@ services: - homelab deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 authentik_server: image: ghcr.io/goauthentik/server:2025.10.0 @@ -38,9 +35,6 @@ services:
- homelab deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 labels: - "traefik.enable=true" - "traefik.http.routers.authentik.rule=Host(`auth.frostlabs.me`)" @@ -75,9 +69,6 @@ services: - homelab deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 depends_on: - redis diff --git a/stacks/apps/n8n/stack.yml b/stacks/apps/n8n/stack.yml index 9e9fdaa..3bd549d 100644 --- a/stacks/apps/n8n/stack.yml +++ b/stacks/apps/n8n/stack.yml @@ -17,9 +17,6 @@ services: - /var/run/docker.sock:/var/run/docker.sock:ro deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 restart_policy: condition: on-failure delay: 5s diff --git a/stacks/apps/paperless/stack.yml b/stacks/apps/paperless/stack.yml index 1e08428..acc8bc3 100644 --- a/stacks/apps/paperless/stack.yml +++ b/stacks/apps/paperless/stack.yml @@ -5,9 +5,6 @@ services: - homelab deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 paperless_webserver: image: ghcr.io/paperless-ngx/paperless-ngx:latest @@ -48,9 +45,6 @@ services: - homelab deploy: replicas: 1 - placement: - constraints: - - node.hostname == p0 depends_on: # Fixed: removed postgres dependency - paperless_redis diff --git a/stacks/apps/uptime/stack.yml b/stacks/apps/uptime/stack.yml index 77e9f8d..ccda841 100644 --- a/stacks/apps/uptime/stack.yml +++ b/stacks/apps/uptime/stack.yml @@ -14,9 +14,10 @@ services: retries: 3 start_period: 60s deploy: - placement: - constraints: [node.hostname == p0] replicas: 1 + placement: + preferences: + - spread: node.hostname restart_policy: condition: on-failure delay: 10s diff --git a/stacks/web/tracker/stack.yml b/stacks/web/tracker/stack.yml index 8aa8206..a92a926 100644 --- a/stacks/web/tracker/stack.yml +++ b/stacks/web/tracker/stack.yml @@ -11,9 +11,6 @@ services: - /home/doc/swarm-data/appdata/webfiles/production/taylors-development:/usr/share/nginx/html:ro deploy: replicas: 1 - placement: - constraints: - - node.role == worker networks: homelab: 
external: true \ No newline at end of file