diff --git a/scripts/deployment/.env.prod.example b/scripts/deployment/.env.prod.example new file mode 100644 index 0000000..11cc0b7 --- /dev/null +++ b/scripts/deployment/.env.prod.example @@ -0,0 +1,80 @@ +# CurrenciCombo orchestrator production env (Phoenix CT 8604 / any systemd host) +# +# Installed by scripts/deployment/install.sh to: +# /etc/currencicombo/orchestrator.env +# +# Loaded by the currencicombo-orchestrator.service systemd unit via +# EnvironmentFile=. Values that are committed here are safe defaults; +# secrets are left blank and must be set before first boot. +# +# The portal is a statically built SPA (nginx), so it takes NO runtime env. +# Any VITE_* vars needed at build time are baked into dist/ by +# scripts/deployment/deploy-currencicombo-8604.sh before the rsync. + +############################################################ +# Server +############################################################ +NODE_ENV=production +PORT=8080 +# Bind to loopback only when behind NPMplus on the same host; bind +# 0.0.0.0 if NPMplus is on a different host (the CT 8604 case, so 0.0.0.0). +HOST=0.0.0.0 + +############################################################ +# Postgres (local to the CT per install.sh) +############################################################ +DATABASE_URL=postgresql://currencicombo@127.0.0.1:5432/currencicombo + +############################################################ +# Redis (local to the CT per install.sh) +############################################################ +REDIS_URL=redis://127.0.0.1:6379 + +############################################################ +# Event bus signing (REQUIRED). install.sh generates this on first run +# via `openssl rand -hex 32` unless the file already exists. +############################################################ +EVENT_SIGNING_SECRET= + +############################################################ +# API keys per role (REQUIRED). 
install.sh generates three random +# initiator/settler/auditor keys on first run unless set. +# Format: key1:role1,key2:role2,... +############################################################ +ORCHESTRATOR_API_KEYS= + +############################################################ +# Chain 138 — resolves EXT-CHAIN138-CI-RPC (already resolved). +############################################################ +CHAIN_138_RPC_URL=https://rpc.public-0138.defi-oracle.io +CHAIN_138_CHAIN_ID=138 + +# Leave empty to run mock notary. Populate after running +# `contracts/scripts/deploy-notary-registry.ts` once. +NOTARY_REGISTRY_ADDRESS= +# Leave empty to run mock notary. Otherwise 0x-prefixed 32-byte hex. +ORCHESTRATOR_PRIVATE_KEY= + +############################################################ +# External dependency blockers (leave blank → mock fallback + EXT-* log) +# These are the exact IDs that the Proxmox +# scripts/verify/check-external-dependencies.sh gate knows about. +############################################################ + +# EXT-DBIS-CORE — set when dbis_core is deployed and reachable. +DBIS_CORE_URL= + +# EXT-FIN-GATEWAY — set when a real Alliance Access / FIN gateway is +# provisioned. Leave blank to use PR R's in-process sandbox. +FIN_SANDBOX_URL= + +# EXT-CC-* — the following four blockers are upstream-scaffold repos +# (cc-payment-adapters, cc-audit-ledger, cc-shared-events, +# cc-shared-schemas). They cannot be resolved from this repo; no +# env var flips them. The orchestrator logs EXT-CC-* as active on boot. + +# Identity + controls matrix (not blocker IDs per se — they ship +# today via the cc-identity-core and cc-compliance-controls adapters +# merged in PR V/W). Blank keeps the embedded v0 matrix + mock identity. 
+CC_IDENTITY_URL= +CC_CONTROLS_MATRIX_URL= diff --git a/scripts/deployment/README.md b/scripts/deployment/README.md new file mode 100644 index 0000000..6cab131 --- /dev/null +++ b/scripts/deployment/README.md @@ -0,0 +1,254 @@ +# CurrenciCombo — Phoenix / systemd deployment + +This directory holds everything needed to deploy CurrenciCombo onto a +systemd host — starting with Phoenix CT 8604 on `r630-01`, but any +Debian/Ubuntu (or Alpine) host with Postgres + Redis available works. + +The files here are **target-agnostic**. They hardcode no IPs, hostnames, +or VLANs. Environment-specific values — `curucombo.曼李.com`, the +`10.160.0.14` VIP, the NPMplus reverse proxy — are applied at the +edge (NPMplus) and at `/etc/currencicombo/orchestrator.env`, never in +the repo. + +## Architecture on CT 8604 + +``` + ┌────────────────────┐ + curucombo.曼李.com ──▶ NPMplus │192.168.11.167 │ + (Cloudflare-proxied) │ TLS terminates here│ + └─────────┬──────────┘ + │ + ┌──────────────────────┴──────────────────────┐ + │ │ + ▼ ▼ + curucombo.曼李.com/* (default) curucombo.曼李.com/api/* + (incl. 
SSE /api/plans/*/events/stream) + │ │ + CT 8604 │10.160.0.14:3000 CT 8604 │10.160.0.14:8080 + ▼ ▼ + ┌─────────────────────────────┐ ┌─────────────────────────────┐ + │ currencicombo-webapp.service │ │ currencicombo-orchestrator │ + │ nginx → /opt/currencicombo/ │ │ .service (systemd) │ + │ webapp/dist/ │ │ node dist/index.js │ + └─────────────────────────────┘ │ env /etc/currencicombo/ │ + │ orchestrator.env │ + └──────────────┬──────────────┘ + │ + ▼ + postgresql + redis (same CT, local) +``` + +## Files + +| path | purpose | +|---|---| +| `systemd/currencicombo-orchestrator.service` | Node orchestrator, reads `/etc/currencicombo/orchestrator.env` | +| `systemd/currencicombo-webapp.service` | nginx serving the Vite SPA on `:3000` | +| `webapp-nginx.conf` | full nginx.conf for the webapp unit | +| `.env.prod.example` | env template installed to `/etc/currencicombo/orchestrator.env` | +| `install.sh` | one-shot host setup: user / dirs / DB role / systemd units / first-run key handoff file | +| `install-prune-cron.sh` | opt-in daily cron that prunes `/var/lib/currencicombo/backups/` (30-day retention, keep-min 5) | +| `deploy-currencicombo-8604.sh` | build-and-swap deploy driver (the script Phoenix/proxmox deploy-api calls) | +| `README.md` | you're reading it | + +## First-time setup on CT 8604 + +All commands run as **root** inside the CT. + +1. Ensure Postgres + Redis are installed and running: + ``` + apt-get install -y postgresql redis-server + systemctl enable --now postgresql redis-server + ``` +2. Clone the repo into its staging location (once): + ``` + install -d -o root -g root /var/lib/currencicombo + git clone https://gitea.d-bis.org/d-bis/CurrenciCombo.git /var/lib/currencicombo/repo + ``` +3. 
Run `install.sh` (creates user, DB, systemd units, env file): + ``` + bash /var/lib/currencicombo/repo/scripts/deployment/install.sh + ``` + On success you'll see: + ``` + [install] generated EVENT_SIGNING_SECRET (64 hex) + [install] generated 3 API keys (initiator/settler/auditor) + [install] initial secrets written to /root/currencicombo-first-keys.txt (0600) — record in password manager, then 'shred -u /root/currencicombo-first-keys.txt' + [install] install complete. + ``` + `install.sh` writes the three API keys + `EVENT_SIGNING_SECRET` to **two** places: + - `/etc/currencicombo/orchestrator.env` — canonical, read by systemd (`0640`, owned by `currencicombo`). + - `/root/currencicombo-first-keys.txt` — **root-only handoff file** (`0600`). Grab it once, record the values in your password manager, then `shred -u` it. + The handoff file is **not** regenerated on re-run — if `orchestrator.env` already exists, `install.sh` does not produce new secrets. +4. (Optional) Install the backup-pruning cron: + ``` + bash /var/lib/currencicombo/repo/scripts/deployment/install-prune-cron.sh + ``` + Drops a `/etc/cron.daily/currencicombo-prune-backups` that deletes anything under `/var/lib/currencicombo/backups/` older than 30 days while **always keeping the newest 5** regardless of age. Safe on re-run; opt out with `sudo rm /etc/cron.daily/currencicombo-prune-backups`. +5. If you need to resolve any `EXT-*` blocker (e.g. point at a real dbis_core), edit `/etc/currencicombo/orchestrator.env` before the first deploy. +6. First build-and-start: + ``` + bash /var/lib/currencicombo/repo/scripts/deployment/deploy-currencicombo-8604.sh + ``` + Expected tail: + ``` + [deploy] orchestrator ready: {"ready":true} + [deploy] portal OK (HTTP 200) + [deploy] EXT-* blocker summary from orchestrator boot log: + [ExternalBlockers] 6 active, 1 resolved + id: EXT-DBIS-CORE + id: EXT-CC-PAYMENT-ADAPTERS + ... + id: EXT-CHAIN138-CI-RPC (resolved) + [deploy] deploy complete. 
ref=main sha=<sha> ts=<ts> + ``` + +## NPMplus ingress changes required at cutover + +`curucombo.曼李.com` today proxies 100% to `10.160.0.14:3000`. After +cutover it must become a **single-origin path-routed proxy** with **two** +rules (the SSE endpoint lives at `/api/plans/:id/events/stream`, so it's +already under `/api/*` — no separate `/events/*` rule is needed): + +| location | upstream | proxy settings | +|---|---|---| +| `/api/*` | `http://10.160.0.14:8080` | **SSE-friendly settings apply here because the SSE route `/api/plans/:id/events/stream` is under /api/**. Set: `proxy_http_version 1.1;`, `proxy_set_header Connection "";`, `proxy_buffering off;`, `proxy_cache off;`, `proxy_read_timeout 24h;`, `proxy_send_timeout 24h;`. Standard forwarding: `proxy_set_header Host $host;`, `X-Real-IP $remote_addr;`, `X-Forwarded-For $proxy_add_x_forwarded_for;`, `X-Forwarded-Proto $scheme;`. The slight overhead of `proxy_buffering off` on plain REST calls is negligible for this workload. | +| `/` | `http://10.160.0.14:3000` | Vite SPA. Default upstream. No special settings. | + +If you skip the `/api/*` rule, the nginx in `webapp-nginx.conf` +intentionally returns `HTTP 421` for that path — a clean "upstream is +misconfigured" signal instead of silently returning `index.html` and +breaking the browser with a JSON parse error. + +## Subsequent deploys + +Every deploy after the first is just: + +``` +sudo /var/lib/currencicombo/repo/scripts/deployment/deploy-currencicombo-8604.sh +``` + +Flags: +- `--ref=<ref>` — deploy something other than `main`. +- `--dry-run` — print what would happen, don't touch anything. +- `--skip-migrate` — hotfix deploys that don't change the schema. +- `--skip-build` — reuse the build from the previous run (debugging only). +- `--rollback` — restore the most recent `/var/lib/currencicombo/backups/<ts>/` and restart units. Does **not** git-pull or rebuild. + +Every deploy writes a timestamped backup to +`/var/lib/currencicombo/backups/<ts>/` before swapping. 
Pruning is opt-in via `install-prune-cron.sh` (30-day retention, keep-min 5). Without the cron, backups accumulate forever — quietly filling `/var/lib` is how the next outage starts. + +## Failure handling on deploy + +**Rollback is manual.** `deploy-currencicombo-8604.sh` **does not** auto-restore the previous backup if the orchestrator fails to become ready. First cutovers typically fail because of env typos or migration mistakes, and auto-restoring hides the failure state ops needs. + +Instead, on a readiness timeout the deploy script prints: +- last 40 lines of `journalctl -u currencicombo-orchestrator` +- last 20 lines of `journalctl -u currencicombo-webapp` +- **the exact `--rollback` command with the specific backup path filled in** + +Example tail on failure: +``` +================================================================ +DEPLOY FAILED: orchestrator did not become ready after 60s +================================================================ + +## currencicombo-orchestrator (last 40 lines): +... env validation error: EVENT_SIGNING_SECRET is required ... + +## Units are in whatever state deploy left them. To restore +## the previous build (does NOT revert DB migrations): + + sudo /var/lib/currencicombo/repo/scripts/deployment/deploy-currencicombo-8604.sh --rollback + # (will restore /var/lib/currencicombo/backups/20260423-140215) + +================================================================ +``` + +Rollback one-liner (when ops has decided to restore): +``` +sudo /var/lib/currencicombo/repo/scripts/deployment/deploy-currencicombo-8604.sh --rollback +``` + +Rollback restores the most recent backup and restarts both units. It **does not** touch the DB. If the failed deploy applied a new migration, DB rollback is a manual `psql` task — the orchestrator's migration runner only emits `up()` paths. + +## Post-cutover smoke checks through NPMplus + +Once the NPMplus `/api/*` rule is live, from a workstation (not the CT): + +``` +# 1. 
Front-door TLS is healthy +curl -skI https://curucombo.xn--vov0g.com/ | head -3 +# expect: HTTP/2 200 +# expect: NO 'x-nextjs-prerender' header (that was the old Next.js build) + +# 2. SPA is the new Vite portal +curl -sk https://curucombo.xn--vov0g.com/ | grep -oE '<title>[^<]+' +# expect: Solace Bank Group PLC — Treasury Management Portal + +# 3. Orchestrator ready through NPMplus +curl -sk https://curucombo.xn--vov0g.com/api/ready | head -1 +# expect: {"ready":true} (not HTML) + +# 4. Orchestrator blocker log (through CT shell, not NPMplus) +ssh root@10.160.0.14 'journalctl -u currencicombo-orchestrator -n 200 | grep -E "ExternalBlockers|EXT-"' +# expect: [ExternalBlockers] 6 active, 1 resolved +# expect: one line per EXT-* id + +# 5. SSE actually streams (catches silent NPMplus proxy_buffering=on misconfig) +curl -ski -N --max-time 5 -H 'Accept: text/event-stream' \ + https://curucombo.xn--vov0g.com/api/plans/demo-pay-014/events/stream \ + | head -20 || true +# expect: HTTP/2 200 with Content-Type: text/event-stream +# expect: at least one 'data: {...}\n\n' frame to arrive WITHIN ~1s +# if you see nothing for 3-5s and then everything dumps at once: +# NPMplus has proxy_buffering=on. Fix: proxy_buffering off; proxy_http_version 1.1; proxy_set_header Connection ""; +# if the response is 401/403: expected — SSE is auth-gated; the point is to +# prove the request REACHED the orchestrator (content-type header + +# chunked response headers) rather than hitting the Vite SPA. +``` + +A plain `HTTP/2 200` with a `Content-Type: text/html` body on `/api/ready` means NPMplus is silently falling back to the `/` rule — the `/api/*` rule is missing or ordered wrong. The `webapp-nginx.conf` in this repo returns `HTTP 421` for `/api/*` to make that case obvious when debugging CT-locally, but at the NPMplus edge nginx serves whatever NPMplus routes to it. 
+ +## Troubleshooting + +| symptom | cause / check | +|---|---| +| `/api/*` returns `421 NPMplus is misconfigured` | NPMplus `/api/*` rule missing or wrong upstream. | +| SSE stream (`/api/plans/:id/events/stream`) connects then disconnects after ~60s | NPMplus `/api/*` rule is missing `proxy_buffering off` + a high `proxy_read_timeout`. | +| orchestrator unit enters `activating (auto-restart)` loop | `journalctl -u currencicombo-orchestrator -n 80` — usually a zod env-validation error. The boot-time assertion message names the missing/invalid var. | +| orchestrator boot log says `[ExternalBlockers] N active` where N > 6 | you added an `EXT-*` env var without also updating the central registry in `orchestrator/src/config/externalBlockers.ts`. | +| `/health` returns 503 but `/ready` is 200 | memory `critical` is a separate signal from readiness. Inspect CT memory; this happens on constrained builders and is not a deploy bug. | +| portal page loads but MetaMask login does nothing | the portal couldn't reach `/api/auth/*`. Walk back up the NPMplus rule chain. | + +## Cutting over from the pre-existing Next.js build + +Phoenix previously had an older Next.js "ISO-20022 Combo Flow" app in +`/opt/currencicombo/webapp`. The cutover sequence on CT 8604 is: + +1. **Back up the old install** out-of-band: + ``` + tar czf /root/currencicombo-preRepo-$(date +%s).tgz /opt/currencicombo /etc/currencicombo 2>/dev/null || true + ``` +2. **Disable the pre-existing systemd units** (they're the same names but point at the old tree): + ``` + systemctl stop currencicombo-webapp currencicombo-orchestrator + systemctl disable currencicombo-webapp currencicombo-orchestrator + ``` +3. Run `install.sh` (writes the new units, new nginx, new env). On an already-set-up host this is idempotent: it preserves `/etc/currencicombo/orchestrator.env` if it already exists. +4. Run `deploy-currencicombo-8604.sh`. +5. Apply the NPMplus `/api/*` path rule with the SSE-friendly proxy settings (the SSE stream lives under `/api/*`, so no separate `/events/*` rule is needed). +6. 
Smoke from outside the CT: `curl -skI https://curucombo.xn--vov0g.com/ && curl -sk https://curucombo.xn--vov0g.com/api/ready`. + +## Proxmox-side follow-up (not in this PR) + +After this PR merges and the above cutover runs cleanly, the +`/home/intlc/projects/proxmox` repo needs a separate commit to: + +- Update `phoenix-deploy-api/deploy-targets.json` to point at: + - repo: `d-bis/CurrenciCombo` + - branch: `main` + - target: `default` + - deploy entrypoint: `scripts/deployment/deploy-currencicombo-8604.sh` +- Remove any stale `/opt/currencicombo/webapp` Next.js references. +- Drop any description of `ignoreBuildErrors: true` in `webapp/next.config.ts` — the new webapp is Vite+tsc-strict, no build-error suppression. diff --git a/scripts/deployment/deploy-currencicombo-8604.sh b/scripts/deployment/deploy-currencicombo-8604.sh new file mode 100755 index 0000000..ca638a1 --- /dev/null +++ b/scripts/deployment/deploy-currencicombo-8604.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash +# deploy-currencicombo-8604.sh — build-and-swap deploy for CurrenciCombo. +# +# Runs on a systemd host that has already had `install.sh` applied once. +# This is the script referenced by the Proxmox repo's +# `phoenix-deploy-api/deploy-targets.json` tuple +# (repo=d-bis/CurrenciCombo, branch=main, target=default). +# +# Steps (each idempotent, each can be --dry-run'd): +# 1. git clone/pull /var/lib/currencicombo/repo to the target ref. +# 2. Build orchestrator and portal/webapp (npm ci + npm run build each), +# baking VITE_ORCHESTRATOR_URL into the portal bundle. +# 3. Run DB migrations (npm run migrate in orchestrator/). +# 4. Back up the current install under /var/lib/currencicombo/backups/. +# 5. Stop systemd units. +# 6. rsync build output into /opt/currencicombo/{orchestrator,webapp}. +# 7. Start systemd units. +# 8. Smoke-test /ready + portal / + print EXT-* blocker summary. +# +# Rollback: `--rollback` restores the most recent backup under +# /var/lib/currencicombo/backups/. 
+# +# CT 8604 is in the filename for ops-grep-ability; the script itself is +# host-agnostic. Override paths via env vars if you run it elsewhere. + +set -euo pipefail + +# ----- defaults (override via env) ------------------------------------ +: "${CC_GIT_REMOTE:=https://gitea.d-bis.org/d-bis/CurrenciCombo.git}" +: "${CC_GIT_REF:=main}" +: "${CC_REPO_DIR:=/var/lib/currencicombo/repo}" +: "${CC_APP_HOME:=/opt/currencicombo}" +: "${CC_BACKUP_DIR:=/var/lib/currencicombo/backups}" +: "${CC_USER:=currencicombo}" + +# Portal build-time env. The NPMplus ingress path-routes /api/* (which +# includes the SSE stream) to the orchestrator, so same-origin works. +: "${VITE_ORCHESTRATOR_URL:=https://curucombo.xn--vov0g.com}" + +: "${ORCHESTRATOR_UNIT:=currencicombo-orchestrator.service}" +: "${WEBAPP_UNIT:=currencicombo-webapp.service}" + +: "${CC_HEALTH_URL:=http://127.0.0.1:8080/ready}" +: "${CC_PORTAL_URL:=http://127.0.0.1:3000/}" +: "${CC_HEALTH_TIMEOUT_SECS:=60}" + +# ----- flags ---------------------------------------------------------- +DRY_RUN=0 +SKIP_MIGRATE=0 +SKIP_BUILD=0 +DO_ROLLBACK=0 + +usage() { + cat <<'USAGE' +Usage: sudo ./deploy-currencicombo-8604.sh [flags] + +Flags: + --ref= Override CC_GIT_REF (default: main) + --dry-run Print commands, don't run them + --skip-migrate Skip `npm run migrate` step (use for hotfix + deploys where schema hasn't changed) + --skip-build Reuse the existing build in CC_REPO_DIR/dist + (useful after `--dry-run --skip-build=no` from + the previous run) + --rollback Restore the most recent backup and restart. + Does not run git/build/migrate. 
+ -h, --help This help + +Env overrides: + CC_GIT_REMOTE, CC_GIT_REF, CC_REPO_DIR, CC_APP_HOME, CC_BACKUP_DIR, + CC_USER, VITE_ORCHESTRATOR_URL, ORCHESTRATOR_UNIT, WEBAPP_UNIT, + CC_HEALTH_URL, CC_PORTAL_URL, CC_HEALTH_TIMEOUT_SECS +USAGE +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --ref=*) CC_GIT_REF="${1#*=}"; shift ;; + --dry-run) DRY_RUN=1; shift ;; + --skip-migrate) SKIP_MIGRATE=1; shift ;; + --skip-build) SKIP_BUILD=1; shift ;; + --rollback) DO_ROLLBACK=1; shift ;; + -h|--help) usage; exit 0 ;; + *) echo "unknown arg: $1" >&2; usage; exit 2 ;; + esac +done + +log() { printf '[deploy] %s\n' "$*" >&2; } +warn() { printf '[deploy][WARN] %s\n' "$*" >&2; } +die() { printf '[deploy][FATAL] %s\n' "$*" >&2; exit 1; } +run() { if [[ "${DRY_RUN}" -eq 1 ]]; then printf '[deploy][dry-run] %s\n' "$*" >&2; else eval "$*"; fi; } +runcc() { if [[ "${DRY_RUN}" -eq 1 ]]; then printf '[deploy][dry-run][as %s] %s\n' "${CC_USER}" "$*" >&2; else sudo -u "${CC_USER}" -H bash -lc "$*"; fi; } + +[[ "$EUID" -eq 0 ]] || die "must run as root (sudo)" + +# ----- rollback fast-path --------------------------------------------- +if [[ "${DO_ROLLBACK}" -eq 1 ]]; then + LATEST="$(ls -1dt "${CC_BACKUP_DIR}"/* 2>/dev/null | head -1 || true)" + [[ -n "${LATEST}" ]] || die "no backup under ${CC_BACKUP_DIR}" + log "rolling back to ${LATEST}" + run "systemctl stop '${WEBAPP_UNIT}' '${ORCHESTRATOR_UNIT}'" + run "rsync -a --delete '${LATEST}/orchestrator/' '${CC_APP_HOME}/orchestrator/'" + run "rsync -a --delete '${LATEST}/webapp/' '${CC_APP_HOME}/webapp/'" + run "systemctl start '${ORCHESTRATOR_UNIT}' '${WEBAPP_UNIT}'" + log "rollback applied. systemctl status ${ORCHESTRATOR_UNIT} to verify." + exit 0 +fi + +# ----- 1. git --------------------------------------------------------- +if [[ ! 
-d "${CC_REPO_DIR}/.git" ]]; then + log "cloning ${CC_GIT_REMOTE} → ${CC_REPO_DIR}" + run "install -d -o '${CC_USER}' -g '${CC_USER}' -m 0755 '${CC_REPO_DIR}'" + runcc "git clone '${CC_GIT_REMOTE}' '${CC_REPO_DIR}'" +fi +runcc "cd '${CC_REPO_DIR}' && git fetch --prune origin" +runcc "cd '${CC_REPO_DIR}' && git reset --hard 'origin/${CC_GIT_REF}'" +REF_SHA="$(sudo -u "${CC_USER}" git -C "${CC_REPO_DIR}" rev-parse --short HEAD 2>/dev/null || echo unknown)" +log "repo at ${CC_GIT_REF} = ${REF_SHA}" + +# ----- 2. orchestrator build ----------------------------------------- +if [[ "${SKIP_BUILD}" -eq 0 ]]; then + log "building orchestrator" + runcc "cd '${CC_REPO_DIR}/orchestrator' && npm ci --no-audit --no-fund" + runcc "cd '${CC_REPO_DIR}/orchestrator' && npm run build" + log "building portal (VITE_ORCHESTRATOR_URL=${VITE_ORCHESTRATOR_URL})" + runcc "cd '${CC_REPO_DIR}' && npm ci --include=optional --no-audit --no-fund || npm ci --include=optional --force --no-audit --no-fund" + runcc "cd '${CC_REPO_DIR}' && VITE_ORCHESTRATOR_URL='${VITE_ORCHESTRATOR_URL}' npm run build" +else + log "skipping builds (--skip-build)" +fi + +# ----- 3. migrations -------------------------------------------------- +if [[ "${SKIP_MIGRATE}" -eq 0 ]]; then + log "running DB migrations" + runcc "cd '${CC_REPO_DIR}/orchestrator' && npm run migrate" +else + log "skipping migrations (--skip-migrate)" +fi + +# ----- 4. backup previous install ------------------------------------ +TS="$(date +%Y%m%d-%H%M%S)" +BACKUP="${CC_BACKUP_DIR}/${TS}" +if [[ -d "${CC_APP_HOME}/orchestrator/dist" || -d "${CC_APP_HOME}/webapp/dist" ]]; then + log "backing up current install → ${BACKUP}" + run "install -d -o root -g root -m 0700 '${BACKUP}/orchestrator' '${BACKUP}/webapp'" + run "rsync -a '${CC_APP_HOME}/orchestrator/' '${BACKUP}/orchestrator/'" + run "rsync -a '${CC_APP_HOME}/webapp/' '${BACKUP}/webapp/'" +fi + +# ----- 5. 
stop units -------------------------------------------------- +log "stopping systemd units" +run "systemctl stop '${WEBAPP_UNIT}' || true" +run "systemctl stop '${ORCHESTRATOR_UNIT}' || true" + +# ----- 6. swap in new build ------------------------------------------ +log "rsyncing new build into ${CC_APP_HOME}" +# Orchestrator: dist/ + node_modules/ + package.json + package-lock.json +runcc "rsync -a --delete '${CC_REPO_DIR}/orchestrator/dist/' '${CC_APP_HOME}/orchestrator/dist/'" +runcc "rsync -a '${CC_REPO_DIR}/orchestrator/node_modules/' '${CC_APP_HOME}/orchestrator/node_modules/'" +runcc "cp '${CC_REPO_DIR}/orchestrator/package.json' '${CC_APP_HOME}/orchestrator/package.json'" +runcc "cp '${CC_REPO_DIR}/orchestrator/package-lock.json' '${CC_APP_HOME}/orchestrator/package-lock.json'" +# Webapp: dist/ +runcc "rsync -a --delete '${CC_REPO_DIR}/dist/' '${CC_APP_HOME}/webapp/dist/'" + +# ----- 7. start units ------------------------------------------------ +log "starting systemd units" +run "systemctl start '${ORCHESTRATOR_UNIT}'" +run "systemctl start '${WEBAPP_UNIT}'" + +# ----- 8. smoke ------------------------------------------------------- +if [[ "${DRY_RUN}" -eq 1 ]]; then + log "dry-run: skipping smoke test" + exit 0 +fi + +log "waiting up to ${CC_HEALTH_TIMEOUT_SECS}s for orchestrator ${CC_HEALTH_URL}" +SECS=0 +until curl -sfL --max-time 3 "${CC_HEALTH_URL}" >/dev/null 2>&1; do + SECS=$((SECS + 2)) + if [[ "${SECS}" -ge "${CC_HEALTH_TIMEOUT_SECS}" ]]; then + # Loud failure summary. Deliberately does NOT auto-rollback — first + # cutovers often fail because of env/migration mistakes, and + # auto-restoring the old build hides the failure state ops needs to + # diagnose. Print the exact --rollback command with the specific + # backup path filled in, so it's one copy-paste away if desired. 
+ { + echo + echo "================================================================" + echo "DEPLOY FAILED: orchestrator did not become ready after ${CC_HEALTH_TIMEOUT_SECS}s" + echo "================================================================" + echo + echo "## currencicombo-orchestrator (last 40 lines):" + journalctl -u "${ORCHESTRATOR_UNIT}" -n 40 --no-pager 2>&1 || echo "(journalctl unavailable)" + echo + echo "## currencicombo-webapp (last 20 lines):" + journalctl -u "${WEBAPP_UNIT}" -n 20 --no-pager 2>&1 || echo "(journalctl unavailable)" + echo + echo "## Units are in whatever state deploy left them. To restore" + echo "## the previous build (does NOT revert DB migrations):" + echo + if [[ -n "${BACKUP:-}" && -d "${BACKUP}" ]]; then + echo " sudo $0 --rollback" + echo " # (will restore ${BACKUP})" + else + echo " # No backup was taken (first deploy). Manual recovery required." + fi + echo + echo "================================================================" + } >&2 + exit 1 + fi + sleep 2 +done +log "orchestrator ready: $(curl -sf "${CC_HEALTH_URL}")" + +log "probing portal ${CC_PORTAL_URL}" +PORTAL_CODE="$(curl -s -o /dev/null -w '%{http_code}' "${CC_PORTAL_URL}" || echo ERR)" +[[ "${PORTAL_CODE}" =~ ^2 ]] || die "portal returned HTTP ${PORTAL_CODE}" +log "portal OK (HTTP ${PORTAL_CODE})" + +log "EXT-* blocker summary from orchestrator boot log:" +journalctl -u "${ORCHESTRATOR_UNIT}" --no-pager -n 200 \ + | grep -E 'ExternalBlockers|EXT-[A-Z0-9-]+' | tail -20 || true + +log "deploy complete. ref=${CC_GIT_REF} sha=${REF_SHA} ts=${TS}" diff --git a/scripts/deployment/install-prune-cron.sh b/scripts/deployment/install-prune-cron.sh new file mode 100755 index 0000000..22e934d --- /dev/null +++ b/scripts/deployment/install-prune-cron.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +# install-prune-cron.sh — opt-in cron job to prune old deploy backups. 
+# +# Run ONCE as root (or with sudo) after install.sh to enable daily +# pruning of /var/lib/currencicombo/backups/. The pruner: +# - deletes entries older than CC_BACKUP_RETAIN_DAYS (default 30 days) +# - ALWAYS keeps the newest N backups regardless of age (default 5) +# +# Safe to re-run (the cron file is simply rewritten). Opt out by removing /etc/cron.daily/currencicombo-prune-backups. + +set -euo pipefail + +BACKUP_DIR="${CC_BACKUP_DIR:-/var/lib/currencicombo/backups}" +RETAIN_DAYS="${CC_BACKUP_RETAIN_DAYS:-30}" +KEEP_MIN="${CC_BACKUP_KEEP_MIN:-5}" +CRON_FILE="/etc/cron.daily/currencicombo-prune-backups" +DRY_RUN=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=1; shift ;; + -h|--help) + cat <<'USAGE' +Usage: sudo ./install-prune-cron.sh [--dry-run] + +Env overrides: + CC_BACKUP_DIR (default: /var/lib/currencicombo/backups) + CC_BACKUP_RETAIN_DAYS (default: 30) + CC_BACKUP_KEEP_MIN (default: 5) +USAGE + exit 0 ;; + *) echo "unknown arg: $1" >&2; exit 2 ;; + esac +done + +log() { printf '[install-prune-cron] %s\n' "$*" >&2; } +die() { printf '[install-prune-cron][FATAL] %s\n' "$*" >&2; exit 1; } + +[[ "$EUID" -eq 0 ]] || die "must run as root (sudo)" + +# The pruner script body. Runs daily via cron.daily. +# KEEP_MIN is enforced by listing backups newest-first, skipping the +# first KEEP_MIN, then deleting any remaining entries older than +# RETAIN_DAYS. A backup is therefore deleted only when it is BOTH +# outside the newest KEEP_MIN and older than RETAIN_DAYS; the newest +# KEEP_MIN are never deleted (even if >30 days old on a dormant host). 
+read -r -d '' PRUNER_BODY </dev/null | sort -rn | awk '{print \$2}') + +count=\${#all[@]} +if (( count <= KEEP_MIN )); then + logger -t currencicombo-prune "count=\$count <= KEEP_MIN=\$KEEP_MIN; nothing to prune" + exit 0 +fi + +cutoff=\$(date -d "\$RETAIN_DAYS days ago" +%s) +deleted=0 +kept=0 +for i in "\${!all[@]}"; do + p="\${all[\$i]}" + if (( i < KEEP_MIN )); then + kept=\$((kept + 1)) + continue + fi + mtime=\$(stat -c %Y "\$p" 2>/dev/null || echo 0) + if (( mtime < cutoff )); then + rm -rf -- "\$p" + deleted=\$((deleted + 1)) + else + kept=\$((kept + 1)) + fi +done +logger -t currencicombo-prune "deleted=\$deleted kept=\$kept total_before=\$count" +PRUNER + +if [[ "${DRY_RUN}" -eq 1 ]]; then + log "[dry-run] would write ${CRON_FILE} (0755) with pruner targeting ${BACKUP_DIR}, retain ${RETAIN_DAYS}d, keep-min ${KEEP_MIN}" + echo "---" + echo "${PRUNER_BODY}" + echo "---" + exit 0 +fi + +printf '%s\n' "${PRUNER_BODY}" > "${CRON_FILE}" +chmod 0755 "${CRON_FILE}" +chown root:root "${CRON_FILE}" + +log "installed ${CRON_FILE} (backups older than ${RETAIN_DAYS}d, keep-min ${KEEP_MIN}, target ${BACKUP_DIR})" +log "runs daily via /etc/cron.daily/. Opt out: sudo rm ${CRON_FILE}" +log "logs to syslog (tag currencicombo-prune); journalctl -t currencicombo-prune" diff --git a/scripts/deployment/install.sh b/scripts/deployment/install.sh new file mode 100755 index 0000000..c92537b --- /dev/null +++ b/scripts/deployment/install.sh @@ -0,0 +1,238 @@ +#!/usr/bin/env bash +# install.sh — idempotent first-time setup for CurrenciCombo on a systemd host. +# +# Intended to run ONCE per host as root (or with sudo). Running it again is +# safe: it will skip already-present artifacts and warn on conflicts. +# +# What this does: +# 1. Creates the `currencicombo` system user and /opt/currencicombo tree. +# 2. Installs nginx (Debian/Ubuntu or Alpine) if not present. +# 3. 
#      Requires `psql` on PATH (this script does not install Postgres) and
#      creates the `currencicombo` role + DB when they are missing. An
#      existing role is left untouched unless --force-recreate-db is passed.
#   4. Ensures a local Redis is running.
#   5. Writes /etc/currencicombo/orchestrator.env from .env.prod.example,
#      auto-populating EVENT_SIGNING_SECRET and ORCHESTRATOR_API_KEYS with
#      fresh randoms the first time.
#   6. Installs /etc/currencicombo/webapp-nginx.conf.
#   7. Installs the two systemd units and runs `systemctl daemon-reload`.
#   8. Enables (does NOT start) both units. First start happens via
#      scripts/deployment/deploy-currencicombo-8604.sh after the first
#      successful build.
#
# This script is target-agnostic. It has no hardcoded IP / hostname /
# VLAN. The NPMplus ingress in front of it is configured separately —
# see scripts/deployment/README.md.

set -euo pipefail

# Directory containing this script; used to locate sibling template files.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
APP_USER="currencicombo"
APP_HOME="/opt/currencicombo"
ETC_DIR="/etc/currencicombo"
LOG_DIR="/var/log/currencicombo"
REPO_DIR="/var/lib/currencicombo/repo"
ENV_FILE="${ETC_DIR}/orchestrator.env"
NGINX_FILE="${ETC_DIR}/webapp-nginx.conf"
SYSTEMD_DIR="/etc/systemd/system"

# Command-line switches (0 = off, 1 = on); set by the argument loop below.
FORCE_RECREATE_DB=0
DRY_RUN=0
SKIP_NGINX_INSTALL=0

# Diagnostics all go to stderr so stdout stays clean.
log() { printf '[install] %s\n' "$*" >&2; }
warn() { printf '[install][WARN] %s\n' "$*" >&2; }
die() {
  printf '[install][FATAL] %s\n' "$*" >&2
  exit 1
}

# run CMD...
#   Executes a command, or only prints it when --dry-run is active.
#   Arguments are joined with spaces and passed through `eval`, so callers
#   may hand over either separate words or one pre-quoted command string
#   (needed for env-var prefixes and nested quoting). Because of the
#   re-evaluation, arguments containing whitespace or shell metacharacters
#   must be quoted inside a single string argument.
run() {
  if [[ "${DRY_RUN}" -eq 1 ]]; then
    printf '[install][dry-run] %s\n' "$*" >&2
  else
    eval "$*"
  fi
}

# Prints the command-line help to stdout.
usage() {
  cat <<'USAGE'
Usage: sudo ./install.sh [--force-recreate-db] [--skip-nginx-install] [--dry-run]

  --force-recreate-db     DROP and recreate the currencicombo Postgres role
                          and DB even if they already exist. DESTRUCTIVE.
  --skip-nginx-install    Do not apt/apk install nginx (use if you already
                          have a custom nginx build in place).
  --dry-run               Print the commands that would run, don't run them.
USAGE
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --force-recreate-db)
      FORCE_RECREATE_DB=1
      shift
      ;;
    --skip-nginx-install)
      SKIP_NGINX_INSTALL=1
      shift
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      die "unknown arg: $1"
      ;;
  esac
done

[[ "$EUID" -eq 0 ]] || die "must run as root (sudo)"

# ----------------------------------------------------------------------
# 1. User + tree
# ----------------------------------------------------------------------
if id "${APP_USER}" >/dev/null 2>&1; then
  log "user ${APP_USER} already exists"
else
  log "creating system user ${APP_USER}"
  run useradd --system --home-dir "${APP_HOME}" --shell /usr/sbin/nologin --user-group "${APP_USER}"
fi

for d in "${APP_HOME}" "${APP_HOME}/orchestrator" "${APP_HOME}/webapp" \
         "${APP_HOME}/webapp/dist" "${ETC_DIR}" "${LOG_DIR}" "${REPO_DIR}"; do
  run install -d -o "${APP_USER}" -g "${APP_USER}" -m 0755 "$d"
done
run chown "${APP_USER}:${APP_USER}" "${APP_HOME}" "${LOG_DIR}" "${REPO_DIR}"
# The config directory is tightened after creation: no access for "other".
run chmod 0750 "${ETC_DIR}"

# ----------------------------------------------------------------------
# 2. nginx (required by currencicombo-webapp.service)
# ----------------------------------------------------------------------
if [[ "${SKIP_NGINX_INSTALL}" -eq 0 ]]; then
  if command -v nginx >/dev/null 2>&1; then
    log "nginx already installed ($(nginx -v 2>&1 | head -1))"
  elif command -v apt-get >/dev/null 2>&1; then
    log "installing nginx via apt"
    run 'DEBIAN_FRONTEND=noninteractive apt-get update -q'
    run 'DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nginx-light'
    # We use our own nginx.conf via -c, so disable the distro site.
    # Best-effort: a failure to disable the distro unit is ignored.
    run systemctl disable --now nginx 2>/dev/null || true
  elif command -v apk >/dev/null 2>&1; then
    log "installing nginx via apk"
    run apk add --no-cache nginx
    # Best-effort, same reasoning as the systemd branch above.
    run rc-update del nginx 2>/dev/null || true
  else
    die "no apt or apk available — install nginx manually or re-run with --skip-nginx-install"
  fi
fi
[[ -f /etc/nginx/mime.types ]] || warn "/etc/nginx/mime.types missing; webapp-nginx.conf may fail"

# ----------------------------------------------------------------------
# 3. Postgres role + DB
# ----------------------------------------------------------------------
if ! command -v psql >/dev/null 2>&1; then
  die "psql not on PATH — install Postgres on this host (e.g. apt install postgresql) before running install.sh"
fi

# Use the OS `postgres` superuser for DDL.
# Both probes return 0 only when the catalog query prints a row; any psql
# failure (stderr is suppressed) is reported as "does not exist".
pg_role_exists() {
  sudo -u postgres psql -tAc "SELECT 1 FROM pg_roles WHERE rolname='${APP_USER}';" 2>/dev/null | grep -q 1
}
pg_db_exists() {
  sudo -u postgres psql -tAc "SELECT 1 FROM pg_database WHERE datname='${APP_USER}';" 2>/dev/null | grep -q 1
}

# Probe once and track state in flags. Re-probing after the DROP statements
# would be wrong under --dry-run: the drops are only printed, the role would
# still be reported as present, and the preview would omit the CREATE steps
# that a real run performs.
role_present=0
db_present=0
if pg_role_exists; then
  role_present=1
fi
if pg_db_exists; then
  db_present=1
fi

if [[ "${role_present}" -eq 1 ]]; then
  if [[ "${FORCE_RECREATE_DB}" -eq 1 ]]; then
    log "dropping existing role/DB (--force-recreate-db)"
    run "sudo -u postgres psql -c 'DROP DATABASE IF EXISTS ${APP_USER};'"
    run "sudo -u postgres psql -c 'DROP ROLE IF EXISTS ${APP_USER};'"
    # Under set -e a failed DROP aborts the script, so reaching this point
    # means both objects are gone (or would be, in a dry run).
    role_present=0
    db_present=0
  else
    warn "Postgres role ${APP_USER} already exists — skipping role/DB creation. Re-run with --force-recreate-db to wipe."
  fi
fi

if [[ "${role_present}" -eq 0 ]]; then
  log "creating Postgres role ${APP_USER}"
  run "sudo -u postgres psql -c \"CREATE ROLE ${APP_USER} LOGIN;\""
fi
if [[ "${db_present}" -eq 0 ]]; then
  log "creating Postgres database ${APP_USER}"
  run "sudo -u postgres psql -c \"CREATE DATABASE ${APP_USER} OWNER ${APP_USER};\""
fi
# Peer auth from the currencicombo OS user → currencicombo DB role "just works"
# on Debian-style pg_hba (local all all peer). No password needed.
+ +# ---------------------------------------------------------------------- +# 4. Redis +# ---------------------------------------------------------------------- +if systemctl list-unit-files | grep -q '^redis-server\.service'; then + run systemctl enable --now redis-server +elif systemctl list-unit-files | grep -q '^redis\.service'; then + run systemctl enable --now redis +elif command -v redis-cli >/dev/null 2>&1; then + warn "redis-cli present but no redis-server.service / redis.service unit — assuming external Redis" +else + warn "redis not detected; orchestrator will fall back to in-process event bus. Install redis for multi-replica support." +fi + +# ---------------------------------------------------------------------- +# 5. orchestrator.env +# ---------------------------------------------------------------------- +FIRST_KEYS_FILE="/root/currencicombo-first-keys.txt" +if [[ -f "${ENV_FILE}" ]]; then + log "${ENV_FILE} already exists — leaving alone (no new keys generated)" +else + log "writing ${ENV_FILE}" + install -o "${APP_USER}" -g "${APP_USER}" -m 0640 "${SCRIPT_DIR}/.env.prod.example" "${ENV_FILE}" + # Auto-fill the two REQUIRED secrets so first boot doesn't crash. + SECRET="$(openssl rand -hex 32)" + INIT_KEY="$(openssl rand -hex 24)" + SETT_KEY="$(openssl rand -hex 24)" + AUD_KEY="$(openssl rand -hex 24)" + run "sed -i 's|^EVENT_SIGNING_SECRET=.*|EVENT_SIGNING_SECRET=${SECRET}|' '${ENV_FILE}'" + run "sed -i 's|^ORCHESTRATOR_API_KEYS=.*|ORCHESTRATOR_API_KEYS=${INIT_KEY}:initiator,${SETT_KEY}:settler,${AUD_KEY}:auditor|' '${ENV_FILE}'" + # Write a root-only handoff file so ops can grab the keys without + # scraping journald or reading the env file. The canonical copy lives + # in ${ENV_FILE}; delete this file once the keys are in your password + # manager. + if [[ "${DRY_RUN}" -eq 0 ]]; then + umask 077 + cat > "${FIRST_KEYS_FILE}" <