From 57c717ad7e8a8775a5a802a57acb59b12095373a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 18 Apr 2026 20:19:06 +0000 Subject: [PATCH] security(phase1a): Phoenix Vault rotation runbook + consumer-prep scaffolding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Part of the sequenced cleanup tracked in issue #1. Scaffolding only — no rotation executed, no secret values committed. - docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md: authoritative Phoenix Vault rotation procedure (9-step: new root → rekey unseal → regenerate AppRoles → flip consumers → revoke old). Verification table + rollback path + Phase 2 handoff notes. - docs/04-configuration/VAULT_SHARD_CUSTODY_POLICY.md: decision record for the next rotation. Three options documented (named-operator / cloud-KMS auto-unseal / Transit auto-unseal); selection pending operator sign-off before rotation executes. - scripts/verify/enumerate-vault-consumers.sh: read-only grep over the tree for VAULT_ROLE_ID / VAULT_SECRET_ID / auth/approle/login references; flags which top-level consumers need a coordinated .env update at §1.6 of the runbook. - scripts/verify/verify-vault-approle-auth.sh: post-rotation sanity check — posts AppRole login + token lookup-self; returns PASS/FAIL without echoing the Role ID, Secret ID, or client token. - phoenix-deploy-api/.env.example: added VAULT_ADDR / VAULT_ROLE_ID / VAULT_SECRET_ID placeholder block with a pointer to the runbook. No values committed. - mission-control/.env.example: NEW file (previously had none); documents the launchpad NEXT_PUBLIC_* vars and the same Vault AppRole placeholder block. Server-side only — never NEXT_PUBLIC_*. Rotation execution stays with Phoenix ops; this commit only stages the runbook + env scaffolding so the eventual rotation does not require inventing infrastructure mid-incident. 
Co-Authored-By: Nakamoto, S --- .../VAULT_SHARD_CUSTODY_POLICY.md | 78 ++++++++ .../PHOENIX_VAULT_ROTATION_RUNBOOK.md | 183 ++++++++++++++++++ mission-control/.env.example | 32 +++ phoenix-deploy-api/.env.example | 10 + scripts/verify/enumerate-vault-consumers.sh | 91 +++++++++ scripts/verify/verify-vault-approle-auth.sh | 74 +++++++ 6 files changed, 468 insertions(+) create mode 100644 docs/04-configuration/VAULT_SHARD_CUSTODY_POLICY.md create mode 100644 docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md create mode 100644 mission-control/.env.example create mode 100755 scripts/verify/enumerate-vault-consumers.sh create mode 100755 scripts/verify/verify-vault-approle-auth.sh diff --git a/docs/04-configuration/VAULT_SHARD_CUSTODY_POLICY.md b/docs/04-configuration/VAULT_SHARD_CUSTODY_POLICY.md new file mode 100644 index 00000000..a2bbd412 --- /dev/null +++ b/docs/04-configuration/VAULT_SHARD_CUSTODY_POLICY.md @@ -0,0 +1,78 @@ +# Vault Unseal Shard Custody Policy (Phoenix) + +**Status:** Draft — decision required before the next rotation in `PHOENIX_VAULT_ROTATION_RUNBOOK.md` §1.3. +**Scope:** Phoenix Vault cluster on `10.160.0.40/41/42`. Does not cover the dbis_core admin Vault (separate policy TBD in Phase 1c). + +--- + +## 1. Why this exists + +The previous custody pattern (unseal keys stored alongside the cluster deployment in `.secure/vault-credentials/`) contributed to the credential incident tracked in [issue #1](https://gitea.d-bis.org/d-bis/proxmox/issues/1) — the 5 unseal shards and the root token were committed to `master` on 2026-01-06 and 2026-02-12. The next rotation **must** move off that pattern before new shards are generated. + +This document records the available options and the selected policy. Operator sign-off on one option is a precondition for executing the runbook. + +--- + +## 2. Options under consideration + +### Option A — Named-operator custody (recommended default) + +- 5 shards, 3-of-5 threshold (unchanged). 
+- Each shard goes to a single named operator via an encrypted channel (age recipient key or PGP pubkey). No operator holds more than one shard. +- Shards live in the operator's personal secure store (password manager with 2FA, Yubikey + offline copy, hardware vault, etc.) — **never** in any repo, shared drive, or CI secret store. +- A registry of shard-holder names + recipient-key fingerprints is kept **out of the repo** (operator-only distribution list). + +Pros: fastest to implement; aligns with the documented 5-shard quorum already in use. +Cons: social / availability risk if 3 of 5 named operators are unreachable simultaneously. + +**Selecting this option requires:** naming the 5 operators and collecting their age/PGP recipient keys before rotation. + +### Option B — HSM / cloud-KMS auto-unseal + +- Replace manual unseal with Vault's auto-unseal against AWS KMS, GCP KMS, or a hardware HSM (YubiHSM2 / SoloKey v2 / etc.). +- Recovery keys (separate from unseal keys) still exist and follow Option A's distribution. + +Pros: no day-to-day quorum coordination; recovery keys used only for Vault-level key rotation / disaster recovery. +Cons: introduces a cloud-KMS or HSM dependency; requires an AWS/GCP account or hardware provisioning. + +**Selecting this option requires:** chosen KMS / HSM provisioned, `seal "awskms"` (or equivalent) stanza reviewed, one full sealed/unseal dry-run. + +### Option C — Transit auto-unseal (separate Vault) + +- Run a small "seal Vault" on a separate host (ideally outside r630-01/02) and auto-unseal Phoenix against its Transit engine. +- Recovery keys as in Option B. + +Pros: no external provider dependency; all-in-house. +Cons: we now have two Vaults to secure, and the seal Vault's unseal story recurses (still needs Option A or B). + +--- + +## 3. 
Selected policy + +| Field | Value | +|---|---| +| **Selected option** | _pending — fill in before rotation_ | +| **Chosen operators (Option A)** | _pending — to be recorded out of repo_ | +| **Chosen KMS/HSM (Option B/C)** | _pending_ | +| **Recovery-key custodians** | _pending_ | +| **Decision date** | _pending_ | +| **Decision maker(s)** | _pending_ | + +Once filled in, update this table and commit. **Do not** commit operator personal details (email, phone, key fingerprints) to the repo — record only that the policy is in effect and where the out-of-repo registry lives. + +--- + +## 4. What this policy forbids (non-negotiable) + +- Committing unseal keys, recovery keys, or root tokens to any repo (`.secure/` is **not** an exception). +- Storing shards in shared password-manager vaults or shared drives (i.e. more than one person with access). +- Emailing / pasting shard values in Slack / Matrix / Gitea issues in plaintext. +- Reusing old shards after a rekey operation — old shards must be destroyed (see runbook §1.3). + +--- + +## 5. References + +- Runbook that references this policy: [`docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md`](../runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md) §1.3 +- Vault docs: , +- Phoenix cluster deployment: `docs/04-configuration/PHOENIX_VAULT_CLUSTER_DEPLOYMENT.md` diff --git a/docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md b/docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md new file mode 100644 index 00000000..1d248111 --- /dev/null +++ b/docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md @@ -0,0 +1,183 @@ +# Phoenix Vault Rotation Runbook (Phase 1a) + +**Target:** Phoenix Vault cluster on `10.160.0.40/41/42` (VMID 8640/8641/8642 on r630-01/02) +**Trigger:** Credential incident — root token, 5 unseal shards, and 2 AppRole pairs were committed to `d-bis/proxmox` git history. Tracking: [issue #1](https://gitea.d-bis.org/d-bis/proxmox/issues/1). +**Prerequisite:** Phase 0 inventory complete; out-of-repo artefacts in operator hands. 
+**Out of scope in this runbook:** Chain 138 deployer rotation (Phase 1b), dbis_core admin Vault rotation (Phase 1c). + +This runbook is designed to be **executed by Phoenix ops** (you + whichever operators hold the existing unseal shards). The Devin agent stages this runbook, the consumer-update scaffolding, and the verification scripts; the operator executes the Vault commands on the cluster and approves each step. + +--- + +## 0. Preflight + +Before *anything*: + +1. **Confirm cluster health** — all three nodes unsealed, leader elected, replication lag = 0. + ```bash + for n in 10.160.0.40 10.160.0.41 10.160.0.42; do + VAULT_ADDR=http://$n:8200 vault status + done + ``` + +2. **Confirm you have the current unseal-key quorum** (3 of 5). If shards have rotated out of operator hands, escalate before continuing — you cannot rekey without them. + +3. **Enumerate current consumers** so we know what breaks when the AppRoles rotate: + ```bash + bash scripts/verify/enumerate-vault-consumers.sh + ``` + Expected consumers per repo evidence: + - `phoenix-deploy-api/` on VM/CT behind `api.phoenix.*` + - `mission-control/` on VM/CT behind the Phoenix portal domain + - `rpc-translator-138/` (separate AppRole `19cbc945-…`, documented in the rpc-translator deployment docs; rotate if in scope) + - Any systemd unit referencing `VAULT_ROLE_ID` / `VAULT_SECRET_ID` on hosts behind NPMplus + +4. **Confirm storage-tier backup exists** — a fresh Raft snapshot taken *today*, stored **offline**: + ```bash + VAULT_ADDR=http://10.160.0.40:8200 VAULT_TOKEN=$CURRENT_ROOT \ + vault operator raft snapshot save /tmp/phoenix-pre-rotation-$(date +%Y%m%d).snap + # scp to offline storage; do NOT commit under .secure/ (that path is now a Phase 2 deletion target). + ``` + +--- + +## 1. Rotation sequence (authoritative order) + +Order matters — the key cascade is: **new root token → rekey unseal shards → regenerate AppRoles → flip consumers → revoke old root**. 
+ +### 1.1 Generate a new root token + +```bash +VAULT_ADDR=http://10.160.0.40:8200 vault operator generate-root -init +# Output: OTP, nonce — record both. +``` + +### 1.2 Provide the unseal-key quorum to complete new root + +```bash +# 3 of 5 operators each contribute, using the CURRENT unseal shards: +vault operator generate-root -nonce=$NONCE # prompts for unseal key, 3 times total +# Final output: encoded token; decode with OTP: +vault operator generate-root -decode=$ENCODED_TOKEN -otp=$OTP +# => new_root_token +``` + +**Hand off** `new_root_token` to the steps below via a secure channel; do **not** echo to logs, do **not** commit. + +### 1.3 Rekey the unseal shards (brand-new 5 shards, 3/5 threshold) + +```bash +VAULT_ADDR=http://10.160.0.40:8200 VAULT_TOKEN=$new_root_token \ + vault operator rekey -init -key-shares=5 -key-threshold=3 +# Collect 3 current unseal keys from operators, submit: +vault operator rekey -nonce=$REKEY_NONCE # prompts 3 times +# Output: 5 NEW unseal shards + new recovery key nonce +``` + +**Shard distribution policy change:** new shards **must not** go back into `.secure/vault-credentials/`. See [`docs/04-configuration/VAULT_SHARD_CUSTODY_POLICY.md`](../04-configuration/VAULT_SHARD_CUSTODY_POLICY.md) for the options (named-operator custody, HSM auto-unseal, or Transit engine wrap) and the decision record. + +**Destroy the old shards** physically / cryptographically once new shards are verified unsealing all three nodes successfully. + +### 1.4 Regenerate Phoenix-API AppRole + +```bash +VAULT_ADDR=http://10.160.0.40:8200 VAULT_TOKEN=$new_root_token \ + vault write -f auth/approle/role/phoenix-api/role-id-update # rolls Role ID +vault write -f auth/approle/role/phoenix-api/secret-id # issues new Secret ID +# Capture NEW role_id + secret_id; pass to §1.6. 
+``` + +### 1.5 Regenerate Phoenix-Portal AppRole + +```bash +vault write -f auth/approle/role/phoenix-portal/role-id-update +vault write -f auth/approle/role/phoenix-portal/secret-id +``` + +### 1.6 Update consumers (no secret values in-commit — env-var injection) + +The consumer-prep PR in this repo **adds** `VAULT_ADDR` / `VAULT_ROLE_ID` / `VAULT_SECRET_ID` entries to the consumer `.env.example` files and ships the `enumerate-vault-consumers.sh` + `verify-vault-approle-auth.sh` scripts. It does **not** embed any secret values. Operator flow after §1.4 / §1.5: + +1. On each consumer host, update the service's `.env` (or systemd drop-in) with the new Role ID / Secret ID. +2. Restart the service (`systemctl restart phoenix-deploy-api` / equivalent for `mission-control` / `rpc-translator-138`). +3. Verify the AppRole login succeeds: + ```bash + VAULT_ADDR= bash scripts/verify/verify-vault-approle-auth.sh + ``` +4. Verify the service's healthcheck is green (`/healthz`, `/api/v1/ping`, or equivalent). + +### 1.7 Revoke the old root token + +Only after all consumers are confirmed green on the new AppRoles: + +```bash +VAULT_ADDR=http://10.160.0.40:8200 VAULT_TOKEN=$new_root_token \ + vault token revoke hvs.PMJcL6Hk…ttZY # old leaked root — paste the value from operator notes, not this repo +``` + +Confirm revocation: +```bash +VAULT_TOKEN=hvs.PMJcL6Hk…ttZY vault token lookup 2>&1 | grep -i "permission denied\|invalid" +``` + +### 1.8 Revoke the old AppRole secret IDs + +For both Phoenix-API and Phoenix-Portal: +```bash +VAULT_TOKEN=$new_root_token vault write auth/approle/role/phoenix-api/secret-id-accessor/destroy \ + secret_id_accessor=$OLD_PHOENIX_API_ACCESSOR +VAULT_TOKEN=$new_root_token vault write auth/approle/role/phoenix-portal/secret-id-accessor/destroy \ + secret_id_accessor=$OLD_PHOENIX_PORTAL_ACCESSOR +``` + +### 1.9 Announce completion + +- Update this runbook's "Last executed" footer. 
+- Comment on [issue #1](https://gitea.d-bis.org/d-bis/proxmox/issues/1) with the rotation timestamp (no secret values). +- Hand off to Phase 2 (history rewrite) once this is confirmed green. + +--- + +## 2. Verification checklist + +Run after §1.6 on each consumer, and after §1.7 / §1.8 on the Vault cluster: + +| Check | Command | Expected | +|---|---|---| +| Cluster health | `for n in 10.160.0.40 10.160.0.41 10.160.0.42; do VAULT_ADDR=http://$n:8200 vault status; done` | 3 unsealed, 1 leader | +| New root usable | `VAULT_TOKEN=$new_root_token vault token lookup` | display `root` policy | +| Old root revoked | `VAULT_TOKEN=hvs.PMJcL6Hk…ttZY vault token lookup` | permission denied / invalid | +| Phoenix-API AppRole login | `bash scripts/verify/verify-vault-approle-auth.sh` with new creds | `auth.client_token` present | +| Phoenix-Portal AppRole login | same, with portal creds | same | +| Phoenix-API healthcheck | `curl https://api.phoenix.*/healthz` | HTTP 200 | +| mission-control healthcheck | `curl https://mission-control.*/api/ping` (or equivalent) | HTTP 200 | +| Old unseal keys rejected | `vault operator unseal ` on a sealed test node | "not a valid unseal key" | + +--- + +## 3. Rollback path + +If §1.4 / §1.5 succeed but consumers cannot authenticate with the new AppRole: + +1. Do **not** execute §1.7 — the old root token must remain live while you roll back. +2. Re-issue the previous Secret ID: `vault write -f auth/approle/role/phoenix-api/secret-id` and push the returned value into the consumer `.env`. (The old Secret ID cannot be reused, but a fresh one under the same Role ID lets you continue service while you debug.) +3. If even that fails, restore the Raft snapshot from §0.4 and re-enter rotation from §1.1 once the failure mode is understood. + +**Do not** restore snapshots that include `.secure/vault-backups/*.snapshot.gz` — those are known-leaked Phase 2 deletion targets. Use only the offline snapshot taken in §0.4 or earlier. + +--- + +## 4. 
Post-rotation cleanup (hands off to Phase 2) + +Once §1 is green: +- All files in `LEAKED_SECRETS_INVENTORY.md` §3 with VAULT_* categories become safe to redact / delete via `git filter-repo` (Phase 2). +- `.secure/vault-credentials/phoenix-approle-credentials-20260118.txt` + `phoenix-vault-credentials-20260118.txt` can be force-deleted from history. +- Force-push timing requires Gitea admin coordination per the plan. + +--- + +## 5. Last executed + +| Attempt | Date | Operator | Result | +|---|---|---|---| +| — | — | — | pending first rotation | diff --git a/mission-control/.env.example b/mission-control/.env.example new file mode 100644 index 00000000..f84b848d --- /dev/null +++ b/mission-control/.env.example @@ -0,0 +1,32 @@ +# Mission Control (unified operator console) — copy to .env.local and set values. +# All NEXT_PUBLIC_* vars are exposed to the browser; do not put secrets there. +# See ./README.md §Environment for the full list. + +# ----------------------------------------------------------------------------- +# Launchpad links (browser-exposed) +# ----------------------------------------------------------------------------- +NEXT_PUBLIC_HELPER_SCRIPTS_URL=http://localhost:3000 +NEXT_PUBLIC_EXPLORER_URL=https://explorer.d-bis.org +NEXT_PUBLIC_PHOENIX_DEPLOY_API_URL=http://localhost:4001/health +NEXT_PUBLIC_TESTNET_2138_RUNBOOK_URL= +NEXT_PUBLIC_ROUTE_MATRIX_URL= +NEXT_PUBLIC_DOCS_MASTER_URL= +NEXT_PUBLIC_OPERATIONAL_RUNBOOKS_URL= + +# ----------------------------------------------------------------------------- +# Repo root discovery (server-side) +# ----------------------------------------------------------------------------- +# Optional absolute monorepo root. If set but missing, Mission Control auto-detects. +MISSION_CONTROL_PROJECT_ROOT= +# Windows only: full path to bash.exe if outside default Git paths. 
+GIT_BASH_PATH= + +# ----------------------------------------------------------------------------- +# Phoenix Vault AppRole (server-side only; never NEXT_PUBLIC_*) +# See docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md. +# Leave blank until the Phase 1a rotation has issued the new Role ID / Secret ID. +# NEVER commit real values — these stay in the live .env.local on the portal host only. +# ----------------------------------------------------------------------------- +VAULT_ADDR= +VAULT_ROLE_ID= +VAULT_SECRET_ID= diff --git a/phoenix-deploy-api/.env.example b/phoenix-deploy-api/.env.example index 41cbfa08..ea47bab7 100644 --- a/phoenix-deploy-api/.env.example +++ b/phoenix-deploy-api/.env.example @@ -33,3 +33,13 @@ PHOENIX_PARTNER_KEYS= PUBLIC_SECTOR_MANIFEST_PATH= # Optional: proxmox repo root on host (manifest = $PHOENIX_REPO_ROOT/config/public-sector-program-manifest.json) PHOENIX_REPO_ROOT= + +# ----------------------------------------------------------------------------- +# Phoenix Vault AppRole (for services that fetch downstream secrets from Vault) +# See docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md. +# Leave blank until the Phase 1a rotation has issued the new Role ID / Secret ID. +# NEVER commit real values — these stay in the live .env on the deploy host only. +# ----------------------------------------------------------------------------- +VAULT_ADDR= +VAULT_ROLE_ID= +VAULT_SECRET_ID= diff --git a/scripts/verify/enumerate-vault-consumers.sh b/scripts/verify/enumerate-vault-consumers.sh new file mode 100755 index 00000000..b25dee02 --- /dev/null +++ b/scripts/verify/enumerate-vault-consumers.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# enumerate-vault-consumers.sh — list services in this workspace that consume +# Phoenix Vault AppRole credentials, so operators know what to update before +# executing PHOENIX_VAULT_ROTATION_RUNBOOK.md §1.6. +# +# Read-only. No credentials, no network calls. Safe to run anywhere. 
#!/usr/bin/env bash
# enumerate-vault-consumers.sh — list services in this workspace that consume
# Phoenix Vault AppRole credentials, so operators know what to update before
# executing PHOENIX_VAULT_ROTATION_RUNBOOK.md §1.6.
#
# Read-only. No credentials, no network calls. Safe to run anywhere.
#
# Usage:
#   bash scripts/verify/enumerate-vault-consumers.sh [--verbose]
#
# Exit codes:
#   0        — enumeration completed
#   non-zero — unexpected grep failure

set -euo pipefail

VERBOSE=0
if [[ "${1:-}" == "--verbose" || "${1:-}" == "-v" ]]; then
  VERBOSE=1
fi

# Resolve repo root from this script's location so it works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$REPO_ROOT"

echo "== Vault consumer enumeration =="
echo "Repo: $REPO_ROOT"
echo "Runbook: docs/runbooks/PHOENIX_VAULT_ROTATION_RUNBOOK.md"
echo

# Patterns that indicate a Vault AppRole consumer.  Each entry is an ERE
# fragment; they are OR-joined below.
PATTERNS=(
  'VAULT_ROLE_ID'
  'VAULT_SECRET_ID'
  'auth/approle/login'
  'approle.*phoenix'
  'phoenix-api.*role'
  'phoenix-portal.*role'
)

# Paths to skip (vendored, generated, inventory artefacts, old scripts archive).
EXCLUDES=(
  --exclude-dir=node_modules
  --exclude-dir=.git
  --exclude-dir=venv
  --exclude-dir=__pycache__
  --exclude-dir=.secure
  --exclude-dir=inventory-work
  --exclude-dir=pr-workspace
)

# Build the combined alternation once.
JOINED_PATTERN="$(IFS='|'; echo "${PATTERNS[*]}")"

# Collect matches as file:line:text.  grep exits 1 on "no match", which is not
# an error for this tool — hence the `|| true`.
MATCHES=$(grep -rIn -E "$JOINED_PATTERN" "${EXCLUDES[@]}" \
  --include='*.sh' --include='*.js' --include='*.ts' --include='*.tsx' \
  --include='*.py' --include='*.service' --include='*.env*' \
  --include='*.yaml' --include='*.yml' --include='Dockerfile*' \
  --include='*.conf' --include='*.ini' \
  . 2>/dev/null || true)

if [[ -z "$MATCHES" ]]; then
  echo "No Vault AppRole consumer references found in tracked source paths."
  echo "This does NOT mean Vault is unused — consumers may be deployed out-of-repo."
  exit 0
fi

# Group by top-level path (consumer).  Files directly at the repo root are
# listed individually; previously such files produced a garbled pseudo-entry
# assembled from the match text, because field 2 of "./file.sh" is the
# filename, not a directory.
echo "-- Grouped by top-level path --"
FILES=$(printf '%s\n' "$MATCHES" | awk -F: '{print $1}' | sort -u)
printf '%s\n' "$FILES" | awk -F/ '{ print (NF > 2) ? $2 "/" : $2 }' | sort -u \
  | while read -r top; do
      if [[ "$top" == */ ]]; then
        # Fixed-string prefix match via index(): directory names containing
        # regex metacharacters can no longer distort the count.
        COUNT=$(printf '%s\n' "$FILES" | awk -v p="./$top" 'index($0, p) == 1' | wc -l)
      else
        COUNT=1  # a root-level file is its own "consumer"
      fi
      printf '  %-40s %d file(s)\n' "$top" "$COUNT"
    done

if [[ "$VERBOSE" == "1" ]]; then
  echo
  echo "-- Full match list (file:line) --"
  printf '%s\n' "$MATCHES" | sort -u
fi

echo
echo "Consumers expected per repo evidence (rotate all of these when the Phoenix AppRoles roll):"
echo "  - phoenix-deploy-api/   (systemd unit: phoenix-deploy-api.service; env: .env)"
echo "  - mission-control/      (portal app; env: .env.local or systemd drop-in)"
echo "  - rpc-translator-138/   (separate AppRole 19cbc945-…; rotate independently)"
echo
echo "If any of the above is missing from the grouped output, it is either:"
echo "  (a) deployed out-of-repo with its own .env — ask the operator, or"
echo "  (b) not yet integrated with Vault — consumer-prep work still pending."
#!/usr/bin/env bash
# verify-vault-approle-auth.sh — attempt AppRole login against a Vault cluster
# and report whether the returned client token works for a trivial token-lookup.
#
# Exits non-zero on any failure so it can be wired into a post-rotation check.
# Does NOT print the Role ID, Secret ID, or client token to stdout/stderr.
#
# Env:
#   VAULT_ADDR      (required) — e.g. http://10.160.0.40:8200
#   VAULT_ROLE_ID   (required)
#   VAULT_SECRET_ID (required)
#
# Usage:
#   VAULT_ADDR=http://10.160.0.40:8200 VAULT_ROLE_ID=... VAULT_SECRET_ID=... \
#     bash scripts/verify/verify-vault-approle-auth.sh

set -euo pipefail

: "${VAULT_ADDR:?VAULT_ADDR is required (e.g. http://10.160.0.40:8200)}"
: "${VAULT_ROLE_ID:?VAULT_ROLE_ID is required}"
: "${VAULT_SECRET_ID:?VAULT_SECRET_ID is required}"

if ! command -v curl >/dev/null 2>&1; then
  echo "ERROR: curl is required" >&2
  exit 2
fi
if ! command -v jq >/dev/null 2>&1; then
  echo "ERROR: jq is required (apt-get install -y jq)" >&2
  exit 2
fi

# --fail-with-body (keep the error body for diagnostics) needs curl >= 7.76;
# fall back to plain --fail on older hosts, where the error body is discarded
# but the check still returns the correct PASS/FAIL status.
FAIL_FLAG='--fail-with-body'
if ! curl --help all 2>/dev/null | grep -q -- '--fail-with-body'; then
  FAIL_FLAG='--fail'
fi

echo "== Vault AppRole auth check =="
echo "VAULT_ADDR=$VAULT_ADDR"
echo "VAULT_ROLE_ID=[REDACTED — ${#VAULT_ROLE_ID} chars]"
echo "VAULT_SECRET_ID=[REDACTED — ${#VAULT_SECRET_ID} chars]"
echo

# Build the login payload with jq so special characters survive JSON encoding.
LOGIN_BODY=$(jq -n --arg r "$VAULT_ROLE_ID" --arg s "$VAULT_SECRET_ID" \
  '{role_id: $r, secret_id: $s}')

# Step 1 — login.  Capture stdout only: curl's own diagnostics go straight to
# our stderr instead of being mixed into the JSON that jq parses (the previous
# `2>&1` capture made that parse fragile).
LOGIN_RESP=$(curl -sS "$FAIL_FLAG" -X POST \
  -H "Content-Type: application/json" \
  -d "$LOGIN_BODY" \
  "$VAULT_ADDR/v1/auth/approle/login") || {
    echo "FAIL: AppRole login returned non-2xx" >&2
    # Redact anything that looks like a credential before echoing the body.
    printf '%s\n' "$LOGIN_RESP" \
      | sed -E 's/(client_token|secret_id|role_id)[" :]+"[^"]*"/\1":"[REDACTED]"/g' >&2
    exit 3
  }

CLIENT_TOKEN=$(printf '%s' "$LOGIN_RESP" | jq -r '.auth.client_token // empty')
if [[ -z "$CLIENT_TOKEN" || "$CLIENT_TOKEN" == "null" ]]; then
  echo "FAIL: AppRole login succeeded but auth.client_token missing" >&2
  exit 4
fi
echo "  login: OK (client_token length=${#CLIENT_TOKEN})"

# Step 2 — token lookup-self with the returned token.
LOOKUP_RESP=$(curl -sS "$FAIL_FLAG" \
  -H "X-Vault-Token: $CLIENT_TOKEN" \
  "$VAULT_ADDR/v1/auth/token/lookup-self") || {
    echo "FAIL: token lookup-self returned non-2xx" >&2
    exit 5
  }

POLICIES=$(printf '%s' "$LOOKUP_RESP" | jq -r '.data.policies // [] | join(",")')
TTL=$(printf '%s' "$LOOKUP_RESP" | jq -r '.data.ttl // 0')
echo "  lookup-self: OK (policies=${POLICIES:-none}, ttl=${TTL}s)"

# Clear the token var so it does not leak into parent env if sourced.
unset CLIENT_TOKEN LOGIN_RESP LOGIN_BODY

echo
echo "PASS: AppRole credentials authenticate against $VAULT_ADDR"