proxmox/scripts/lib/non_blockchain_vm_routing_matrix.py

#!/usr/bin/env python3
"""
Generate / validate non-blockchain VM routing matrix vs live inventory.

See config/gitea-phoenix/README.md and docs/04-configuration/GITEA_PHOENIX_NON_BLOCKCHAIN_VM_ROUTING_CLEANUP_TASK.md
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any


def load_json(path: Path) -> Any:
    return json.loads(path.read_text(encoding="utf-8"))


def guest_excluded(name: str, rules: list[dict[str, Any]]) -> bool:
    for r in rules:
        if r.get("kind") != "hostname_prefix":
            continue
        prefix = r.get("value", "")
        if prefix and name.startswith(prefix):
            return True
    return False


def categorize(name: str, vmid: str) -> str:
    v = int(vmid)
    if "npmplus" in name:
        return "npmplus"
    if 10233 <= v <= 10240:
        return "npmplus"
    if v == 5000 or "blockscout" in name:
        return "blockscout"
    if v in (7801, 7804, 7810) or "gov-portal" in name or "sankofa-portal" in name:
        return "gov_portals"
    if v == 8604 or "currencicombo" in name.lower():
        return "phoenix_app"
    if v == 5801 or "atomic-swap" in name:
        return "phoenix_app"
    if 5700 <= v <= 5705 or "act-runner" in name or "runner" in name:
        return "ci_runner"
    if 5410 <= v <= 5599 or "ccip" in name.lower() or "chainlink" in name.lower():
        return "ccip"
    if v >= 10000 or "dbis" in name or "postgres" in name or "redis" in name:
        return "sovereign_dbis"
    if 100 <= v <= 199:
        return "infrastructure"
    if "monitoring" in name or v == 130:
        return "monitoring"
    return "cluster_service"


_VMID_FROM_DESC = re.compile(r"\b(?:CT|VMID|LXC)\s+(\d{3,5})\b", re.IGNORECASE)


def _vmid_from_target_description(description: str) -> str | None:
    """Map deploy-target description text to a single guest VMID when unambiguous."""
    if not description:
        return None
    m = _VMID_FROM_DESC.search(description)
    if not m:
        return None
    return m.group(1)


def enrich_from_deploy_targets(
    repo_root: Path,
) -> dict[str, dict[str, Any]]:
    """Map VMID string -> gitea_repos, deploy_target, health_url from deploy-targets.json descriptions + healthchecks."""
    path = repo_root / "phoenix-deploy-api" / "deploy-targets.json"
    if not path.is_file():
        return {}
    data = load_json(path)
    out: dict[str, dict[str, Any]] = {}
    for t in data.get("targets", []):
        repo = t.get("repo")
        target = t.get("target")
        desc = t.get("description") or ""
        if not repo:
            continue
        vmid = _vmid_from_target_description(desc)
        hc = t.get("healthcheck") or {}
        url = hc.get("url") or ""
        if vmid is None and url:
            if "192.168.11.51:3000" in url:
                vmid = "7801"
            elif "blockscout.defi-oracle.io" in url or "/api/config/capabilities" in url:
                vmid = "5000"
        if vmid is None:
            continue
        cur = out.setdefault(vmid, {"gitea_repos": [], "deploy_targets": [], "health_urls": []})
        if repo not in cur["gitea_repos"]:
            cur["gitea_repos"].append(repo)
        if target and target not in cur["deploy_targets"]:
            cur["deploy_targets"].append(target)
        if url and url not in cur["health_urls"]:
            cur["health_urls"].append(url)
    for vmid, cur in out.items():
        cur["deploy_target"] = cur["deploy_targets"][0] if cur["deploy_targets"] else None
        cur["health_url"] = cur["health_urls"][0] if cur["health_urls"] else None
        del cur["deploy_targets"]
        del cur["health_urls"]
    return out


def build_entries(
    inventory: dict[str, Any],
    exclusions: dict[str, Any],
    repo_root: Path,
) -> list[dict[str, Any]]:
    rules = exclusions.get("rules", [])
    enrich = enrich_from_deploy_targets(repo_root)
    collected_at = inventory.get("collected_at", "")
    source = inventory.get("source", "")
    guests = inventory.get("guests", [])
    rows: list[dict[str, Any]] = []
    for g in guests:
        if g.get("status") != "running":
            continue
        vmid = str(g.get("vmid", "")).strip()
        name = (g.get("name") or "").strip()
        if not vmid or not name:
            continue
        if guest_excluded(name, rules):
            continue
        cat = categorize(name, vmid)
        ex = enrich.get(vmid, {})
        repos = list(ex.get("gitea_repos") or [])
        deploy_target = ex.get("deploy_target")
        health_url = ex.get("health_url")
        workflow = None
        if repos:
            slug = repos[0].split("/")[-1].lower()
            workflow = f".gitea/workflows/*.yml (see template for {slug})"
        notes = (
            f"Row from inventory snapshot collected_at={collected_at}; "
            "fill gitea_repos / deploy_target / health_url for Phoenix-backed services."
        )
        if repos:
            notes = (
                f"Partially enriched from deploy-targets.json healthchecks ({source}); "
                "confirm workflow_glob in repo."
            )
        rows.append(
            {
                "vmid": vmid,
                "hostname": name,
                "primary_ip": (g.get("ip") or "") or "",
                "category": cat,
                "gitea_repos": repos,
                "deploy_target": deploy_target,
                "workflow_glob": workflow,
                "health_url": health_url,
                "notes": notes,
            }
        )
    rows.sort(key=lambda r: int(r["vmid"]))
    return rows


def cmd_generate(args: argparse.Namespace) -> int:
    inv_path = Path(args.inventory)
    exc_path = Path(args.exclusions)
    out_path = Path(args.out)
    repo_root = Path(args.repo_root).resolve()
    inventory = load_json(inv_path)
    exclusions = load_json(exc_path)
    entries = build_entries(inventory, exclusions, repo_root)
    doc: dict[str, Any] = {
        "schemaVersion": "1",
        "generated_from_inventory_collected_at": inventory.get("collected_at"),
        "generated_from_inventory_source": inventory.get("source"),
        "allowed_missing": [],
        "entries": entries,
    }
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(doc, indent=2) + "\n", encoding="utf-8")
    print(f"Wrote {len(entries)} entries to {out_path}", file=sys.stderr)
    return 0


def cmd_validate(args: argparse.Namespace) -> int:
    inv_path = Path(args.inventory)
    mtx_path = Path(args.matrix)
    exc_path = Path(args.exclusions)
    inventory = load_json(inv_path)
    matrix = load_json(mtx_path)
    exclusions = load_json(exc_path)
    rules = exclusions.get("rules", [])
    running = [g for g in inventory.get("guests", []) if g.get("status") == "running"]
    in_scope = [
        str(g["vmid"])
        for g in running
        if g.get("name") and not guest_excluded(str(g["name"]), rules)
    ]
    inv_set = set(in_scope)
    allowed_raw = matrix.get("allowed_missing") or []
    allowed = {str(x["vmid"]): x.get("reason", "") for x in allowed_raw if x.get("vmid")}

    entries = matrix.get("entries")
    if not isinstance(entries, list) or not entries:
        print("ERROR: matrix.entries must be a non-empty array", file=sys.stderr)
        return 1
    matrix_ids: set[str] = set()
    for i, e in enumerate(entries):
        req = (
            "vmid",
            "hostname",
            "primary_ip",
            "category",
            "gitea_repos",
            "deploy_target",
            "workflow_glob",
            "health_url",
            "notes",
        )
        missing = [k for k in req if k not in e]
        if missing:
            print(f"ERROR: entry[{i}] missing keys: {missing}", file=sys.stderr)
            return 1
        vid = str(e["vmid"])
        if vid in matrix_ids:
            print(f"ERROR: duplicate vmid in matrix: {vid}", file=sys.stderr)
            return 1
        matrix_ids.add(vid)

    missing_vmids = sorted(inv_set - matrix_ids - set(allowed.keys()), key=int)
    if missing_vmids:
        for v in missing_vmids:
            print(f"ERROR: running in-scope VMID {v} missing from matrix (not in allowed_missing)", file=sys.stderr)
        return 1

    extra = sorted(matrix_ids - inv_set, key=int)
    for v in extra:
        print(f"WARN: matrix vmid {v} not in current inventory running in-scope set", file=sys.stderr)

    print(
        f"OK: inventory in-scope={len(inv_set)} matrix entries={len(matrix_ids)} allowed_missing={len(allowed)}",
        file=sys.stderr,
    )
    return 0


def main() -> int:
    ap = argparse.ArgumentParser(description=__doc__)
    sub = ap.add_subparsers(dest="cmd", required=True)

    g = sub.add_parser("generate", help="Write matrix JSON from inventory + exclusions")
    g.add_argument("--inventory", required=True, type=Path)
    g.add_argument("--exclusions", required=True, type=Path)
    g.add_argument("--out", required=True, type=Path)
    g.add_argument("--repo-root", type=Path, default=Path(__file__).resolve().parents[2])
    g.set_defaults(func=cmd_generate)

    v = sub.add_parser("validate", help="Fail if in-scope inventory VMIDs are not covered")
    v.add_argument("--inventory", required=True, type=Path)
    v.add_argument("--exclusions", required=True, type=Path)
    v.add_argument("--matrix", required=True, type=Path)
    v.set_defaults(func=cmd_validate)

    args = ap.parse_args()
    return int(args.func(args))


if __name__ == "__main__":
    raise SystemExit(main())