Files
proxmox/scripts/verify/build-deduped-onchain-inventory.py
2026-04-24 10:56:01 -07:00

164 lines
6.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Parse DEPLOYED_CONTRACTS_UNIFIED_EXTENDED.md, dedupe by (chain_id, address),
run on-chain checks, write DEPLOYED_CONTRACTS_DEDUPED_ONCHAIN_VERIFIED.md + JSON.
"""
from __future__ import annotations
import argparse
import json
import sys
import time
from pathlib import Path
from typing import Any
from inventory_onchain import (
ROOT,
UNIFIED_EXTENDED_PATH,
esc,
load_etherscan_key,
parse_table,
run_check,
)
from onchain_check_cache import cache_path_for, run_checks_with_cache
# Output artifacts: human-readable markdown report + machine-readable JSON snapshot.
OUT_MD = ROOT / "reports/inventory/DEPLOYED_CONTRACTS_DEDUPED_ONCHAIN_VERIFIED.md"
OUT_JSON = ROOT / "reports/inventory/contract-inventory-onchain-snapshot.json"
# On-disk cache of per-address check results; shared with
# build-extended-live-inventory.py (both resolve it via cache_path_for(ROOT)).
CHECK_CACHE = cache_path_for(ROOT)
def dedupe_rows(
    raw: list[dict[str, str]],
) -> tuple[dict[tuple[str, str], dict[str, Any]], list[tuple[str, str]]]:
    """Merge raw inventory rows by (chain id, lowercased address).

    Addresses are lowercased before keying so rows that differ only in hex
    casing collapse into a single bucket — hex addresses are case-insensitive
    (EIP-55 casing is just a checksum), and the snapshot advertises its dedupe
    key as "chain_id + address lowercase". Each bucket accumulates every
    label, network, and provenance string seen for the pair.

    Returns:
        (buckets, key_order): ``buckets`` maps (chain, address) to the merged
        record; ``key_order`` sorts keys by numeric chain id (non-numeric
        chains sort last), then by address.
    """
    by_key: dict[tuple[str, str], dict[str, Any]] = {}
    for r in raw:
        addr = r["address"].lower()  # normalize casing so duplicates merge
        k = (r["chain"], addr)
        bucket = by_key.setdefault(
            k,
            {
                "chain": r["chain"],
                "address": addr,
                "names": set(),
                "networks": set(),
                "provenances": set(),
            },
        )
        bucket["names"].add(r["name"])
        if r["network"]:
            bucket["networks"].add(r["network"])
        if r["provenance"]:
            bucket["provenances"].add(r["provenance"])
    key_order = sorted(
        by_key,
        key=lambda x: (int(x[0]) if str(x[0]).isdigit() else 999_999, x[1].lower()),
    )
    return by_key, key_order


def main() -> int:
    """Build the deduped on-chain-verified inventory (markdown + JSON).

    Reads the unified extended inventory table, merges duplicate
    (chain, address) pairs, runs the cached on-chain/explorer checks, and
    writes both a JSON snapshot and a markdown report.

    Returns:
        Process exit code: 0 on success, 1 if the source table is missing
        or yields no rows.
    """
    ap = argparse.ArgumentParser(
        description="Deduped on-chain + explorer inventory (shared cache with build-extended-live-inventory.py)."
    )
    ap.add_argument(
        "--no-cache",
        action="store_true",
        help="Ignore the on-disk check cache; re-run all network/API checks.",
    )
    ap.add_argument(
        "--refresh-transient",
        action="store_true",
        help="Re-fetch only cache entries that look like rate limit / RPC errors.",
    )
    args = ap.parse_args()
    use_cache = not args.no_cache
    refresh = args.refresh_transient

    if not UNIFIED_EXTENDED_PATH.is_file():
        print("Missing", UNIFIED_EXTENDED_PATH, file=sys.stderr)
        return 1
    raw = parse_table(UNIFIED_EXTENDED_PATH)
    if not raw:
        print("No rows from", UNIFIED_EXTENDED_PATH, file=sys.stderr)
        return 1

    by_key, key_order = dedupe_rows(raw)
    to_run = [by_key[k] for k in key_order]
    print(f"Unique pairs: {len(to_run)}")

    es_key = load_etherscan_key()
    print("Etherscan key loaded:", bool(es_key))
    print("Check cache file:", CHECK_CACHE)
    # Only (chain, address) goes to the checker; the merged label sets are
    # re-attached after the checks come back.
    to_run_slim: list[dict[str, Any]] = [
        {"chain": b["chain"], "address": b["address"]} for b in to_run
    ]
    by_addr = run_checks_with_cache(
        to_run_slim,
        es_key,
        run_check,
        CHECK_CACHE,
        use_cache=use_cache,
        refresh_transient=refresh,
        max_workers=5,
        progress_every=50,
    )

    # Join check results back onto the merged metadata. The slice caps bound
    # each merged field so one pathological row can't blow up the report.
    merged: list[dict[str, Any]] = []
    for k in key_order:
        b = by_key[k]
        r = by_addr[k]
        merged.append(
            {
                **r,
                "labels_merged": "; ".join(sorted(b["names"]))[:4000],
                "networks_merged": " | ".join(sorted(b["networks"]))[:2000],
                "provenance_merged": " | ".join(sorted(b["provenances"]))[:4000],
            }
        )

    ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    snap = {
        "generated_utc": ts,
        "source_file": str(UNIFIED_EXTENDED_PATH.relative_to(ROOT)),
        "dedupe_key": "chain_id + address lowercase",
        "unique_pairs": len(merged),
        "etherscan_key_used": bool(es_key),
        "rows": merged,
    }
    OUT_JSON.write_text(json.dumps(snap, indent=2) + "\n", encoding="utf-8")
    print("Wrote", OUT_JSON)

    lines = [
        "# Deployed contract addresses — deduplicated, sorted, on-chain verification snapshot",
        "",
        f"**Generated (UTC):** {ts}",
        f"**Source (pre-dedup):** [`DEPLOYED_CONTRACTS_UNIFIED_EXTENDED.md`]({UNIFIED_EXTENDED_PATH.name})",
        f"**Unique (chain × address) rows:** {len(merged)}",
        f"**JSON snapshot:** [`{OUT_JSON.name}`]({OUT_JSON.name})",
        f"**Regenerate:** `python3 scripts/verify/build-deduped-onchain-inventory.py` (same [check cache](contract-inventory-onchain-check-cache.json) as extended live — cache auto-invalidates when key RPC/Blockscout endpoints change; `--no-cache` / `--refresh-transient` also supported).",
        "",
        "## Organization",
        "",
        "1. Sorted by **numeric chain id**, then by **address** (hex).",
        "2. **Duplicate contract addresses** (same chain + same address) merged; all former labels in **Labels (merged)**.",
        "3. **On-chain** = `eth_getCode` (bytecode). **Sourcify** / **Blockscout (138)** / **Etherscan V2** = automated *source* publication where APIs allow.",
        "",
        "## How to read “verified and published”",
        "",
        "- A contract can have **bytecode on-chain** (`code_on_chain=yes`) but **source not yet published** on the explorer (common on private chains until you run the verify script).",
        "- **Etherscan V2** requires a valid `ETHERSCAN_API_KEY` in the repo root or `smom-dbis-138` `.env` (not written into this file). **Cronos (25)** is not queried via Etherscan V2 here (not in the API); use a Cronos explorer for source state.",
        "- **JSON-RPC** for `eth_getCode` uses a `User-Agent` (some public nodes return 403 without it). Override **Ethereum mainnet** with `ETHEREUM_MAINNET_RPC` if the default is blocked on your network.",
        "- **“On-chain verification” in this file** = `eth_getCode` plus **automated** Sourcify/Blockscout/Etherscan; human **source** verification on explorers may still be pending.",
        "- **Chain 138** uses [Blockscout](https://explorer.d-bis.org) get-api; 138 is not on Sourcify. See [BLOCKSCOUT_VERIFICATION_GUIDE.md](../../docs/08-monitoring/BLOCKSCOUT_VERIFICATION_GUIDE.md).",
        "",
        "## Table",
        "",
        "| # | Ch | Address | Labels (merged) | Code on chain | Sourcify | Blockscout or Etherscan (source) | Auto summary | Provenance (merged) |",
        "|---:|---:|:---|:---|:---|:---|:---|:---|:---|",
    ]
    for i, r in enumerate(merged, 1):
        ch = r["chain"]
        addr = r["address"] if r["address"].startswith("0x") else f"0x{r['address']}"
        sfy = esc(r.get("source_sourcify", ""))[:300]
        bs = esc(r.get("source_blockscout", ""))[:200]
        es = esc(r.get("source_etherscan", ""))[:200]
        # Chain 138 (private) is covered by Blockscout; everything else by
        # Etherscan V2.
        ex_col = bs if ch == "138" else es
        lines.append(
            f"| {i} | {ch} | `{addr}` | {esc(r.get('labels_merged', '')[:500])} | {r.get('code_on_chain') or ''} | {sfy} | {ex_col} | {esc(r.get('verification_summary', '')[:200])} | {esc(r.get('provenance_merged', '')[:350])} |"
        )
    OUT_MD.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print("Wrote", OUT_MD)
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())