Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands - CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround - CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check - NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere - MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates - LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference Co-authored-by: Cursor <cursoragent@cursor.com>
303 lines
9.4 KiB
Bash
Executable File
303 lines
9.4 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Idempotent remediation for RPC node stability on Proxmox.
#
# What it fixes (optionally):
#   1) Storage node restriction mismatch:
#      - Ensures the storage backing RPC VMID rootfs (e.g., local-lvm) is allowed on the node
#        where the VMID is running (prevents "storage 'local-lvm' is not available on node ..." failures).
#   2) Besu heap oversizing:
#      - Ensures BESU_OPTS (-Xms/-Xmx) in /etc/systemd/system/besu-rpc.service is sized to container memory.
#
# Safety:
#   - Default is DRY-RUN (no changes).
#   - Use --apply to perform changes.
#   - Service restarts are opt-in via --restart-besu.
#
# Usage:
#   PROXMOX_HOST=192.168.11.10 ./scripts/remediate-proxmox-rpc-stability.sh
#   PROXMOX_HOST=192.168.11.10 ./scripts/remediate-proxmox-rpc-stability.sh --apply --restart-besu
#
# Options:
#   --apply           Apply changes (otherwise dry-run)
#   --restart-besu    Restart besu-rpc inside affected VMIDs (only with --apply)
#   --only-storage    Only apply storage.cfg remediation
#   --only-heap       Only apply heap remediation
#   --vmids "..."     Override VMID list (space-separated)
|
|
|
|
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Proxmox host reached over SSH; override through the PROXMOX_HOST environment
# variable.
PROXMOX_HOST="${PROXMOX_HOST:-192.168.11.10}"

# Feature flags (0 = off, 1 = on), switched on by the command-line options.
APPLY=0
RESTART_BESU=0
ONLY_STORAGE=0
ONLY_HEAP=0

# VMIDs inspected by default; --vmids replaces the working list in VMIDS.
VMIDS_DEFAULT=(2400 2401 2402 2500 2501 2502 2503 2504 2505 2506 2507 2508)
VMIDS=("${VMIDS_DEFAULT[@]}")
|
|
|
|
#######################################
# Print the leading comment header of a script as help text.
# Only the contiguous block of '#' lines at the top of the file is printed
# (a shebang on line 1 is skipped); output stops at the first line that is not
# a comment, so no code is ever echoed. One "# " prefix is stripped per line.
# Arguments: $1 - optional path to read; defaults to this script ($0)
# Outputs:   help text on stdout
#######################################
usage() {
  local script="${1:-$0}"
  awk '
    NR == 1 && /^#!/ { next }
    /^#/ { sub(/^# ?/, ""); print; next }
    { exit }
  ' "$script"
}
|
|
|
|
# Write the arguments to stdout as one line, prefixed with the current
# ISO-8601 timestamp in square brackets.
log() {
  printf '[%s] %s\n' "$(date -Is)" "$*"
}
|
|
# Report a fatal error on stderr (prefixed with "ERROR: ") and terminate the
# shell with exit status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}
|
|
|
|
#######################################
# Parse command-line options into the global flags and VMID list, then reject
# contradictory combinations.
# Globals:   APPLY, RESTART_BESU, ONLY_STORAGE, ONLY_HEAP, VMIDS (written)
# Arguments: the script's command-line arguments
# Exits:     0 after printing help; 1 (through die) on invalid input
#######################################
parse_args() {
  local id
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --apply) APPLY=1; shift ;;
      --restart-besu) RESTART_BESU=1; shift ;;
      --only-storage) ONLY_STORAGE=1; shift ;;
      --only-heap) ONLY_HEAP=1; shift ;;
      --vmids)
        shift
        [[ $# -gt 0 ]] || die "--vmids requires a value"
        # read -a splits on whitespace without pathname expansion, so a value
        # such as "*" is not replaced by file names. Newlines are folded to
        # spaces first because read stops at the first line.
        read -r -a VMIDS <<<"${1//$'\n'/ }"
        [[ ${#VMIDS[@]} -gt 0 ]] || die "--vmids requires at least one VMID"
        # VMIDs are interpolated into remote shell commands; accept digits only.
        for id in "${VMIDS[@]}"; do
          [[ "$id" =~ ^[0-9]+$ ]] || die "Invalid VMID: ${id} (must be numeric)"
        done
        shift
        ;;
      -h|--help) usage; exit 0 ;;
      *) die "Unknown arg: $1 (use --help)" ;;
    esac
  done

  if [[ $ONLY_STORAGE -eq 1 && $ONLY_HEAP -eq 1 ]]; then
    die "Choose at most one of --only-storage / --only-heap"
  fi

  if [[ $RESTART_BESU -eq 1 && $APPLY -ne 1 ]]; then
    die "--restart-besu requires --apply"
  fi
}

parse_args "$@"
|
|
|
|
#######################################
# Run a command on the Proxmox host as root over SSH.
# BatchMode=yes makes ssh fail instead of prompting for a password, and
# ConnectTimeout=6 bounds the connection attempt to six seconds.
# NOTE(review): StrictHostKeyChecking=no accepts unknown or changed host keys
# without verification; consider "accept-new" if the ssh client supports it.
# Globals:   PROXMOX_HOST (read)
# Arguments: the remote command and its arguments
# Returns:   the exit status of ssh
#######################################
ssh_pve() {
  ssh -o StrictHostKeyChecking=no -o BatchMode=yes -o ConnectTimeout=6 "root@${PROXMOX_HOST}" "$@"
}
|
|
|
|
# Probe SSH connectivity and capture the Proxmox node name, which is later
# compared against the "nodes" lists in storage.cfg. ssh's own stderr is
# discarded on purpose; an empty result is reported through die instead.
remote_node="$(ssh_pve "hostname" 2>/dev/null || true)"
[[ -n "${remote_node}" ]] || die "Unable to SSH to root@${PROXMOX_HOST}"

if [[ $APPLY -eq 1 ]]; then
  run_mode="APPLY"
else
  run_mode="DRY-RUN"
fi

log "Proxmox host: ${PROXMOX_HOST} (node name: ${remote_node})"
log "Mode: ${run_mode}"
log "VMIDs: ${VMIDS[*]}"
echo
|
|
|
|
#######################################
# Recommend JVM heap settings for a given container memory size.
# Arguments: $1 - container memory in MB
# Outputs:   "<Xms> <Xmx>" on stdout (strings suitable for BESU_OPTS)
# Returns:   0
#######################################
recommend_heap() {
  local mem_mb="${1:-}"

  # Missing or non-numeric memory: fall back to a fixed default.
  if [[ ! "$mem_mb" =~ ^[0-9]+$ ]]; then
    echo "1g 2g"
    return 0
  fi

  # Force base 10 so a value with leading zeros (e.g. 08192) is not rejected
  # as an invalid octal number by shell arithmetic.
  mem_mb=$((10#${mem_mb}))

  if (( mem_mb >= 16384 )); then
    echo "8g 8g"
  elif (( mem_mb >= 6144 )); then
    # Covers both the 6 GB and 8 GB tiers, which share the same recommendation.
    echo "2g 4g"
  elif (( mem_mb >= 4096 )); then
    echo "1g 2g"
  else
    echo "512m 1g"
  fi
}
|
|
|
|
#######################################
# Read one "<field>: <value>" entry from a container's `pct config` output.
# The filtering (sed, head -1) runs on the Proxmox host; carriage returns are
# stripped locally. Errors from pct and ssh are silenced, so a failed lookup
# yields empty output.
# Arguments: $1 - VMID
#            $2 - field name (e.g. memory, swap, rootfs, hostname); it is
#                 inserted into a sed pattern, so pass plain key names only
# Outputs:   the first matching value on stdout, or nothing
#######################################
get_vmid_field() {
  local vmid="$1"
  local field="$2"
  ssh_pve "pct config ${vmid} 2>/dev/null | sed -n 's/^${field}: //p' | head -1" 2>/dev/null | tr -d '\r'
}
|
|
|
|
#######################################
# Report a container's state as printed by `pct status`.
# The "status: " prefix is removed on the Proxmox host and carriage returns
# are stripped locally; errors are silenced, so a failed lookup yields empty
# output.
# Arguments: $1 - VMID
# Outputs:   the status value on stdout (e.g. running), or nothing
#######################################
vmid_status() {
  local vmid="$1"
  ssh_pve "pct status ${vmid} 2>/dev/null | sed -n 's/^status: //p'" 2>/dev/null | tr -d '\r'
}
|
|
|
|
########################################
# 1) Storage remediation (storage.cfg)
########################################
# For every distinct storage that backs a VMID rootfs, read its "nodes"
# restriction from /etc/pve/storage.cfg on the Proxmox host. When the current
# node is missing from that list it is added, but only with --apply, and only
# after a timestamped copy of storage.cfg has been written to /root.
storage_changes=0
if [[ $ONLY_HEAP -ne 1 ]]; then
  log "Storage remediation: scanning VMID rootfs storages vs storage.cfg node allowlist"
  storages_needed=()

  for vmid in "${VMIDS[@]}"; do
    st="$(vmid_status "$vmid" || true)"
    rootfs="$(get_vmid_field "$vmid" "rootfs" || true)"
    [[ -n "${rootfs}" ]] || continue
    # The storage name is everything before the first colon of the rootfs value.
    storage="${rootfs%%:*}"
    if [[ -n "${storage}" ]]; then
      storages_needed+=("${storage}")
    fi
    log " VMID ${vmid}: status=${st:-?} rootfs=${rootfs}"
  done

  # De-duplicate. The empty case is guarded because printf with no arguments
  # still emits one blank line, which would become a bogus empty storage name.
  unique_storages=()
  if [[ ${#storages_needed[@]} -gt 0 ]]; then
    mapfile -t unique_storages < <(printf '%s\n' "${storages_needed[@]}" | sort -u)
  fi

  if [[ ${#unique_storages[@]} -eq 0 ]]; then
    log " No storages detected from VMID rootfs; skipping storage remediation."
  else
    log " Storages referenced by VMID rootfs: ${unique_storages[*]}"
  fi

  for storage in "${unique_storages[@]}"; do
    # The storage name is spliced into Python source below; refuse anything
    # outside a conservative identifier character set so it cannot break out
    # of the quoted literal.
    if [[ ! "${storage}" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]]; then
      log " Storage '${storage}': unexpected characters in name; skipping"
      continue
    fi

    # Only handle storages defined in storage.cfg and restricted by nodes=.
    # If nodes= isn't present, it's cluster-wide.
    allowed_nodes="$(ssh_pve "python3 - <<'PY'
from pathlib import Path

cfg = Path('/etc/pve/storage.cfg').read_text(encoding='utf-8')
storage = '${storage}'
in_section = False
nodes = None
for line in cfg.splitlines():
    # Any non-indented 'type: id' line opens a new section, whatever the type,
    # so a nodes line of a later section is never attributed to this storage.
    if line and not line[0].isspace() and not line.startswith('#') and ':' in line:
        in_section = line.split(':', 1)[1].strip() == storage
        continue
    if in_section and line.strip().startswith('nodes '):
        nodes = line.strip().split(None, 1)[1]
        break
print(nodes or '')
PY" 2>/dev/null | tr -d '\r')"

    if [[ -z "${allowed_nodes}" ]]; then
      log " Storage '${storage}': no nodes restriction found (OK)"
      continue
    fi

    # -x/-F: the node name must equal one comma-separated entry literally.
    if tr -d ' \t' <<<"${allowed_nodes}" | tr ',' '\n' | grep -qxF -- "${remote_node}"; then
      log " Storage '${storage}': node '${remote_node}' already allowed (OK)"
      continue
    fi

    storage_changes=$((storage_changes+1))
    log " Storage '${storage}': node '${remote_node}' NOT allowed (nodes=${allowed_nodes})"
    if [[ $APPLY -eq 1 ]]; then
      log " Applying: add '${remote_node}' to storage.cfg for ${storage}"
      # Unquoted here-document: ${...} is expanded locally, while \$ and \\ are
      # passed through to the remote bash / Python unchanged.
      ssh_pve "bash -s" <<EOS
set -euo pipefail
CFG=/etc/pve/storage.cfg
TS=\$(date +%Y%m%d_%H%M%S)
cp -a "\$CFG" "/root/storage.cfg.bak.\$TS"
python3 - <<'PY'
from __future__ import annotations
from pathlib import Path

cfg = Path('/etc/pve/storage.cfg')
storage = '${storage}'
node = ${remote_node@Q}

lines = cfg.read_text(encoding='utf-8').splitlines(True)
out = []
in_section = False
updated = False

for line in lines:
    # Any non-indented 'type: id' line opens a new section, whatever the type,
    # so only the nodes line of the requested storage is ever edited.
    if line and not line[0].isspace() and not line.startswith('#') and ':' in line:
        in_section = line.split(':', 1)[1].strip() == storage
        out.append(line)
        continue
    if in_section and line.lstrip().startswith('nodes '):
        indent = line[: len(line) - len(line.lstrip())]
        fields = line.strip().split(None, 1)
        nodes_str = fields[1] if len(fields) > 1 else ''
        parts = [p.strip() for p in nodes_str.split(',') if p.strip()]
        if node not in parts:
            parts.append(node)
            updated = True
            out.append(f"{indent}nodes {','.join(parts)}\\n")
        else:
            out.append(line)
        continue
    out.append(line)

# Rewrite the file only when the node list actually changed.
if updated:
    cfg.write_text(''.join(out), encoding='utf-8')
print('updated' if updated else 'no_change')
PY
EOS
    else
      log " DRY-RUN: would add '${remote_node}' to storage.cfg nodes= for storage '${storage}'"
    fi
  done
  echo
fi
|
|
|
|
########################################
# 2) Heap remediation (BESU_OPTS)
########################################
# For every running VMID, compare the -Xms/-Xmx values found on the BESU_OPTS
# line of the besu-rpc unit file with the recommendation for the container's
# memory. With --apply the line is rewritten (after a timestamped backup of the
# unit file); besu-rpc is restarted only with --restart-besu.
heap_changes=0
if [[ $ONLY_STORAGE -ne 1 ]]; then
  log "Besu heap remediation: scanning BESU_OPTS vs container memory"
  UNIT="/etc/systemd/system/besu-rpc.service"

  for vmid in "${VMIDS[@]}"; do
    st="$(vmid_status "$vmid" || true)"
    hostn="$(get_vmid_field "$vmid" "hostname" || true)"

    if [[ "${st}" != "running" ]]; then
      log " VMID ${vmid} (${hostn:-?}): status=${st:-?} -> skipping heap check"
      continue
    fi

    # Without a readable memory value there is nothing to size the heap
    # against; skip rather than fall back to the smallest heap tier.
    mem="$(get_vmid_field "$vmid" "memory" || true)"
    if [[ ! "${mem}" =~ ^[0-9]+$ ]]; then
      log " VMID ${vmid} (${hostn:-?}): container memory unknown -> skipping heap check"
      continue
    fi

    rec="$(recommend_heap "${mem}")"
    xms="${rec%% *}"
    xmx="${rec##* }"

    # First line of the unit file mentioning BESU_OPTS, prefixed with its line
    # number by grep -n.
    current_line="$(ssh_pve "pct exec ${vmid} -- bash -lc \"grep -n 'BESU_OPTS' ${UNIT} 2>/dev/null | head -1\"" 2>/dev/null | tr -d '\r' || true)"
    if [[ -z "${current_line}" ]]; then
      log " VMID ${vmid} (${hostn:-?} mem=${mem}MB): BESU_OPTS line missing -> skipping"
      continue
    fi

    # Both recommended flags must already appear on the line.
    if [[ "${current_line}" == *"-Xms${xms}"* && "${current_line}" == *"-Xmx${xmx}"* ]]; then
      log " VMID ${vmid} (${hostn:-?} mem=${mem}MB): OK (${current_line})"
      continue
    fi

    heap_changes=$((heap_changes+1))
    log " VMID ${vmid} (${hostn:-?} mem=${mem}MB): needs heap update -> -Xms${xms} -Xmx${xmx}"
    log " current: ${current_line}"

    if [[ $APPLY -eq 1 ]]; then
      ts="$(date +%Y%m%d_%H%M%S)"
      log " Applying: update ${UNIT} (backup .bak.${ts})"
      # NOTE(review): the sed expression replaces the whole
      # Environment="BESU_OPTS=..." line, so any other JVM options on it are
      # dropped, and it only matches the double-quoted form of the line.
      ssh_pve "pct exec ${vmid} -- bash -lc \"set -e; cp -a ${UNIT} ${UNIT}.bak.${ts}; sed -i 's/^Environment=\\\"BESU_OPTS=.*/Environment=\\\"BESU_OPTS=-Xms${xms} -Xmx${xmx}\\\"/' ${UNIT}; grep -n 'BESU_OPTS' ${UNIT}\""
      if [[ $RESTART_BESU -eq 1 ]]; then
        log " Restarting besu-rpc"
        ssh_pve "pct exec ${vmid} -- bash -lc \"set -e; systemctl daemon-reload; systemctl restart besu-rpc\""
      else
        log " NOTE: besu-rpc not restarted (use --restart-besu)"
      fi
    else
      log " DRY-RUN: would set BESU_OPTS=-Xms${xms} -Xmx${xmx} and optionally restart"
    fi
  done
  echo
fi
|
|
|
|
# Final report: how many storages and VMIDs needed (or received) an adjustment,
# plus a hint on how to enforce the changes after a dry run.
log "Done."
log "Planned/applied changes summary:"
log " storage adjustments needed: ${storage_changes}"
log " heap adjustments needed: ${heap_changes}"
if [[ $APPLY -eq 0 ]]; then
  log "Run again with --apply (and optionally --restart-besu) to enforce changes."
fi
|
|
|