#!/usr/bin/env bash
# Fix all validators: remove legacy tx-pool options, normalize validator networking,
# and restart besu-validator. The script changes into the project root itself
# (parent of its own directory) and sources config/ip-addresses.conf if present.
#
# Usage: bash scripts/fix-all-validators-and-txpool.sh [--dry-run]
#
# Options:
#   --dry-run   Report what would be done for each running validator; change nothing.
#   -h, --help  Print usage and exit.
#
# Environment (all optional; defaults are the literals used below):
#   PROXMOX_SSH_USER                          SSH user on the Proxmox hosts (default: root)
#   PROXMOX_HOST_R630_01 / PROXMOX_R630_01    host used for VMIDs <= 1002
#   PROXMOX_HOST_ML110                        host used for the remaining VMIDs
#   IP_VALIDATOR_0 .. IP_VALIDATOR_4          p2p-host address written per validator
#
# Exit status: 0 if every validator was updated or skipped, 1 if any failed,
# 2 on a usage error.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_ROOT"

# Best-effort: both files are optional and a failure while sourcing them is ignored.
# They are sourced at top level (not inside a function) so anything they declare stays global.
# shellcheck disable=SC1091
[ -f config/ip-addresses.conf ] && source config/ip-addresses.conf 2>/dev/null || true
# shellcheck disable=SC1091
[ -f scripts/lib/load-project-env.sh ] && source scripts/lib/load-project-env.sh 2>/dev/null || true

PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-root}"
R630_01="${PROXMOX_HOST_R630_01:-${PROXMOX_R630_01:-192.168.11.11}}"

# Config file edited inside each container; the alternate path wins when it exists.
CONFIG_PATH="/etc/besu/config-validator.toml"
CONFIG_PATH_ALT="/config/config-validator.toml"

# Options shared by every ssh invocation.
# NOTE(review): StrictHostKeyChecking=no disables host key verification; kept as in
# the original behavior, confirm this is acceptable for the management network.
SSH_OPTS=(-o ConnectTimeout=5 -o StrictHostKeyChecking=no)

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. %b expands the colour escapes only; the message is printed verbatim.
log_info() { printf '%b %s\n' "${BLUE}[INFO]${NC}" "$1"; }
log_success() { printf '%b %s\n' "${GREEN}[✓]${NC}" "$1"; }
log_warn() { printf '%b %s\n' "${YELLOW}[⚠]${NC}" "$1"; }
log_error() { printf '%b %s\n' "${RED}[✗]${NC}" "$1"; }

# Print the command-line synopsis to stdout.
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
}

# Print the Proxmox host that carries the given VMID.
# Arguments: $1 - container VMID
# Uses get_host_for_vmid when something by that name is available (e.g. from the
# sourced project env); otherwise falls back to the static split below.
validator_host() {
  local vmid="$1"
  if type get_host_for_vmid >/dev/null 2>&1; then
    get_host_for_vmid "$vmid"
  elif [[ "$vmid" -le 1002 ]]; then
    echo "$R630_01"
  else
    echo "${PROXMOX_HOST_ML110:-192.168.11.10}"
  fi
}

# Print the IP address to advertise as p2p-host for the given validator VMID.
# Arguments: $1 - container VMID
# Returns:   1 (and prints nothing) for a VMID outside 1000-1004.
validator_ip() {
  local vmid="$1"
  case "$vmid" in
    1000) echo "${IP_VALIDATOR_0:-192.168.11.100}" ;;
    1001) echo "${IP_VALIDATOR_1:-192.168.11.101}" ;;
    1002) echo "${IP_VALIDATOR_2:-192.168.11.102}" ;;
    1003) echo "${IP_VALIDATOR_3:-192.168.11.103}" ;;
    1004) echo "${IP_VALIDATOR_4:-192.168.11.104}" ;;
    *) return 1 ;;
  esac
}

# "vmid:host" pairs for every validator handled by this script.
VALIDATORS=(
  "1000:$(validator_host 1000)"
  "1001:$(validator_host 1001)"
  "1002:$(validator_host 1002)"
  "1003:$(validator_host 1003)"
  "1004:$(validator_host 1004)"
)

# Strict argument parsing: an unrecognised argument (for example a mistyped
# --dry-run) must abort instead of silently performing the real run.
DRY_RUN=false
for arg in "$@"; do
  case "$arg" in
    --dry-run) DRY_RUN=true ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown argument: $arg" >&2
      usage >&2
      exit 2
      ;;
  esac
done

echo ""
echo "=== Fix All Validators + Tx-Pool (Evict Stuck) ==="
echo ""

# Rewrite the Besu config of one validator container and restart its service.
# Globals:   PROXMOX_SSH_USER, SSH_OPTS, CONFIG_PATH, CONFIG_PATH_ALT, DRY_RUN (read)
# Arguments: $1 - container VMID
#            $2 - Proxmox host carrying that container
# Returns:   0 when updated and restarted, when skipped because the container is
#            not reported as running, or in dry-run mode; 1 on any failure.
fix_one() {
  local vmid="$1"
  local host="$2"
  local ssh_target="${PROXMOX_SSH_USER}@${host}"
  local ip
  local status

  ip="$(validator_ip "$vmid")" || {
    log_error " Could not determine validator IP for VMID $vmid"
    return 1
  }

  log_info "Validator $vmid on $host"

  # "pct status" prints "status: <state>"; any failure (ssh or pct) is reported
  # as "unknown" and the validator is skipped rather than counted as failed.
  status="$(ssh "${SSH_OPTS[@]}" "$ssh_target" "pct status $vmid 2>/dev/null" | awk '{print $2}')" \
    || status="unknown"
  if [[ "$status" != "running" ]]; then
    log_warn " VMID $vmid not running (status: $status) — skip"
    return 0
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log_info " [dry-run] Would remove legacy tx-pool lines, set p2p-host=${ip}, force sync-mode=FULL, and restart besu-validator"
    return 0
  fi

  # The remote script is wrapped in single quotes for the Proxmox host shell, so it
  # must never contain a single quote. Escaped \$ and \" reach the container shell
  # literally; unescaped ${...} is expanded locally before sending.
  # Key patterns tolerate whitespace around the key and "=" (valid TOML), so a
  # "key = value" line is replaced in place instead of a duplicate key being appended.
  if ! ssh "${SSH_OPTS[@]}" "$ssh_target" "pct exec $vmid -- bash -c '
set -e
CFG=${CONFIG_PATH}
[ -f ${CONFIG_PATH_ALT} ] && CFG=${CONFIG_PATH_ALT}
if [ ! -f \"\$CFG\" ]; then echo \"Config not found: \$CFG\"; exit 1; fi
WS=\"[[:space:]]*\"
# Legacy options crash Besu with the layered pool. tx-pool-min-score is removed too:
# unsupported in some Besu builds (causes Unknown option and crash loop).
for key in tx-pool-max-size tx-pool-limit-by-account-percentage tx-pool-retention-hours tx-pool-min-score; do
  sed -i \"/^\${WS}\${key}\${WS}=/d\" \"\$CFG\" 2>/dev/null || true
done
sed -i \"s|^\${WS}p2p-host\${WS}=.*|p2p-host=\\\"${ip}\\\"|\" \"\$CFG\"
sed -i \"s|^\${WS}sync-mode\${WS}=.*|sync-mode=\\\"FULL\\\"|\" \"\$CFG\"
grep -q \"^\${WS}p2p-host\${WS}=\" \"\$CFG\" || echo \"p2p-host=\\\"${ip}\\\"\" >> \"\$CFG\"
grep -q \"^\${WS}sync-mode\${WS}=\" \"\$CFG\" || echo \"sync-mode=\\\"FULL\\\"\" >> \"\$CFG\"
if ! grep -q \"tx-pool-max-future-by-sender\" \"\$CFG\"; then
  echo \"\" >> \"\$CFG\"
  echo \"# Layered Transaction Pool (Besu 23.10+; keep future queue tight)\" >> \"\$CFG\"
  echo \"tx-pool-max-future-by-sender=1\" >> \"\$CFG\"
  echo \"tx-pool-layer-max-capacity=12500000\" >> \"\$CFG\"
  echo \"tx-pool-max-prioritized=2000\" >> \"\$CFG\"
  echo \"tx-pool-price-bump=10\" >> \"\$CFG\"
else
  sed -i \"s/^\${WS}tx-pool-max-future-by-sender\${WS}=.*/tx-pool-max-future-by-sender=1/\" \"\$CFG\"
fi
'" 2>/dev/null; then
    log_error " SSH/config update failed"
    return 1
  fi
  log_success " Config updated"

  # Restart with a hard fallback so a wedged Besu process cannot block the whole recovery.
  if ! ssh "${SSH_OPTS[@]}" "$ssh_target" "pct exec $vmid -- bash -lc '
timeout 30 systemctl restart besu-validator || {
  systemctl kill -s SIGKILL besu-validator || true
  sleep 2
  systemctl reset-failed besu-validator || true
  systemctl start besu-validator
}
'" 2>/dev/null; then
    log_error " Restart failed"
    return 1
  fi
  log_success " besu-validator restarted"
  return 0
}

FAILED=0
for entry in "${VALIDATORS[@]}"; do
  IFS=: read -r vmid host <<< "$entry"
  if ! fix_one "$vmid" "$host"; then
    FAILED=$((FAILED + 1))
  fi
  echo ""
done

echo "=== Summary ==="
if [[ $FAILED -eq 0 ]]; then
  log_success "All validators updated and restarted (or skipped if not running)."
  log_info "Block production should resume once quorum is met. Check: bash scripts/monitoring/monitor-blockchain-health.sh"
else
  log_error "$FAILED validator(s) failed. Check SSH and container status."
  exit 1
fi