Files
proxmox/scripts/investigate-rpc-transaction-failures.sh
defiQUG b3a8fe4496
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
chore: sync all changes to Gitea
- Config, docs, scripts, and backup manifests
- Submodule refs unchanged (m = modified content in submodules)

Made-with: Cursor
2026-03-02 11:37:34 -08:00

248 lines
9.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# Investigate Transaction Failures on All RPC Nodes
# Checks logs, transaction pool, recent transactions, and node status
set -euo pipefail

# Resolve this script's directory and the project root (its parent directory)
# so the shared configuration is found regardless of the caller's working
# directory. These were previously computed twice; once is sufficient because
# nothing after the config load reads them again.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Load IP configuration. This is deliberately best-effort: when the file is
# missing or fails to load, the script continues with built-in defaults.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
# ANSI colour escape sequences. They are stored with literal backslashes and
# only turned into real escape characters when a log helper prints them.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'

# Logging helpers. Each prints one line to stdout consisting of a coloured tag
# followed by the message given as $1; backslash escapes in the message are
# interpreted.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

log_success() {
  printf '%b\n' "${GREEN}[✓]${NC} $1"
}

log_warn() {
  printf '%b\n' "${YELLOW}[⚠]${NC} $1"
}

log_error() {
  printf '%b\n' "${RED}[✗]${NC} $1"
}

# Prints a coloured horizontal rule; takes no arguments.
log_section() {
  printf '%b\n' "${CYAN}════════════════════════════════════════${NC}"
}
# RPC Nodes - All running nodes
# Keyed by Proxmox VMID; each value is "<ip>:<hostname>". The IP comes from the
# named variable when it is set and non-empty (e.g. by the sourced IP
# configuration), otherwise from the literal default. The previous nested
# "${X:-${X:-...}}" chains were equivalent to a single default and have been
# collapsed.
declare -A RPC_NODES
RPC_NODES[2400]="${RPC_THIRDWEB_PRIMARY:-192.168.11.240}:thirdweb-rpc-1"
RPC_NODES[2401]="${RPC_THIRDWEB_1:-192.168.11.241}:thirdweb-rpc-2"
RPC_NODES[2402]="${RPC_THIRDWEB_2:-192.168.11.242}:thirdweb-rpc-3"
RPC_NODES[2201]="${RPC_PUBLIC_1:-192.168.11.221}:besu-rpc-public-1"
RPC_NODES[2501]="${RPC_ALI_1:-192.168.11.251}:besu-rpc-2"
RPC_NODES[2502]="${RPC_ALI_2:-192.168.11.252}:besu-rpc-3"
RPC_NODES[2505]="${IP_VAULT_PHOENIX_2:-192.168.11.201}:besu-rpc-luis-0x8a"
RPC_NODES[2506]="${IP_SERVICE_202:-192.168.11.202}:besu-rpc-luis-0x1"
RPC_NODES[2507]="${IP_SERVICE_203:-192.168.11.203}:besu-rpc-putu-0x8a"
RPC_NODES[2508]="${IP_SERVICE_204:-192.168.11.204}:besu-rpc-putu-0x1"

# Proxmox host that is reached over SSH for container and log checks.
PROXMOX_HOST="${PROXMOX_HOST:-192.168.11.10}"
# Function to execute RPC call
# Sends one JSON-RPC 2.0 request over HTTP POST and prints the response body.
# Arguments:
#   $1 - node IP address or host name
#   $2 - JSON-RPC method name
#   $3 - params as a JSON array literal (default: [])
#   $4 - TCP port (default: 8545)
# Environment (optional):
#   RPC_CONNECT_TIMEOUT - seconds allowed for the TCP connect (default: 5)
#   RPC_MAX_TIME        - seconds allowed for the whole request (default: 15)
# Outputs:
#   The response body, or "{}" when the request fails or returns nothing.
# Returns: always 0, so callers can pipe the output without extra guards.
rpc_call() {
  local ip="$1"
  local method="$2"
  local params="${3:-[]}"
  local port="${4:-8545}"
  local payload response

  printf -v payload '{"jsonrpc":"2.0","method":"%s","params":%s,"id":1}' \
    "$method" "$params"

  # Bound both the connect phase and the total transfer time: without these an
  # unresponsive node would block the whole investigation indefinitely.
  # The body is captured first so that partial output from a failed transfer is
  # never mixed with the "{}" fallback.
  response=$(curl -s -X POST "http://${ip}:${port}" \
    --connect-timeout "${RPC_CONNECT_TIMEOUT:-5}" \
    --max-time "${RPC_MAX_TIME:-15}" \
    -H 'Content-Type: application/json' \
    -d "$payload" 2>/dev/null) || response="{}"

  if [[ -z "$response" ]]; then
    response="{}"
  fi
  printf '%s\n' "$response"
}
# Print the string value of the first "result":"..." field found in the
# JSON-RPC response read from stdin; prints nothing when no such field exists.
_rpc_result_string() {
  grep -o '"result":"[^"]*"' | head -1 | cut -d'"' -f4
}

# Run the command string $1 on the Proxmox host over SSH (5 second connect
# timeout) and print its stdout; stderr is discarded.
_proxmox_ssh() {
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "root@${PROXMOX_HOST}" "$1" 2>/dev/null
}

# Function to check node
# Runs the full investigation for one RPC node and prints a report to stdout.
# Arguments:
#   $1 - Proxmox VMID of the container
#   $2 - "<ip>:<hostname>" descriptor of the node
# Globals:
#   PROXMOX_HOST (read)
# Returns:
#   1 when the container is not running or the RPC endpoint does not answer
#   (the remaining checks are skipped), otherwise 0.
# All working variables are local, so nothing leaks between nodes.
check_node() {
  local vmid="$1"
  local ip_hostname="$2"
  local ip="${ip_hostname%%:*}"
  local hostname="${ip_hostname#*:}"
  local nl=$'\n'
  local line

  log_section
  log_info "Checking VMID ${vmid} - ${hostname} (${ip})"
  log_section
  echo ""

  # 1. Check if container is running
  log_info "1. Container Status"
  local container_status
  container_status=$(_proxmox_ssh "pvesh get /nodes/\$(hostname)/lxc/${vmid}/status/current --output-format json 2>/dev/null | grep -o '\"status\":\"[^\"]*\"' | head -1 | cut -d'\"' -f4") || container_status="unknown"
  # An SSH session that succeeds but yields no status is also "unknown".
  container_status="${container_status:-unknown}"
  if [[ "$container_status" == "running" ]]; then
    log_success "Container is running"
  else
    log_error "Container status: ${container_status}"
    echo ""
    return 1
  fi
  echo ""

  # 2. Check Besu service status
  log_info "2. Besu Service Status"
  local service_status
  service_status=$(_proxmox_ssh "pct exec ${vmid} -- systemctl is-active besu-rpc 2>/dev/null || echo 'inactive'") || service_status="unknown"
  # "systemctl is-active" prints the state AND exits non-zero for non-active
  # units, so the remote fallback echo appends a second line. Keep only the
  # first line, which is the real state.
  service_status="${service_status%%"$nl"*}"
  service_status="${service_status:-unknown}"
  if [[ "$service_status" == "active" ]]; then
    log_success "Besu service is active"
  else
    log_warn "Besu service status: ${service_status}"
  fi
  echo ""

  # 3. Check RPC connectivity
  log_info "3. RPC Connectivity"
  local chain_id
  chain_id=$(rpc_call "$ip" "eth_chainId" | _rpc_result_string) || chain_id=""
  if [[ -n "$chain_id" ]]; then
    log_success "RPC responding - Chain ID: ${chain_id}"
  else
    log_error "RPC not responding"
    echo ""
    return 1
  fi
  echo ""

  # 4. Check block number and sync status
  log_info "4. Block Synchronization"
  local block_hex block_dec=0
  block_hex=$(rpc_call "$ip" "eth_blockNumber" | _rpc_result_string) || block_hex=""
  if [[ -n "$block_hex" ]]; then
    # Remove 0x prefix if present, then convert
    block_dec=$(printf '%d' "0x${block_hex#0x}" 2>/dev/null) || block_dec=0
    log_success "Current block: ${block_dec} (${block_hex})"
  else
    log_warn "Could not get block number"
  fi

  # eth_syncing answers with the literal false when in sync and with a progress
  # OBJECT while syncing. Only an explicit false counts as synchronized; a
  # missing or unparseable answer is reported as unknown instead of as synced.
  local sync_response
  local re_synced='"result"[[:space:]]*:[[:space:]]*false'
  local re_syncing='"result"[[:space:]]*:[[:space:]]*(\{|true)'
  sync_response=$(rpc_call "$ip" "eth_syncing") || sync_response=""
  if [[ "$sync_response" =~ $re_synced ]]; then
    log_success "Node is synchronized"
  elif [[ "$sync_response" =~ $re_syncing ]]; then
    log_warn "Node is still syncing"
  else
    log_warn "Could not determine sync status"
  fi
  echo ""

  # 5. Check recent logs for errors
  # The label states what the command really inspects: the last 10 minutes of
  # the journal, keeping at most 20 matching lines.
  log_info "5. Recent Error Logs (last 10 minutes, up to 20 lines)"
  local recent_errors
  recent_errors=$(_proxmox_ssh "pct exec ${vmid} -- journalctl -u besu-rpc --since '10 minutes ago' --no-pager 2>/dev/null | grep -iE 'error|exception|failed|revert|invalid' | tail -20") || recent_errors=""
  if [[ -z "$recent_errors" ]]; then
    log_success "No recent errors found in logs"
  else
    log_warn "Recent errors found:"
    while IFS= read -r line; do
      echo " $line"
    done <<<"$recent_errors"
  fi
  echo ""

  # 6. Check transaction pool status (if available)
  log_info "6. Transaction Pool Status"
  local txpool_status pending queued
  txpool_status=$(rpc_call "$ip" "txpool_status" | grep -o '"result":{[^}]*}') || txpool_status=""
  if [[ -n "$txpool_status" ]]; then
    pending=$(grep -o '"pending":"[^"]*"' <<<"$txpool_status" | cut -d'"' -f4) || pending=""
    queued=$(grep -o '"queued":"[^"]*"' <<<"$txpool_status" | cut -d'"' -f4) || queued=""
    log_info "Pending: ${pending:-0}, Queued: ${queued:-0}"
  else
    log_warn "Transaction pool status not available (may be RPC-only node)"
  fi
  echo ""

  # 7. Check recent blocks for transaction failures
  log_info "7. Recent Block Transactions"
  if [[ -n "$block_hex" ]] && (( block_dec > 0 )); then
    local offset block_num block_num_hex block_data tx_array tx_count
    local tx_hash tx_receipt tx_status failed_count success_count
    local -a tx_hashes
    # Get last 5 blocks
    for offset in 0 1 2 3 4; do
      block_num=$((block_dec - offset))
      if (( block_num <= 0 )); then
        continue
      fi
      printf -v block_num_hex '0x%x' "$block_num"

      # Ask for transaction HASHES only (second parameter false). The
      # "transactions" field is then a flat array of hash strings, so counting
      # its entries gives the real transaction count. With full transaction
      # objects every hex field (from, to, value, ...) was counted and the
      # block's own hash was mistaken for a transaction hash.
      block_data=$(rpc_call "$ip" "eth_getBlockByNumber" "[\"${block_num_hex}\", false]") || block_data=""
      tx_array=$(grep -o '"transactions":\[[^]]*\]' <<<"$block_data" | head -1) || tx_array=""
      mapfile -t tx_hashes < <(grep -oE '0x[0-9a-fA-F]+' <<<"$tx_array")
      tx_count=${#tx_hashes[@]}
      if (( tx_count == 0 )); then
        continue
      fi
      log_info "Block ${block_num_hex}: ${tx_count} transaction(s)"

      # Check transaction receipts for failures (at most 5 per block)
      failed_count=0
      success_count=0
      for tx_hash in "${tx_hashes[@]:0:5}"; do
        tx_receipt=$(rpc_call "$ip" "eth_getTransactionReceipt" "[\"${tx_hash}\"]") || tx_receipt=""
        tx_status=$(grep -o '"status":"0x[^"]*"' <<<"$tx_receipt" | head -1 | cut -d'"' -f4) || tx_status=""
        if [[ "$tx_status" == "0x0" ]]; then
          log_error "Transaction ${tx_hash:0:10}...: FAILED (status 0x0)"
          failed_count=$((failed_count + 1))
        elif [[ "$tx_status" == "0x1" ]]; then
          success_count=$((success_count + 1))
        fi
      done
      if (( failed_count > 0 )); then
        log_warn "Block ${block_num_hex}: ${failed_count} failed, ${success_count} succeeded"
      fi
    done
  fi
  echo ""

  # 8. Check peer connections
  log_info "8. Peer Connections"
  local peer_count peer_dec
  peer_count=$(rpc_call "$ip" "net_peerCount" | _rpc_result_string) || peer_count=""
  if [[ -n "$peer_count" ]]; then
    # printf understands the 0x prefix, so the hex count converts directly.
    peer_dec=$(printf '%d' "$peer_count" 2>/dev/null) || peer_dec=0
    if (( peer_dec > 0 )); then
      log_success "Connected to ${peer_dec} peer(s)"
    else
      log_warn "No peers connected"
    fi
  else
    log_warn "Could not get peer count"
  fi
  echo ""

  # 9. Check for thread blocking warnings
  log_info "9. Thread Blocking Warnings"
  local thread_blocks
  thread_blocks=$(_proxmox_ssh "pct exec ${vmid} -- journalctl -u besu-rpc --since '1 hour ago' --no-pager 2>/dev/null | grep -i 'thread.*blocked' | tail -5") || thread_blocks=""
  if [[ -z "$thread_blocks" ]]; then
    log_success "No thread blocking warnings"
  else
    log_warn "Thread blocking warnings found:"
    while IFS= read -r line; do
      echo " $line"
    done <<<"$thread_blocks"
  fi
  echo ""
  echo "----------------------------------------"
  echo ""
}
# Main execution
log_section
log_info "RPC Transaction Failure Investigation"
log_info "Date: $(date)"
log_section
echo ""

# Check all RPC nodes in ascending VMID order. Bash does not define the
# iteration order of associative-array keys, so the keys are sorted explicitly
# to keep reports comparable between runs.
mapfile -t sorted_vmids < <(printf '%s\n' "${!RPC_NODES[@]}" | sort -n)
checked_nodes=0
skipped_nodes=0
for vmid in "${sorted_vmids[@]}"; do
  # printf emits one empty line when the node table is empty; ignore it.
  if [[ -z "$vmid" ]]; then
    continue
  fi
  checked_nodes=$((checked_nodes + 1))
  # A failing node must not abort the run: record it and move on.
  if ! check_node "$vmid" "${RPC_NODES[$vmid]}"; then
    log_warn "Skipping VMID ${vmid} due to errors"
    skipped_nodes=$((skipped_nodes + 1))
  fi
done

log_section
log_info "Investigation Complete"
log_section
echo ""
log_info "Summary:"
log_info "- Checked all running RPC nodes for transaction failures"
log_info "- Reviewed logs, transaction pool, and recent blocks"
log_info "- Checked synchronization and peer connectivity"
# State how many nodes could not be fully checked so the summary is not
# mistaken for a clean bill of health.
log_info "- Nodes processed: ${checked_nodes}, skipped due to errors: ${skipped_nodes}"
echo ""