#!/usr/bin/env bash
# Health check script for VM-based Besu nodes.
#
# Lists every VM in the resource group and, for each VM with a public IP,
# checks in order: ICMP reachability, Docker presence, a running Besu
# container, the Besu metrics endpoint, and (for VMs whose name contains
# "rpc") the JSON-RPC endpoint.
#
# Environment:
#   RESOURCE_GROUP  Azure resource group to inspect
#                   (default: defi-oracle-mainnet-rg)
#
# Requires: az, jq, ping, ssh, curl, plus the log_success / log_warn /
# log_error helpers that are expected to come from lib/init.sh.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../lib/init.sh"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Configuration
RESOURCE_GROUP="${RESOURCE_GROUP:-defi-oracle-mainnet-rg}"
TIMEOUT=5 # seconds; shared by the ping, ssh and curl probes

#######################################
# Run a command on a VM over SSH as besuadmin.
# Globals:   TIMEOUT (read)
# Arguments: $1 - host/IP; remaining args - remote command
# Outputs:   remote stdout; stderr is discarded
# Returns:   exit status of ssh / the remote command
#######################################
remote_exec() {
  local host=$1
  shift
  # -n: keep ssh from reading stdin. The caller runs inside a
  # `while read` loop fed by a pipe; without -n ssh would consume the
  # remaining VM list and the loop would stop after the first VM.
  # NOTE(review): StrictHostKeyChecking=no disables host key
  # verification; kept as in the original script.
  ssh -n -o ConnectTimeout="$TIMEOUT" -o StrictHostKeyChecking=no \
    "besuadmin@${host}" "$@" 2>/dev/null
}

log_success "Checking VM health..."

# Get all VMs. --show-details is required: without it `az vm list` does not
# populate publicIps and the query would return null for every VM.
# Any az failure is deliberately folded into the "no VMs" case below.
VMS=$(az vm list --resource-group "$RESOURCE_GROUP" --show-details \
  --query "[].{Name:name, IP:publicIps}" -o json 2>/dev/null || echo "[]")

VM_COUNT=$(jq 'length' <<<"$VMS" 2>/dev/null || echo 0)
if [[ -z "$VM_COUNT" || "$VM_COUNT" == "0" ]]; then
  log_error "Error: No VMs found"
  exit 1
fi

# Check each VM. A null IP is mapped to the empty string so the
# "no public IP" guard below catches it.
jq -r '.[] | "\(.Name)|\(.IP // "")"' <<<"$VMS" |
  while IFS='|' read -r VM_NAME VM_IP; do
    # publicIps may hold several comma-separated addresses; use the first.
    VM_IP="${VM_IP%%,*}"

    if [[ -z "$VM_IP" || "$VM_IP" == "None" ]]; then
      log_warn "⚠ $VM_NAME: No public IP (may be validator)"
      continue
    fi

    # Check VM accessibility
    if ping -c 1 -W "$TIMEOUT" "$VM_IP" &>/dev/null; then
      log_success "✓ $VM_NAME: VM is accessible"
    else
      log_error "✗ $VM_NAME: VM is not accessible"
      continue
    fi

    # Check Docker
    if remote_exec "$VM_IP" "command -v docker &> /dev/null"; then
      log_success " ✓ Docker is installed"
    else
      log_error " ✗ Docker is not installed"
      continue
    fi

    # Check Besu container
    if remote_exec "$VM_IP" "docker ps | grep -q besu"; then
      log_success " ✓ Besu container is running"

      # Get container status
      CONTAINER_STATUS=$(remote_exec "$VM_IP" \
        "docker ps --filter 'name=besu' --format '{{.Status}}'" ||
        echo "unknown")
      log_warn " Status: $CONTAINER_STATUS"
    else
      log_error " ✗ Besu container is not running"
      continue
    fi

    # Check metrics endpoint (queried from inside the VM)
    if remote_exec "$VM_IP" \
      "curl -s http://localhost:9545/metrics | grep -q besu"; then
      log_success " ✓ Metrics endpoint is accessible"
    else
      log_warn " ⚠ Metrics endpoint is not accessible"
    fi

    # Check RPC endpoint (if RPC node)
    if [[ "$VM_NAME" == *rpc* ]]; then
      # --max-time bounds the probe so a silent port cannot hang the run.
      RESPONSE=$(curl -s --max-time "$TIMEOUT" -X POST \
        -H "Content-Type: application/json" \
        --data '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
        "http://${VM_IP}:8545" 2>/dev/null || echo "")

      if jq -e '.result' <<<"$RESPONSE" >/dev/null 2>&1; then
        BLOCK_NUMBER=$(jq -r '.result' <<<"$RESPONSE")
        # printf converts the 0x-prefixed hex quantity to decimal. On a
        # malformed value it still prints "0", so fall back explicitly
        # instead of appending to its output.
        if ! DECIMAL_BLOCK=$(printf '%d' "$BLOCK_NUMBER" 2>/dev/null); then
          DECIMAL_BLOCK="unknown"
        fi
        log_success " ✓ RPC endpoint responding (block: $DECIMAL_BLOCK)"
      else
        log_error " ✗ RPC endpoint not responding"
      fi
    fi
  done

log_success "Health check complete!"