Files
explorer-monorepo/scripts/check-ccip-monitor-health.sh

116 lines
3.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# CCIP Monitor Health Check
# Task 87: Create CCIP Monitor Health Check Script
#
# Prints a human-readable report covering the Proxmox container, the
# ccip-monitor systemd unit, the metrics endpoint and the .env configuration.
#
# Usage: ./check-ccip-monitor-health.sh
# Environment:
#   METRICS_PORT  port of the local metrics endpoint (default: 8000)

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute directory containing this script, and its parent directory.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Colors
# ANSI colour sequences, stored as literal backslash escapes; they are only
# turned into real escape characters when a log line is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Shared formatter behind the log_* helpers.
# Arguments: $1 - colour sequence, $2 - tag text, $3 - message
# Outputs:   "<colour><tag><reset> <message>" on stdout, with backslash
#            escapes in the whole line expanded.
_log_line() { printf '%b\n' "${1}${2}${NC} ${3}"; }

# One helper per severity; each takes the message as $1 and writes to stdout.
log_info() { _log_line "$BLUE" "[INFO]" "$1"; }
log_success() { _log_line "$GREEN" "[✓]" "$1"; }
log_warn() { _log_line "$YELLOW" "[WARN]" "$1"; }
log_error() { _log_line "$RED" "[ERROR]" "$1"; }
# Report banner: one log_info call per line, ending with a blank spacer line.
for banner_line in \
  "=========================================" \
  "CCIP Monitor Health Check" \
  "=========================================" \
  ""; do
  log_info "$banner_line"
done
# Check if running in Proxmox environment
# The container and systemd checks both go through the `pct` CLI, so they are
# skipped (with a warning) when that command is not on PATH.
if ! command -v pct >/dev/null 2>&1; then
  log_warn "Proxmox environment not detected (pct command not found)"
  log_info " Skipping container and systemd checks"
else
  log_info "Proxmox environment detected"

  # Step 1: container status. A failing `pct status` contributes the
  # placeholder "not_found" so the error line below always has something to show.
  log_info ""
  log_info "Step 1: Checking container status (VMID 3501)..."
  CONTAINER_STATUS=$(pct status 3501 2>/dev/null || echo "not_found")
  if grep -qi "running" <<<"$CONTAINER_STATUS"; then
    log_success "Container is running"
  else
    log_error "Container is not running: $CONTAINER_STATUS"
    log_info " To start: pct start 3501"
  fi

  # Step 2: systemd unit inside the container. `is-active --quiet` reports
  # purely through its exit status.
  log_info ""
  log_info "Step 2: Checking systemd service..."
  if ! pct exec 3501 -- systemctl is-active --quiet ccip-monitor 2>/dev/null; then
    log_warn "CCIP Monitor service is not active"
    log_info " To start: pct exec 3501 -- systemctl start ccip-monitor"
  else
    log_success "CCIP Monitor service is active"
  fi
fi
# Check metrics endpoint
# Probes http://localhost:$METRICS_PORT/metrics (METRICS_PORT may be supplied
# through the environment; default 8000). On HTTP 200 the payload is fetched
# and its sample lines are counted.

#######################################
# Count the sample lines of a metrics payload, i.e. the non-empty lines whose
# first character is not '#'.
# Arguments: $1 - payload text
# Outputs:   the count as a single integer on stdout
#######################################
_count_metric_samples() {
  local count
  # grep -c prints "0" *and* exits 1 when nothing matches. Appending a fallback
  # with `|| echo 0` would therefore yield "0\n0"; `|| true` only neutralises
  # the exit status so `set -e` does not abort the script.
  count=$(grep -c '^[^#]' <<<"$1" || true)
  printf '%s\n' "${count:-0}"
}

log_info ""
log_info "Step 3: Checking metrics endpoint..."
METRICS_PORT="${METRICS_PORT:-8000}"
METRICS_URL="http://localhost:$METRICS_PORT/metrics"
if command -v curl >/dev/null 2>&1; then
  # When no HTTP response is received curl still writes "000" for
  # %{http_code} before exiting non-zero, so the fallback must not print a
  # second "000" (that produced "HTTP 000000"). The timeouts keep a wedged
  # endpoint from hanging the whole health check.
  METRICS_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 5 --max-time 10 "$METRICS_URL" 2>/dev/null || true)
  # Only reached with an empty value if curl produced no output at all.
  METRICS_RESPONSE="${METRICS_RESPONSE:-000}"
  if [ "$METRICS_RESPONSE" = "200" ]; then
    log_success "Metrics endpoint is accessible: $METRICS_URL"
    # Try to get some metrics
    METRICS_CONTENT=$(curl -s --connect-timeout 5 --max-time 10 \
      "$METRICS_URL" 2>/dev/null || true)
    if [ -n "$METRICS_CONTENT" ]; then
      METRIC_COUNT=$(_count_metric_samples "$METRICS_CONTENT")
      log_info " Metrics available: $METRIC_COUNT"
    fi
  else
    log_warn "Metrics endpoint not accessible: HTTP $METRICS_RESPONSE"
    log_info " URL: $METRICS_URL"
  fi
else
  log_warn "curl not available, skipping metrics endpoint check"
fi
# Check configuration file
# Verifies that the monitor's .env file exists, is readable, and assigns a
# non-empty value to every required variable.
# NOTE(review): the path is tested on the machine running this script; if the
# file actually lives inside container 3501, confirm this is the intended place
# to look.

#######################################
# Test whether an env file assigns a non-empty value to a variable.
# A line counts when it starts with NAME= followed by a value that is not
# blank and not just an empty pair of quotes.
# Arguments: $1 - variable name, $2 - path to the env file
# Returns:   0 if such a line exists, non-zero otherwise (including when the
#            file cannot be read)
#######################################
_env_var_is_set() {
  local var_name=$1 env_file=$2
  grep -Eq "^${var_name}=[\"']?[^\"'[:space:]]" -- "$env_file" 2>/dev/null
}

log_info ""
log_info "Step 4: Checking configuration..."
CONFIG_FILE="/opt/ccip-monitor/.env"
if [ -f "$CONFIG_FILE" ]; then
  log_success "Configuration file exists: $CONFIG_FILE"
  if [ ! -r "$CONFIG_FILE" ]; then
    # Without this guard every variable would be reported as missing, because
    # grep's read error is discarded.
    log_warn "Configuration file is not readable: $CONFIG_FILE"
  else
    # Check for required variables
    REQUIRED_VARS=("CCIP_ROUTER_ADDRESS" "CCIP_SENDER_ADDRESS" "RPC_URL" "CHAIN_ID")
    for VAR in "${REQUIRED_VARS[@]}"; do
      if _env_var_is_set "$VAR" "$CONFIG_FILE"; then
        log_success " $VAR: Configured"
      elif grep -q "^${VAR}=" "$CONFIG_FILE" 2>/dev/null; then
        # The key is present but carries no value, so it is not usable.
        log_warn " $VAR: Present but empty"
      else
        log_warn " $VAR: Not found in configuration"
      fi
    done
  fi
else
  log_warn "Configuration file not found: $CONFIG_FILE"
fi
# Summary
# Closing section: a header followed by the commands an operator can run for
# more detail. Each array element is emitted as one log_info line; empty
# elements produce spacer lines.
summary_lines=(
  ""
  "========================================="
  "Health Check Summary"
  "========================================="
  ""
  "For detailed status, check:"
  " - Container: pct status 3501"
  " - Service: pct exec 3501 -- systemctl status ccip-monitor"
  " - Metrics: curl http://localhost:${METRICS_PORT}/metrics"
  " - Logs: pct exec 3501 -- journalctl -u ccip-monitor -n 50"
  ""
)
for summary_line in "${summary_lines[@]}"; do
  log_info "$summary_line"
done