#!/usr/bin/env bash
# Validate Monitoring Setup
#
# This script validates that monitoring is correctly configured and working.
# It inspects the Prometheus, Grafana, Alertmanager and Loki deployments, the
# Besu ServiceMonitors, a validator metrics endpoint and the alert rules file.
# Every finding is reported as a success or a warning.
#
# Environment:
#   NAMESPACE  Kubernetes namespace to inspect (default: besu-network).
#
# Requires kubectl on PATH; promtool is optional.
# NOTE(review): log_success / log_warn are presumably defined by lib/init.sh,
# which is not visible from this file - confirm.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/../lib/init.sh"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

NAMESPACE="${NAMESPACE:-besu-network}"

#######################################
# Print one replica counter of a deployment as a plain integer.
# kubectl exits 0 and prints an empty string when the requested field is
# absent (e.g. .status.readyReplicas while nothing is ready), so an empty or
# non-numeric answer is normalised to 0 instead of leaking into comparisons.
# Globals:   NAMESPACE (read)
# Arguments: $1 - deployment name
#            $2 - JSONPath field, e.g. .status.readyReplicas
# Outputs:   the count on stdout
# Returns:   always 0
#######################################
replica_count() {
  local deployment=$1 field=$2 count
  count=$(kubectl get deployment "$deployment" -n "$NAMESPACE" \
    -o "jsonpath={${field}}" 2>/dev/null) || count=""
  [[ "$count" =~ ^[0-9]+$ ]] || count=0
  printf '%s\n' "$count"
}

#######################################
# Report whether a deployment exists and has all desired replicas ready.
# Globals:   NAMESPACE (read)
# Arguments: $1 - deployment name
#            $2 - human-readable label used in log messages
#            $3 - optional suffix appended to the "not found" warning
# Outputs:   progress via log_success / log_warn
# Returns:   0 when ready == desired and ready > 0, otherwise 1
#######################################
check_deployment() {
  local deployment=$1 label=$2 missing_note=${3:-}
  local ready desired

  log_warn "Checking ${label}..."
  if ! kubectl get deployment "$deployment" -n "$NAMESPACE" &>/dev/null; then
    log_warn "⚠ ${label} deployment not found${missing_note}"
    return 1
  fi
  log_success "✓ ${label} deployment exists"

  ready=$(replica_count "$deployment" '.status.readyReplicas')
  desired=$(replica_count "$deployment" '.spec.replicas')
  if [[ "$ready" == "$desired" ]] && (( ready > 0 )); then
    log_success "✓ ${label} is ready"
    return 0
  fi
  log_warn "⚠ ${label} is not ready (Ready: $ready, Desired: $desired)"
  return 1
}

log_success "Validating Monitoring Setup..."

# Check Prometheus; the API is only probed once the deployment is ready.
if check_deployment prometheus "Prometheus"; then
  PROMETHEUS_POD=$(kubectl get pods -n "$NAMESPACE" -l app=prometheus \
    -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
  if [[ -n "$PROMETHEUS_POD" ]]; then
    if kubectl exec -n "$NAMESPACE" "$PROMETHEUS_POD" -- \
      wget -qO- http://localhost:9090/api/v1/status/config 2>/dev/null \
      | grep -q "yaml"; then
      log_success "✓ Prometheus API is responding"
    else
      log_warn "⚠ Prometheus API test inconclusive"
    fi
  fi
fi

# Check Grafana. "|| true" keeps a not-ready result from tripping set -e;
# the helper has already logged the outcome.
check_deployment grafana "Grafana" || true

# Check ServiceMonitors
log_warn "Checking ServiceMonitors..."
SERVICE_MONITORS=("besu-validators" "besu-sentries" "besu-rpc" "oracle-publisher")
for sm in "${SERVICE_MONITORS[@]}"; do
  if kubectl get servicemonitor "$sm" -n "$NAMESPACE" &>/dev/null; then
    log_success "✓ ServiceMonitor $sm exists"
  else
    log_warn "⚠ ServiceMonitor $sm not found, applying..."
    # The manifest is applied once and the loop stops at the first missing
    # monitor. A failing apply aborts the script because of set -e.
    kubectl apply -f "$PROJECT_ROOT/monitoring/k8s/servicemonitor.yaml"
    break
  fi
done

# Check if metrics are being collected (probes the first validator pod only)
log_warn "Checking if metrics are being collected..."
VALIDATOR_POD=$(kubectl get pods -n "$NAMESPACE" -l component=validator \
  -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
if [[ -n "$VALIDATOR_POD" ]]; then
  # Test metrics endpoint
  if kubectl exec -n "$NAMESPACE" "$VALIDATOR_POD" -- \
    wget -qO- http://localhost:9545/metrics 2>/dev/null \
    | grep -q "besu"; then
    log_success "✓ Metrics endpoint is working"
  else
    log_warn "⚠ Metrics endpoint test inconclusive"
  fi
else
  log_warn "⚠ No pods available for metrics testing"
fi

# Check Alertmanager
check_deployment alertmanager "Alertmanager" || true

# Check alert rules
log_warn "Checking alert rules..."
ALERT_RULES_FILE="$PROJECT_ROOT/monitoring/prometheus/alerts/besu.yml"
if [[ -f "$ALERT_RULES_FILE" ]]; then
  log_success "✓ Alert rules file exists"

  # Validate alert rules syntax (only possible when promtool is installed)
  if command -v promtool &>/dev/null; then
    if promtool check rules "$ALERT_RULES_FILE" 2>/dev/null; then
      log_success "✓ Alert rules syntax is valid"
    else
      log_warn "⚠ Alert rules syntax validation inconclusive"
    fi
  else
    log_warn "⚠ promtool not available for validation"
  fi
else
  log_warn "⚠ Alert rules file not found"
fi

# Check Loki (if deployed)
check_deployment loki "Loki" " (optional)" || true

log_success "Monitoring validation completed"