---
# Prometheus alerting rules for the Besu nodes and the oracle publisher.
# Both groups are evaluated every 30s. An alert only fires once its `expr`
# has held continuously for the rule's `for` duration.
groups:
  - name: besu_alerts
    interval: 30s
    rules:
      # Peer count alerts
      # NOTE(review): the `_total` suffix conventionally marks a cumulative
      # counter. If this metric is a counter rather than a gauge of currently
      # connected peers, `< 2` can only match shortly after startup -- confirm
      # against the node's /metrics output.
      - alert: BesuLowPeerCount
        expr: besu_peers_connected_total < 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu node has low peer count"
          description: "Besu node {{ $labels.instance }} has only {{ $value }} peers connected"

      # Block production alerts
      # $value is the age of the chain head in seconds (now minus head timestamp).
      - alert: BesuChainStalled
        expr: time() - besu_blockchain_head_timestamp > 20
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Besu chain is stalled"
          description: "Besu node {{ $labels.instance }} has not produced a block in {{ $value }} seconds"

      # Block lag alerts
      # Fires when the head number exceeds the fork-choice head number by more
      # than 2 blocks.
      - alert: BesuBlockLag
        expr: besu_blockchain_head_number - besu_blockchain_fork_choice_head_number > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu node is lagging behind"
          description: "Besu node {{ $labels.instance }} is {{ $value }} blocks behind the head"

      # RPC error rate alerts
      # Per-series rate of failed eth_* requests in errors per second; this is
      # an absolute rate, not a ratio of failed to total requests.
      - alert: BesuHighRpcErrorRate
        expr: rate(besu_rpc_requests_total{method=~"eth_.*",result="error"}[5m]) > 0.02
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu RPC has high error rate"
          description: "Besu node {{ $labels.instance }} has {{ $value }} error rate for RPC requests"

      # Disk usage alerts
      # Fires when less than 20% of the /data filesystem is still available.
      - alert: BesuHighDiskUsage
        expr: >-
          (node_filesystem_avail_bytes{mountpoint="/data"}
          / node_filesystem_size_bytes{mountpoint="/data"})
          < 0.2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu node has high disk usage"
          description: "Besu node {{ $labels.instance }} has less than 20% disk space available"

      # Validator missed proposals
      # The 1h `for` sits on top of the 1h increase() window, so the count must
      # stay above 5 for a full hour before the alert fires.
      - alert: BesuValidatorMissedProposals
        expr: increase(besu_validator_missed_proposals_total[1h]) > 5
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "Besu validator missed proposals"
          description: "Besu validator {{ $labels.instance }} has missed {{ $value }} proposals in the last hour"

      # Transaction pool size alerts
      # Uses the current-pool-size gauge. A cumulative `*_added_total` counter
      # only ever grows, so comparing it with a fixed threshold would latch the
      # alert on permanently and would not reflect what is in the pool now.
      # NOTE(review): confirm the gauge name against the node's /metrics output.
      - alert: BesuHighTransactionPoolSize
        expr: besu_transaction_pool_transactions > 10000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu transaction pool is large"
          description: "Besu node {{ $labels.instance }} has {{ $value }} transactions in the pool"

      # Memory usage alerts
      # The `> 0` filter drops containers that have no memory limit configured
      # (limit reported as 0); dividing by 0 yields +Inf and would always fire.
      # NOTE(review): for container metrics `instance` is often the scrape
      # target rather than the pod -- consider `$labels.pod` in the description.
      - alert: BesuHighMemoryUsage
        expr: >-
          (container_memory_usage_bytes{pod=~"besu-.*"}
          / (container_spec_memory_limit_bytes{pod=~"besu-.*"} > 0))
          > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu node has high memory usage"
          description: "Besu node {{ $labels.instance }} is using {{ $value }} of its memory limit"

      # CPU usage alerts
      # rate() of CPU-seconds is measured in cores, so 0.8 means 0.8 of one
      # core, not 80% of the container's CPU limit.
      - alert: BesuHighCpuUsage
        expr: rate(container_cpu_usage_seconds_total{pod=~"besu-.*"}[5m]) > 0.8
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Besu node has high CPU usage"
          description: "Besu node {{ $labels.instance }} is using {{ $value }} CPU"

  - name: oracle_alerts
    interval: 30s
    rules:
      # Oracle update alerts
      # Any increase in the update error counter over the last 5 minutes.
      - alert: OracleUpdateFailure
        expr: increase(oracle_update_errors_total[5m]) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Oracle update failed"
          description: "Oracle publisher has {{ $value }} update errors in the last 5 minutes"

      # Oracle stale data alerts
      # $value is the number of seconds since the last recorded oracle update.
      - alert: OracleStaleData
        expr: time() - oracle_last_update_time > 300
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Oracle data is stale"
          description: "Oracle has not been updated in {{ $value }} seconds"

      # Oracle price deviation alerts
      # The description renders $value with a percent sign, so the threshold of
      # 5 is presumably in percent -- confirm against the exporter.
      - alert: OracleHighPriceDeviation
        expr: oracle_price_deviation > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Oracle has high price deviation"
          description: "Oracle price deviation is {{ $value }}%"