Files
smom-dbis-138/monitoring/prometheus/alerts/region.yml
defiQUG 1fb7266469 Add Oracle Aggregator and CCIP Integration
- Introduced Aggregator.sol for Chainlink-compatible oracle functionality, including round-based updates and access control.
- Added OracleWithCCIP.sol to extend Aggregator with CCIP cross-chain messaging capabilities.
- Created .gitmodules to include OpenZeppelin contracts as a submodule.
- Developed a comprehensive deployment guide in NEXT_STEPS_COMPLETE_GUIDE.md for Phase 2 and smart contract deployment.
- Implemented Vite configuration for the orchestration portal, supporting both Vue and React frameworks.
- Added server-side logic for the Multi-Cloud Orchestration Portal, including API endpoints for environment management and monitoring.
- Created scripts for resource import and usage validation across non-US regions.
- Added tests for CCIP error handling and integration to ensure robust functionality.
- Included various new files and directories for the orchestration portal and deployment scripts.
2025-12-12 14:57:48 -08:00

40 lines
1.4 KiB
YAML

# Region-level health alerts.
#
# Every aggregating expression groups `by (region)` so that the `region`
# label survives aggregation and the annotation templates that reference
# $labels.region render a real value instead of an empty string.
# NOTE(review): the kube-state-metrics series below only carry a `region`
# label if it is attached at scrape/relabel time - confirm in the scrape
# config. Without it, each rule degrades to a single global group.
groups:
  - name: region_health
    rules:
      # Fires once per scrape target that has a non-empty `region` label and
      # has reported up == 0 continuously for 5 minutes.
      - alert: RegionUnhealthy
        expr: up{region=~".+"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Region {{ $labels.region }} is unhealthy"
          description: "Region {{ $labels.region }} has been unhealthy for more than 5 minutes"

      # Per-region mean of region_latency_seconds above 1 for 10 minutes.
      # A bare avg() would drop all labels and merge every region into one
      # global average, leaving $labels.region empty.
      - alert: RegionHighLatency
        expr: avg by (region) (region_latency_seconds) > 1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "High latency in region {{ $labels.region }}"
          description: "Average latency in region {{ $labels.region }} is {{ $value }}s"

      # kube_node_status_condition is a 0/1 gauge exported for every
      # condition/status combination, so the number of Ready nodes is the
      # sum of the status="true" series. count() would count series (all
      # nodes), not nodes that are actually Ready.
      - alert: RegionNodeFailure
        expr: sum by (region) (kube_node_status_condition{condition="Ready",status="true"}) < 3
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Multiple node failures in region {{ $labels.region }}"
          description: "Less than 3 nodes are ready in region {{ $labels.region }}"

      # kube_pod_status_phase is a 0/1 gauge with one series per phase per
      # pod. Summing gives real pod counts: the left side is Running pods,
      # the right side is all pods (exactly one phase is 1 per pod).
      # Counting series instead would compare N against roughly 5N * 0.8 and
      # fire permanently.
      # NOTE(review): pods in phase Succeeded (finished Jobs) are part of the
      # total, as in the original rule - exclude them if that is unwanted.
      - alert: RegionPodFailure
        expr: >-
          sum by (region) (kube_pod_status_phase{phase="Running"})
          < 0.8 * sum by (region) (kube_pod_status_phase)
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High pod failure rate in region {{ $labels.region }}"
          description: "Less than 80% of pods are running in region {{ $labels.region }}"