# Prometheus configuration for Entra VerifiedID metrics
#
# This file contains two snippets that belong in different places:
#   1. `scrape_configs` - merge into the main Prometheus configuration file.
#   2. `groups`         - alerting rules. Prometheus loads rules from separate
#                         files listed under `rule_files` in the main
#                         configuration; a top-level `groups` key is not valid
#                         inside the main configuration itself.

# Add this to your Prometheus configuration
scrape_configs:
  - job_name: 'identity-service-entra'
    scrape_interval: 15s
    # Must stay below scrape_interval, otherwise Prometheus rejects the job.
    scrape_timeout: 10s
    metrics_path: '/metrics'
    static_configs:
      - targets:
          - 'identity-service:4002'
        labels:
          service: 'identity-service'
          component: 'entra-verifiedid'
          environment: 'production'

# Alert rules for Entra VerifiedID
groups:
  - name: entra_verifiedid
    # Evaluation interval for every rule in this group.
    interval: 30s
    rules:
      # High error rate
      # Both sides are aggregated with sum() so the division does not depend
      # on the two counters carrying identical label sets (the request counter
      # has a `status` label; the error counter presumably does not - verify
      # against the exporter).
      - alert: EntraIssuanceErrorRateHigh
        expr: |
          sum(rate(entra_api_errors_total[5m]))
            /
          sum(rate(entra_api_requests_total[5m]))
            > 0.10
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Entra VerifiedID error rate is high"
          description: "Error rate is {{ $value | humanizePercentage }} (threshold: 10%)"

      # High latency
      # histogram_quantile() needs per-second bucket rates aggregated by `le`;
      # applying it to the raw cumulative buckets would yield the quantile over
      # the whole process lifetime instead of the last 5 minutes.
      - alert: EntraIssuanceLatencyHigh
        expr: |
          histogram_quantile(
            0.95,
            sum by (le) (
              rate(entra_api_request_duration_seconds_bucket{operation="issueCredential"}[5m])
            )
          ) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Entra VerifiedID latency is high"
          description: "p95 latency is {{ $value }}s (threshold: 10s)"

      # Webhook processing failures
      # increase() gives the number of errors inside the window, which is what
      # the description reports; rate() would give errors per second.
      - alert: EntraWebhookProcessingFailed
        expr: |
          sum(increase(entra_webhook_errors_total[5m])) > 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Entra webhook processing failures detected"
          description: "{{ $value }} webhook errors in the last 5 minutes"

      # API down
      # The absent() branch covers the case where the success series does not
      # exist at all (target down, series gone stale, or never reported); in
      # that case rate() returns nothing and `== 0` alone would never fire.
      - alert: EntraAPIDown
        expr: |
          sum(rate(entra_api_requests_total{status="success"}[5m])) == 0
            or
          absent(entra_api_requests_total{status="success"})
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Entra VerifiedID API appears to be down"
          description: "No successful API requests in the last 5 minutes"

      # Rate limit approaching
      # rate() is per second; multiply by 60 so the value and the threshold
      # are in requests per minute, matching the 50/min limit in the
      # description. Summed across all series on the assumption that the limit
      # applies to total traffic - confirm against the Entra quota.
      - alert: EntraRateLimitApproaching
        expr: |
          sum(rate(entra_api_requests_total[1m])) * 60 > 40
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Entra API rate limit approaching"
          description: "Request rate is {{ $value }}/min (limit: 50/min)"