# sentryagent-idp/monitoring/prometheus/alerts.yml

# Alerting rule groups. Each entry under `groups` has a `name` and a list of
# `rules`; the rules in this group are all built on agentidp_* metrics.
groups:
  # Single rule group holding the alert definitions listed under `rules`.
  - name: agentidp_alerts
    rules:
      # Fires when a series of requests with status_code="401" has a 5-minute
      # per-second rate above 0.5 and that condition holds for 2 minutes.
      # NOTE(review): the threshold is applied to each series separately (no
      # sum() aggregation). If the metric carries extra labels such as route
      # or method, failures spread across them may not trip the alert - confirm
      # whether an aggregated total is intended.
      - alert: AuthFailureSpike
        expr: >-
          rate(agentidp_http_requests_total{status_code="401"}[5m]) > 0.5
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Auth failure spike detected'
          # The rate window is 5m and `for` is 2m; the text states both so
          # responders are not misled about the measurement interval.
          description: 'More than 0.5 auth failures/sec (5-minute rate), sustained for at least 2 minutes.'

      # Fires when a series of requests with status_code="429" has a 5-minute
      # per-second rate above 0.2 and that condition holds for 2 minutes.
      - alert: RateLimitExhaustion
        expr: >-
          rate(agentidp_http_requests_total{status_code="429"}[5m]) > 0.2
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Rate limit exhaustion spike'
          description: 'Sustained rate limit rejections over the past 2 minutes.'

      # Fires when the 5-minute per-second rate of agentidp_tokens_issued_total
      # stays above 10 for 5 minutes.
      - alert: AnomalousTokenIssuance
        expr: >-
          rate(agentidp_tokens_issued_total[5m]) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'Anomalous token issuance rate'
          description: 'More than 10 tokens/sec issued over the past 5 minutes.'

      # Fires with no hold period (for: 0m) once the dead-letter counter has
      # grown by more than 10 within the trailing hour.
      - alert: WebhookDeadLetterAccumulating
        expr: >-
          increase(agentidp_webhook_dead_letters_total[1h]) > 10
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Webhook dead-letter accumulation'
          description: 'More than 10 webhook deliveries moved to dead-letter in the past hour.'

      # Fires with no hold period (for: 0m) whenever
      # agentidp_audit_chain_integrity reports the value 0.
      # NOTE(review): presumably a gauge where non-zero means "verified"; the
      # comparison yields nothing if the series is absent, so a missing metric
      # does not raise this alert - confirm that is acceptable.
      - alert: AuditChainIntegrityFailed
        expr: >-
          agentidp_audit_chain_integrity == 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Audit chain integrity failure'
          description: 'Audit chain verification failed — possible log tampering detected.'

      # Fires with no hold period (for: 0m) when
      # agentidp_credentials_expiring_soon_total increased at all within the
      # trailing hour.
      # NOTE(review): the 7-day horizon in the description is not visible in
      # this expression; it presumably comes from whatever increments the
      # counter - confirm against the exporter.
      - alert: CredentialExpiryApproaching
        expr: >-
          increase(agentidp_credentials_expiring_soon_total[1h]) > 0
        for: 0m
        labels:
          severity: info
        annotations:
          summary: 'Credentials expiring soon'
          description: 'One or more agent credentials will expire within 7 days.'