# sentryagent-idp/docs/openapi/health.yaml

# OpenAPI specification version this document is written against.
openapi: '3.0.3'

# API metadata: title, document version, and the overview text for both endpoints.
info:
  title: 'SentryAgent.ai — Health Check Service'
  version: '1.0.0'
  description: |
    Liveness and readiness health endpoints for the SentryAgent.ai AgentIdP platform.

    Both endpoints are **unauthenticated** — safe to call from monitoring systems,
    load balancers, and container orchestrators without credentials.

    **GET /health** performs a fast liveness check (< 50 ms target).
    **GET /health/detailed** probes each dependency with latency measurement.

# Base URLs against which the paths in this document are resolved.
servers:
  - description: Local development server
    url: 'http://localhost:3000'
  - description: Production server
    url: 'https://api.sentryagent.ai'

# Single tag under which both health operations are grouped.
tags:
  - name: 'Health'
    description: 'Liveness and dependency health endpoints'

components:
  schemas:
    # Two-state connectivity flag used for each service in HealthResponse.
    ServiceSimpleStatus:
      description: Simple connectivity status for the quick health check.
      type: string
      enum: [connected, disconnected]
      example: connected

    # Three-level classification used for each probed service and for the
    # overall status of the detailed health response.
    ServiceDetailedStatus:
      description: |
        Per-service health classification for the detailed health check.
        - `healthy` — responded within 1000 ms
        - `degraded` — responded but latency exceeded 1000 ms
        - `unreachable` — timed out or threw an error
      type: string
      enum: [healthy, degraded, unreachable]

    # One entry of the detailed health map: a status plus the measured latency.
    ServiceHealthResult:
      description: Per-service latency and status result from the detailed health probe.
      type: object
      required: [status, latencyMs]
      properties:
        status:
          $ref: "#/components/schemas/ServiceDetailedStatus"
        latencyMs:
          description: Probe round-trip time in milliseconds.
          type: integer
          example: 12

    # Payload returned by the quick liveness probe (GET /health), for both the
    # 200 and the 503 outcome.
    HealthResponse:
      description: Response body for GET /health — quick liveness check.
      type: object
      required: [status, version, uptime, services]
      properties:
        status:
          description: |
            Overall liveness status.
            - `ok` — all services are connected.
            - `degraded` — one or more services are disconnected.
          type: string
          enum: [ok, degraded]
          example: ok
        version:
          description: Running npm package version.
          type: string
          example: '1.0.0'
        uptime:
          description: Process uptime in whole seconds.
          type: integer
          example: 3724
        services:
          description: Quick connectivity check for core services.
          type: object
          required: [postgres, redis]
          properties:
            # Both core services share the connected/disconnected enum.
            postgres:
              $ref: "#/components/schemas/ServiceSimpleStatus"
            redis:
              $ref: "#/components/schemas/ServiceSimpleStatus"

    # Payload returned by GET /health/detailed for the 200, 207 and 503 outcomes.
    DetailedHealthResponse:
      type: object
      description: |
        Response body for GET /health/detailed. Probes each dependency
        individually and reports per-service latency.
      required:
        - status
        - version
        - uptime
        - services
      properties:
        status:
          # OpenAPI 3.0.x ignores any keyword placed next to `$ref`, so the
          # reference is wrapped in `allOf` to keep the description and example
          # below visible to documentation renderers and code generators.
          allOf:
            - $ref: '#/components/schemas/ServiceDetailedStatus'
          description: Worst-case overall status across all probed services.
          example: healthy
        version:
          type: string
          description: Running npm package version.
          example: "1.0.0"
        uptime:
          type: integer
          description: Process uptime in whole seconds.
          example: 3724
        services:
          type: object
          description: |
            Map of service name to per-service health result.
            Always includes `postgres`; `redis`, `vault`, and `opa` are
            included when the respective client / env-var is configured.
          # Free-form map: keys are service names, every value is a
          # ServiceHealthResult.
          additionalProperties:
            $ref: '#/components/schemas/ServiceHealthResult'
          example:
            postgres:
              status: healthy
              latencyMs: 12
            redis:
              status: healthy
              latencyMs: 3
            vault:
              status: degraded
              latencyMs: 1240
            opa:
              status: healthy
              latencyMs: 8

    # Envelope referenced by the 500 responses of both health operations.
    ErrorResponse:
      description: Standard error response envelope.
      type: object
      required: [code, message]
      properties:
        code:
          type: string
          example: 'INTERNAL_SERVER_ERROR'
        message:
          type: string
          example: 'An unexpected error occurred. Please try again later.'
        # Optional free-form object; not listed under `required`.
        details:
          type: object
          additionalProperties: true

paths:
  # Quick liveness probe: 200 when PostgreSQL and Redis are connected, 503 otherwise.
  /health:
    get:
      tags: [Health]
      operationId: getHealth
      summary: Quick liveness check
      description: |
        Returns `200 OK` when PostgreSQL and Redis are reachable.
        Returns `503 Service Unavailable` when either dependency is disconnected.

        This endpoint is **unauthenticated** — no Bearer token is required.
        Designed for load-balancer health checks and uptime monitors.
      # Empty security requirement list: no credentials are needed.
      security: []
      responses:
        "200":
          description: All services are connected and the application is healthy.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/HealthResponse"
              example:
                status: ok
                version: '1.0.0'
                uptime: 3724
                services:
                  postgres: connected
                  redis: connected
        # Same body schema as 200; only the status values differ.
        "503":
          description: One or more services are disconnected. The application is degraded.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/HealthResponse"
              example:
                status: degraded
                version: '1.0.0'
                uptime: 3724
                services:
                  postgres: connected
                  redis: disconnected
        "500":
          description: Unexpected server error during health probe.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
              example:
                code: INTERNAL_SERVER_ERROR
                message: 'An unexpected error occurred. Please try again later.'

  # Detailed dependency probe: reports a status and latency per configured
  # service and maps the worst status onto the HTTP status code (200/207/503).
  /health/detailed:
    get:
      operationId: getHealthDetailed
      tags:
        - Health
      summary: Detailed dependency health with latency
      description: |
        Probes each configured dependency (PostgreSQL, Redis, Vault, OPA) with a
        3000 ms timeout and reports per-service status and latency.

        **HTTP status codes:**
        - `200` — all probed services are `healthy`
        - `207` — at least one service is `degraded` but none are `unreachable`
        - `503` — at least one service is `unreachable`

        This endpoint is **unauthenticated**.
        Vault and OPA entries are omitted when not configured via environment variables.
      # Empty security requirement list: no credentials are needed.
      security: []
      responses:
        '200':
          # Wording matches ServiceDetailedStatus, where `healthy` means
          # "responded within 1000 ms" (a probe at exactly 1000 ms is healthy).
          description: All probed services are healthy (each responded within 1000 ms).
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DetailedHealthResponse'
              example:
                status: healthy
                version: "1.0.0"
                uptime: 3724
                services:
                  postgres:
                    status: healthy
                    latencyMs: 12
                  redis:
                    status: healthy
                    latencyMs: 3
        # Partial-health outcome: same body schema as 200, overall status `degraded`.
        '207':
          description: At least one service is degraded (latency > 1000 ms) but none are unreachable.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DetailedHealthResponse'
              example:
                status: degraded
                version: "1.0.0"
                uptime: 3724
                services:
                  postgres:
                    status: healthy
                    latencyMs: 14
                  redis:
                    status: degraded
                    latencyMs: 1350
        '503':
          description: At least one service is unreachable (timed out or connection refused).
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DetailedHealthResponse'
              example:
                status: unreachable
                version: "1.0.0"
                uptime: 3724
                services:
                  postgres:
                    status: unreachable
                    latencyMs: 3000
                  redis:
                    status: healthy
                    latencyMs: 4
        '500':
          description: Unexpected server error during health probe.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
              example:
                code: INTERNAL_SERVER_ERROR
                message: "An unexpected error occurred. Please try again later."