From 8cabc0191cc60a10380b43348a455bfb42c06947 Mon Sep 17 00:00:00 2001 From: "SentryAgent.ai Developer" Date: Tue, 7 Apr 2026 02:24:24 +0000 Subject: [PATCH] docs: commit all Phase 6 documentation updates and OpenSpec archives - devops docs: 8 files updated for Phase 6 state; field-trial.md added (946-line runbook) - developer docs: api-reference (50+ endpoints), quick-start, 5 existing guides updated, 5 new guides added - engineering docs: all 12 files updated (services, architecture, SDK guide, testing, overview) - OpenSpec archives: phase-7-devops-field-trial, developer-docs-phase6-update, engineering-docs-phase6-update - VALIDATOR.md + scripts/start-validator.sh: V&V Architect tooling added - .gitignore: exclude session artifacts, build artifacts, and agent workspaces Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 10 + CLAUDE.md | 2 +- README.md | 1 + VALIDATOR.md | 52 + docs/developers/README.md | 9 +- docs/developers/api-reference.md | 1613 +++++++++++++---- docs/developers/concepts.md | 212 +++ docs/developers/guides/README.md | 7 +- docs/developers/guides/a2a-delegation.md | 167 ++ docs/developers/guides/agntcy-compliance.md | 191 ++ docs/developers/guides/configure-webhooks.md | 219 +++ .../guides/issue-and-revoke-tokens.md | 9 +- docs/developers/guides/manage-api-tiers.md | 140 ++ docs/developers/guides/manage-credentials.md | 5 + docs/developers/guides/query-audit-logs.md | 5 + docs/developers/guides/register-an-agent.md | 9 +- .../guides/use-analytics-dashboard.md | 135 ++ docs/developers/quick-start.md | 61 +- docs/devops/README.md | 17 +- docs/devops/architecture.md | 162 +- docs/devops/database.md | 275 ++- docs/devops/deployment.md | 18 + docs/devops/environment-variables.md | 320 +++- docs/devops/field-trial.md | 946 ++++++++++ docs/devops/local-development.md | 67 +- docs/devops/operations.md | 110 +- docs/devops/security.md | 6 + docs/devops/vault-setup.md | 4 + docs/engineering/01-overview.md | 35 +- docs/engineering/02-architecture.md | 115 
+- docs/engineering/03-tech-stack.md | 79 + docs/engineering/04-codebase-structure.md | 17 + docs/engineering/05-services.md | 242 +++ docs/engineering/06-walkthroughs.md | 257 +++ docs/engineering/09-testing.md | 162 ++ docs/engineering/11-sdk-guide.md | 281 ++- docs/engineering/README.md | 13 +- .../.openspec.yaml | 23 + .../developer-docs-phase6-update/proposal.md | 34 + .../specs/ws1-api-reference/spec.md | 1559 ++++++++++++++++ .../specs/ws2-concepts/spec.md | 283 +++ .../specs/ws3-quick-start/spec.md | 212 +++ .../specs/ws4-guides/spec.md | 1079 +++++++++++ .../specs/ws5-readme/spec.md | 77 + .../.openspec.yaml | 23 + .../proposal.md | 37 + .../specs/ws1-services/spec.md | 259 +++ .../specs/ws2-architecture/spec.md | 235 +++ .../specs/ws3-sdk-rust/spec.md | 300 +++ .../specs/ws4-testing/spec.md | 179 ++ .../specs/ws5-remaining/spec.md | 609 +++++++ .../phase-7-devops-field-trial/.openspec.yaml | 14 + .../phase-7-devops-field-trial/proposal.md | 35 + .../specs/devops-update/spec.md | 1217 +++++++++++++ .../specs/field-trial-guide/spec.md | 1026 +++++++++++ scripts/start-validator.sh | 52 + 56 files changed, 12780 insertions(+), 446 deletions(-) create mode 100644 VALIDATOR.md create mode 100644 docs/developers/guides/a2a-delegation.md create mode 100644 docs/developers/guides/agntcy-compliance.md create mode 100644 docs/developers/guides/configure-webhooks.md create mode 100644 docs/developers/guides/manage-api-tiers.md create mode 100644 docs/developers/guides/use-analytics-dashboard.md create mode 100644 docs/devops/field-trial.md create mode 100644 openspec/changes/archive/developer-docs-phase6-update/.openspec.yaml create mode 100644 openspec/changes/archive/developer-docs-phase6-update/proposal.md create mode 100644 openspec/changes/archive/developer-docs-phase6-update/specs/ws1-api-reference/spec.md create mode 100644 openspec/changes/archive/developer-docs-phase6-update/specs/ws2-concepts/spec.md create mode 100644 
openspec/changes/archive/developer-docs-phase6-update/specs/ws3-quick-start/spec.md create mode 100644 openspec/changes/archive/developer-docs-phase6-update/specs/ws4-guides/spec.md create mode 100644 openspec/changes/archive/developer-docs-phase6-update/specs/ws5-readme/spec.md create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/.openspec.yaml create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/proposal.md create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/specs/ws1-services/spec.md create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/specs/ws2-architecture/spec.md create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/specs/ws3-sdk-rust/spec.md create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/specs/ws4-testing/spec.md create mode 100644 openspec/changes/archive/engineering-docs-phase6-update/specs/ws5-remaining/spec.md create mode 100644 openspec/changes/archive/phase-7-devops-field-trial/.openspec.yaml create mode 100644 openspec/changes/archive/phase-7-devops-field-trial/proposal.md create mode 100644 openspec/changes/archive/phase-7-devops-field-trial/specs/devops-update/spec.md create mode 100644 openspec/changes/archive/phase-7-devops-field-trial/specs/field-trial-guide/spec.md create mode 100755 scripts/start-validator.sh diff --git a/.gitignore b/.gitignore index 6e7af82..988f23f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,13 @@ coverage/ # Next.js build output portal/.next/ portal/node_modules/ +portal/tsconfig.tsbuildinfo + +# Agent workspace directories +.cto-workspace/ +.validator-workspace/ + +# Session artifacts +conversation_backup.txt +next_steps.md +vj_notes/ diff --git a/CLAUDE.md b/CLAUDE.md index 5aa7434..aba8768 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,7 +8,7 @@ This is a PRIVATE project session for SentryAgent.ai. 
## STARTUP PROTOCOL (Required on every new session) On startup, Claude MUST (in order): -1. Read `/README.md` in full before any action +1. Read `/README.md` in full before any action — this is the project PRD (Product Requirements Document) and single source of truth 2. Register with central hub as `CEO-Session` 3. Check `#vpe-cto-approvals` for any pending CTO messages 4. Identify current phase and sprint status diff --git a/README.md b/README.md index 6773383..7d51fa0 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ **Git Repository**: https://git.sentryagent.ai/ **AI Partner**: Anthropic (Claude — All Development, Implementation & Deployment) **Standards**: AGNTCY (Linux Foundation), OpenAPI 3.0, OAuth 2.0, OIDC +**Document Role**: Product Requirements Document (PRD) — this file is the single source of truth for all product requirements, scope, and standards **Last Updated**: 2026-03-28 **Status**: ? Active — Phase 1 MVP diff --git a/VALIDATOR.md b/VALIDATOR.md new file mode 100644 index 0000000..476e036 --- /dev/null +++ b/VALIDATOR.md @@ -0,0 +1,52 @@ +#!/bin/bash +# ============================================================================= +# SentryAgent.ai — Start V&V Architect (Lead Validator) +# ============================================================================= +# Launches an independent Claude Code instance as the Lead Validator. +# This agent verifies the CTO's work against the PRD/OpenSpec. +# +# Usage: +# ./scripts/start-validator.sh +# ============================================================================= + +set -e + +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +VALIDATOR_WORKSPACE="$PROJECT_ROOT/.validator-workspace" +VALIDATOR_PROMPT="$PROJECT_ROOT/VALIDATOR.md" + +echo "==============================================" +echo " SentryAgent.ai — Starting V&V Architect Agent" +echo "==============================================" +echo "" +echo " Project: $PROJECT_ROOT" +echo " Workspace: $VALIDATOR_WORKSPACE" +echo " Role Config: $VALIDATOR_PROMPT" +echo "" +echo " The V&V Architect will:" +echo " 1. Audit Code against OpenSpec PRD" +echo " 2. Enforce DRY Principles" +echo " 3. Log Issues for CTO Resolution" +echo " 4. Maintain Local Fail-Safe Ledger" +echo "" +echo "==============================================" + +# Ensure the Validator Workspace and Local Ledger exist +mkdir -p "$VALIDATOR_WORKSPACE/.openspec/vv_audit" + +# Verify the Validator Persona file exists (from Part 1 of instructions) +if [ ! -f "$VALIDATOR_PROMPT" ]; then + echo "ERROR: VALIDATOR.md not found at $VALIDATOR_PROMPT" + echo "Please ensure you have created the System Instruction file." + exit 1 +fi + +# Synchronize the latest CLAUDE.md to the validator workspace if needed +if [ -f "$PROJECT_ROOT/CLAUDE.md" ]; then + cp "$PROJECT_ROOT/CLAUDE.md" "$VALIDATOR_WORKSPACE/CLAUDE.md" +fi + +# Launch Claude Code as an independent Auditor +cd "$VALIDATOR_WORKSPACE" +exec claude --system-prompt-file "$VALIDATOR_PROMPT" + diff --git a/docs/developers/README.md b/docs/developers/README.md index 6b17308..7701cc1 100644 --- a/docs/developers/README.md +++ b/docs/developers/README.md @@ -1,6 +1,6 @@ # SentryAgent.ai AgentIdP — Developer Documentation -The complete documentation for bedroom developers building with SentryAgent.ai AgentIdP. +The complete documentation for developers building with SentryAgent.ai AgentIdP. ## What is this? 
@@ -19,10 +19,15 @@ SentryAgent.ai AgentIdP is a free, open-source Identity Provider built specifica | Guide | What it covers | |-------|----------------| -| [Register an Agent](guides/register-an-agent.md) | All fields, validation rules, common errors | +| [Register an Agent](guides/register-an-agent.md) | All registration fields, org scoping, validation rules, common errors | | [Manage Credentials](guides/manage-credentials.md) | Generate, list, rotate, revoke credentials | | [Issue and Revoke Tokens](guides/issue-and-revoke-tokens.md) | OAuth 2.0 client credentials flow, introspect, revoke | | [Query Audit Logs](guides/query-audit-logs.md) | Filters, pagination, event structure, retention | +| [Use the Analytics Dashboard](guides/use-analytics-dashboard.md) | Query token trends, activity heatmap, per-agent usage | +| [Manage API Tiers](guides/manage-api-tiers.md) | Check current tier, understand limits, trigger upgrade | +| [A2A Delegation](guides/a2a-delegation.md) | Create and verify agent-to-agent delegation chains | +| [Configure Webhooks](guides/configure-webhooks.md) | Subscribe to events, delivery guarantees, inspect history | +| [AGNTCY Compliance](guides/agntcy-compliance.md) | Export agent cards, generate compliance reports, verify audit chain | ## Base URL diff --git a/docs/developers/api-reference.md b/docs/developers/api-reference.md index 50e2cbc..f03e98c 100644 --- a/docs/developers/api-reference.md +++ b/docs/developers/api-reference.md @@ -1,34 +1,40 @@ # API Reference -Complete reference for all 14 endpoints across the four SentryAgent.ai AgentIdP services. +Complete reference for all SentryAgent.ai AgentIdP endpoints. ## Base URL -``` -http://localhost:3000/api/v1 -``` + http://localhost:3000/api/v1 -The port is configured via the `PORT` environment variable (default: `3000`). - -All endpoints are currently unversioned within the path prefix `/api/v1`. API versioning will be introduced in Phase 2. 
+The port is configured via the PORT environment variable (default: 3000). ## Authentication -All endpoints require a JWT Bearer token in the `Authorization` header: +All endpoints require a JWT Bearer token in the Authorization header unless noted otherwise: -``` -Authorization: Bearer -``` + Authorization: Bearer <token> -Obtain a token via `POST /token` using your agent's `client_id` and `client_secret`. +Obtain a token via POST /api/v1/token using your agent's client_id and client_secret. + +Endpoints marked **No auth** do not require a Bearer token. Endpoints marked **Unauthenticated** are +intentionally public. ## Table of Contents - [Errors](#errors) -- [Agent Registry](#agent-registry) — 5 endpoints -- [OAuth 2.0 Tokens](#oauth-20-tokens) — 3 endpoints -- [Credential Management](#credential-management) — 4 endpoints -- [Audit Log](#audit-log) — 2 endpoints +- [Agent Registry](#section-1--agent-registry) +- [Credentials](#section-2--credentials) +- [OAuth 2.0 / Tokens](#section-3--oauth-20--tokens) +- [Audit Log](#section-4--audit-log) +- [Organizations](#section-5--organizations) +- [Analytics](#section-6--analytics) +- [API Tiers](#section-7--api-tiers) +- [Compliance](#section-8--compliance) +- [Webhooks](#section-9--webhooks) +- [Federation](#section-10--federation) +- [DID / OIDC](#section-11--did--oidc) +- [A2A Delegation](#section-12--a2a-delegation) +- [Marketplace](#section-13--marketplace) --- @@ -67,6 +73,18 @@ The `details` field is optional and provides additional context (e.g. 
which fiel | `AGENT_DECOMMISSIONED` | 403 | Cannot modify a decommissioned agent | | `RETENTION_WINDOW_EXCEEDED` | 400 | Requested audit date is outside the 90-day retention window | | `INTERNAL_SERVER_ERROR` | 500 | Unexpected server error | +| `ORG_NOT_FOUND` | 404 | Organization with the given `orgId` does not exist | +| `ORG_ALREADY_EXISTS` | 409 | An organization with this slug already exists | +| `ORG_SUSPENDED` | 403 | Organization is suspended — operations are blocked | +| `MEMBER_ALREADY_EXISTS` | 409 | Agent is already a member of this organization | +| `DELEGATION_NOT_FOUND` | 404 | Delegation chain with the given `chainId` does not exist | +| `DELEGATION_EXPIRED` | 403 | Delegation token has expired | +| `DELEGATION_REVOKED` | 403 | Delegation token has been revoked | +| `DELEGATION_SCOPE_EXCEEDED` | 403 | Requested scopes exceed the delegator's own scopes | +| `TIER_UPGRADE_NOT_REQUIRED` | 400 | Target tier is not higher than the current tier | +| `WEBHOOK_NOT_FOUND` | 404 | Webhook subscription with the given `id` does not exist | +| `PARTNER_NOT_FOUND` | 404 | Federation partner with the given `id` does not exist | +| `COMPLIANCE_DISABLED` | 404 | AGNTCY compliance endpoints are disabled on this instance | ### Rate limit headers @@ -82,37 +100,52 @@ On `429` responses, wait until `X-RateLimit-Reset` before retrying. --- -## Agent Registry +## Section 1 — Agent Registry ### POST /agents — Register a new agent -Creates a new AI agent identity. The `agentId` is system-assigned. - +**Description**: Creates a new AI agent identity. The `agentId` is system-assigned. **Auth**: Bearer token with `agents:write` scope. 
**Request body** (`application/json`): | Field | Type | Required | Description | |-------|------|----------|-------------| -| `email` | string | Yes | Unique email-format identifier | +| `email` | string | Yes | Unique email-format identifier for the agent | | `agentType` | enum | Yes | `screener` \| `classifier` \| `orchestrator` \| `extractor` \| `summarizer` \| `router` \| `monitor` \| `custom` | -| `version` | string | Yes | Semantic version (e.g. `1.0.0`) | -| `capabilities` | string[] | Yes | `resource:action` strings, min 1 | -| `owner` | string | Yes | Owning team/org, 1–128 chars | +| `version` | string | Yes | Semantic version string, e.g. `1.0.0` | +| `capabilities` | string[] | Yes | One or more `resource:action` strings (min 1) | +| `owner` | string | Yes | Owning team or organisation, 1–128 characters | | `deploymentEnv` | enum | Yes | `development` \| `staging` \| `production` | +| `organization_id` | string | No | UUID of the org to scope the agent to. Required on multi-tenant instances. 
| -**Response codes**: +**Response** `201 Created`: -| Code | Meaning | -|------|---------| -| `201` | Agent registered successfully | -| `400` | Validation error | -| `401` | Invalid token | -| `403` | Insufficient scope or free tier limit reached | -| `409` | Email already registered | -| `429` | Rate limit exceeded | +| Field | Type | Description | +|-------|------|-------------| +| `agentId` | UUID | System-assigned immutable identifier | +| `email` | string | Unique email identifier | +| `agentType` | string | Agent type | +| `version` | string | Semantic version | +| `capabilities` | string[] | Capability list | +| `owner` | string | Owning team | +| `deploymentEnv` | string | Deployment environment | +| `status` | string | Always `active` on creation | +| `createdAt` | ISO 8601 | Registration timestamp | +| `updatedAt` | ISO 8601 | Last update timestamp | -**Example**: +**Error responses**: + +| Condition | HTTP | Error code | +|------|------|-----------| +| Validation failure | 400 | `VALIDATION_ERROR` | +| Invalid token | 401 | `UNAUTHORIZED` | +| Missing scope | 403 | `INSUFFICIENT_SCOPE` | +| Free tier limit | 403 | `FREE_TIER_LIMIT_EXCEEDED` | +| Email taken | 409 | `AGENT_ALREADY_EXISTS` | +| Rate limit | 429 | `RATE_LIMIT_EXCEEDED` | + +**curl example**: ```bash curl -s -X POST http://localhost:3000/api/v1/agents \ @@ -132,8 +165,7 @@ curl -s -X POST http://localhost:3000/api/v1/agents \ ### GET /agents — List agents -Returns a paginated list of registered agents. - +**Description**: Returns a paginated list of registered agents. **Auth**: Bearer token with `agents:read` scope. **Query parameters**: @@ -144,19 +176,13 @@ Returns a paginated list of registered agents. 
| `limit` | integer | 20 | Results per page (max 100) | | `owner` | string | — | Filter by owner (exact match) | | `agentType` | enum | — | Filter by agent type | -| `status` | enum | — | Filter by status | +| `status` | enum | — | Filter by status (`active` \| `suspended` \| `decommissioned`) | -**Response codes**: +**Response** `200 OK`: `{ data: Agent[], total: number, page: number, limit: number }` -| Code | Meaning | -|------|---------| -| `200` | List returned | -| `400` | Invalid query parameters | -| `401` | Invalid token | -| `403` | Insufficient scope | -| `429` | Rate limit exceeded | +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` -**Example**: +**curl example**: ```bash curl -s "http://localhost:3000/api/v1/agents?page=1&limit=20&status=active" \ @@ -167,27 +193,16 @@ curl -s "http://localhost:3000/api/v1/agents?page=1&limit=20&status=active" \ ### GET /agents/{agentId} — Get agent by ID -Returns the full identity record for a single agent. - +**Description**: Returns the full identity record for a single agent. **Auth**: Bearer token with `agents:read` scope. -**Path parameters**: +**Path parameters**: `agentId` (UUID) -| Parameter | Type | Description | -|-----------|------|-------------| -| `agentId` | UUID | The agent's immutable identifier | +**Response** `200 OK`: Full agent object (same fields as POST response). 
-**Response codes**: +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` -| Code | Meaning | -|------|---------| -| `200` | Agent record returned | -| `401` | Invalid token | -| `403` | Insufficient scope | -| `404` | Agent not found | -| `429` | Rate limit exceeded | - -**Example**: +**curl example**: ```bash curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID" \ @@ -198,8 +213,7 @@ curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID" \ ### PATCH /agents/{agentId} — Update agent metadata -Partially updates agent metadata. Only provided fields are changed. Immutable fields (`agentId`, `email`, `createdAt`) cannot be updated. - +**Description**: Partially updates agent metadata. Immutable fields (`agentId`, `email`, `createdAt`) cannot be changed. **Auth**: Bearer token with `agents:write` scope. **Request body** (`application/json`) — all fields optional: @@ -208,25 +222,16 @@ Partially updates agent metadata. Only provided fields are changed. Immutable fi |-------|------|-------------| | `agentType` | enum | Updated agent type | | `version` | string | Updated semantic version | -| `capabilities` | string[] | Updated capabilities (replaces the full list) | +| `capabilities` | string[] | Updated capabilities (replaces full list) | | `owner` | string | Updated owner | | `deploymentEnv` | enum | Updated deployment environment | -| `status` | enum | Updated status (`active` \| `suspended` \| `decommissioned`) | +| `status` | enum | `active` \| `suspended` \| `decommissioned`. Setting `decommissioned` is irreversible. | -> Setting `status` to `decommissioned` is **irreversible**. The agent cannot be reactivated. +**Response** `200 OK`: Full updated agent object. 
-**Response codes**: +**Error responses**: 400 `VALIDATION_ERROR` / `IMMUTABLE_FIELD`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE` / `AGENT_DECOMMISSIONED`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` -| Code | Meaning | -|------|---------| -| `200` | Agent updated, full record returned | -| `400` | Validation error or attempt to modify immutable field | -| `401` | Invalid token | -| `403` | Insufficient scope or agent is decommissioned | -| `404` | Agent not found | -| `429` | Rate limit exceeded | - -**Example**: +**curl example**: ```bash curl -s -X PATCH "http://localhost:3000/api/v1/agents/$AGENT_ID" \ @@ -239,22 +244,14 @@ curl -s -X PATCH "http://localhost:3000/api/v1/agents/$AGENT_ID" \ ### DELETE /agents/{agentId} — Decommission an agent -Permanently decommissions an agent (soft delete). All active credentials are immediately revoked. This operation is **irreversible**. - +**Description**: Permanently decommissions an agent (soft delete). All active credentials are immediately revoked. Irreversible. **Auth**: Bearer token with `agents:write` scope. -**Response codes**: +**Response** `204 No Content` (empty body). -| Code | Meaning | -|------|---------| -| `204` | Agent decommissioned (no body) | -| `401` | Invalid token | -| `403` | Insufficient scope | -| `404` | Agent not found | -| `409` | Agent already decommissioned | -| `429` | Rate limit exceeded | +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND`, 409 `AGENT_ALREADY_DECOMMISSIONED`, 429 `RATE_LIMIT_EXCEEDED` -**Example**: +**curl example**: ```bash curl -s -X DELETE "http://localhost:3000/api/v1/agents/$AGENT_ID" \ @@ -264,38 +261,143 @@ curl -s -X DELETE "http://localhost:3000/api/v1/agents/$AGENT_ID" \ --- -## OAuth 2.0 Tokens +## Section 2 — Credentials + +### POST /agents/{agentId}/credentials — Generate credentials + +**Description**: Creates a new `client_id` + `client_secret` pair. The `clientSecret` is returned once only. 
+**Auth**: Bearer token with `agents:write` scope. + +**Request body** (`application/json`) — optional: + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `expiresAt` | ISO 8601 | No | Optional expiry date. Must be a future date. Omit for non-expiring credential. | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `credentialId` | UUID | Unique credential identifier | +| `clientId` | UUID | Same as `agentId` | +| `clientSecret` | string | Plaintext secret (shown once only — store immediately) | +| `status` | string | `active` | +| `createdAt` | ISO 8601 | Creation timestamp | +| `expiresAt` | ISO 8601 \| null | Expiry date or null | +| `revokedAt` | ISO 8601 \| null | Always null on creation | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE` / `AGENT_NOT_ACTIVE`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "expiresAt": "2027-01-01T00:00:00.000Z" }' | jq . +``` + +--- + +### GET /agents/{agentId}/credentials — List credentials + +**Description**: Returns all credentials for an agent (active and revoked). The `clientSecret` is never returned. +**Auth**: Bearer token with `agents:read` scope. + +**Query parameters**: `page` (default 1), `limit` (default 20, max 100), `status` (`active` \| `revoked`) + +**Response** `200 OK`: `{ data: Credential[], total: number, page: number, limit: number }` + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials?status=active" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +### POST /agents/{agentId}/credentials/{credentialId}/rotate — Rotate a credential + +**Description**: Generates a new `clientSecret` for the same `credentialId`. The old secret is immediately invalidated. +**Auth**: Bearer token with `agents:write` scope. + +**Request body** (`application/json`) — optional: `{ "expiresAt": "ISO 8601" }` + +**Response** `200 OK`: Full credential object with new `clientSecret` (shown once only). + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND` / `CREDENTIAL_NOT_FOUND`, 409 `CREDENTIAL_ALREADY_REVOKED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST \ + "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials/$CREDENTIAL_ID/rotate" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{}' | jq . +``` + +--- + +### DELETE /agents/{agentId}/credentials/{credentialId} — Revoke a credential + +**Description**: Permanently revokes a credential. Irreversible. +**Auth**: Bearer token with `agents:write` scope. + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND` / `CREDENTIAL_NOT_FOUND`, 409 `CREDENTIAL_ALREADY_REVOKED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE \ + "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials/$CREDENTIAL_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 3 — OAuth 2.0 / Tokens ### POST /token — Issue an access token -Issues a signed RS256 JWT via the OAuth 2.0 Client Credentials grant. - -**Auth**: Client credentials in the request body (no Bearer token required for this endpoint). - -> **Content-Type**: This endpoint uses `application/x-www-form-urlencoded`, not JSON. +**Description**: Issues a signed RS256 JWT via the OAuth 2.0 Client Credentials grant. 
+**Auth**: No Bearer token — credentials are in the request body. +**Content-Type**: `application/x-www-form-urlencoded` **Request fields** (form-encoded): | Field | Required | Description | |-------|----------|-------------| | `grant_type` | Yes | Must be `client_credentials` | -| `client_id` | Yes | Your agent's `agentId` (UUID) | -| `client_secret` | Yes | The credential secret | +| `client_id` | Yes | Agent's `agentId` (UUID) | +| `client_secret` | Yes | Credential secret | | `scope` | No | Space-separated scopes. If omitted, all scopes are granted. | -**Response codes**: +**Response** `200 OK`: -| Code | Meaning | -|------|---------| -| `200` | Token issued | -| `400` | Malformed request, invalid scope, or unsupported grant type | -| `401` | Invalid `client_id` or `client_secret` | -| `403` | Agent suspended or monthly token limit reached | -| `429` | Rate limit exceeded | +| Field | Type | Description | +|-------|------|-------------| +| `access_token` | string | Signed RS256 JWT | +| `token_type` | string | Always `Bearer` | +| `expires_in` | integer | Lifetime in seconds (3600) | +| `scope` | string | Granted scopes (space-separated) | -**Note on 429**: The `X-RateLimit-*` headers are returned on all responses, including `429`. +**Error responses**: -**Example**: +| Condition | HTTP | Error | +|------|------|-------| +| Bad request / bad grant | 400 | `{ "error": "unsupported_grant_type" }` | +| Bad credentials | 401 | `{ "error": "invalid_client" }` | +| Agent suspended or monthly limit | 403 | `{ "error": "unauthorized_client" }` | +| Rate limit | 429 | `RATE_LIMIT_EXCEEDED` | + +**curl example**: ```bash curl -s -X POST http://localhost:3000/api/v1/token \ @@ -310,30 +412,18 @@ curl -s -X POST http://localhost:3000/api/v1/token \ ### POST /token/introspect — Introspect a token -Checks whether a token is active. Returns `{ "active": false }` for expired or revoked tokens — always `200 OK`. - +**Description**: Checks whether a token is active. 
Always returns `200 OK` — check the `active` field. **Auth**: Bearer token with `tokens:read` scope. +**Content-Type**: `application/x-www-form-urlencoded` -> **Content-Type**: `application/x-www-form-urlencoded` +**Request fields**: `token` (required), `token_type_hint` (optional, `access_token`) -**Request fields**: +**Response** `200 OK` (active): `{ "active": true, "sub": "...", "client_id": "...", "scope": "...", "token_type": "Bearer", "iat": 0, "exp": 0 }` +**Response** `200 OK` (inactive): `{ "active": false }` -| Field | Required | Description | -|-------|----------|-------------| -| `token` | Yes | The JWT to introspect | -| `token_type_hint` | No | Optional hint — `access_token` | +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` -**Response codes**: - -| Code | Meaning | -|------|---------| -| `200` | Result returned (check `active` field) | -| `400` | Missing `token` parameter | -| `401` | Caller's Bearer token is invalid | -| `403` | Caller's token lacks `tokens:read` scope | -| `429` | Rate limit exceeded | - -**Example**: +**curl example**: ```bash curl -s -X POST http://localhost:3000/api/v1/token/introspect \ @@ -346,30 +436,17 @@ curl -s -X POST http://localhost:3000/api/v1/token/introspect \ ### POST /token/revoke — Revoke a token -Immediately invalidates a token. Idempotent — revoking an already-revoked token returns `200`. +**Description**: Immediately invalidates a token. Idempotent. +**Auth**: Bearer token. +**Content-Type**: `application/x-www-form-urlencoded` -**Auth**: Bearer token (agent can revoke its own tokens). 
+**Request fields**: `token` (required), `token_type_hint` (optional) -> **Content-Type**: `application/x-www-form-urlencoded` +**Response** `200 OK`: `{}` (empty object) -**Request fields**: +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `FORBIDDEN`, 429 `RATE_LIMIT_EXCEEDED` -| Field | Required | Description | -|-------|----------|-------------| -| `token` | Yes | The JWT to revoke | -| `token_type_hint` | No | Optional hint — `access_token` | - -**Response codes**: - -| Code | Meaning | -|------|---------| -| `200` | Token revoked (or was already inactive) | -| `400` | Missing `token` parameter | -| `401` | Caller's Bearer token is invalid | -| `403` | Insufficient permissions to revoke this token | -| `429` | Rate limit exceeded | - -**Example**: +**curl example**: ```bash curl -s -X POST http://localhost:3000/api/v1/token/revoke \ @@ -380,146 +457,11 @@ curl -s -X POST http://localhost:3000/api/v1/token/revoke \ --- -## Credential Management - -### POST /agents/{agentId}/credentials — Generate credentials - -Creates a new `client_id` + `client_secret` pair. The `clientSecret` is returned **once only**. - -**Auth**: Bearer token with `agents:write` scope. - -**Request body** (`application/json`) — optional: - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `expiresAt` | ISO 8601 | No | Optional expiry date. Must be a future date. If omitted, credential does not expire. 
| - -**Response codes**: - -| Code | Meaning | -|------|---------| -| `201` | Credential created — save `clientSecret` now | -| `400` | Invalid `expiresAt` | -| `401` | Invalid token | -| `403` | Insufficient scope or agent not active | -| `404` | Agent not found | -| `429` | Rate limit exceeded | - -**Example**: - -```bash -curl -s -X POST "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials" \ - -H "Authorization: Bearer $TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ "expiresAt": "2027-01-01T00:00:00.000Z" }' | jq . -``` - ---- - -### GET /agents/{agentId}/credentials — List credentials - -Returns all credentials (active and revoked). The `clientSecret` is never returned. - -**Auth**: Bearer token with `agents:read` scope. - -**Query parameters**: - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `page` | integer | 1 | Page number | -| `limit` | integer | 20 | Results per page (max 100) | -| `status` | enum | — | Filter by `active` or `revoked` | - -**Response codes**: - -| Code | Meaning | -|------|---------| -| `200` | List returned | -| `400` | Invalid query parameters | -| `401` | Invalid token | -| `403` | Insufficient scope | -| `404` | Agent not found | -| `429` | Rate limit exceeded | - -**Example**: - -```bash -curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials?status=active" \ - -H "Authorization: Bearer $TOKEN" | jq . -``` - ---- - -### POST /agents/{agentId}/credentials/{credentialId}/rotate — Rotate a credential - -Replaces the `clientSecret` for the same `credentialId`. The old secret is immediately invalidated. - -**Auth**: Bearer token with `agents:write` scope. 
- -**Request body** (`application/json`) — optional: - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `expiresAt` | ISO 8601 | No | New expiry for the rotated credential | - -**Response codes**: - -| Code | Meaning | -|------|---------| -| `200` | Credential rotated — save new `clientSecret` now | -| `400` | Invalid `expiresAt` | -| `401` | Invalid token | -| `403` | Insufficient scope | -| `404` | Agent or credential not found | -| `409` | Credential is already revoked | -| `429` | Rate limit exceeded | - -**Example**: - -```bash -curl -s -X POST \ - "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials/$CREDENTIAL_ID/rotate" \ - -H "Authorization: Bearer $TOKEN" \ - -H "Content-Type: application/json" \ - -d '{}' | jq . -``` - ---- - -### DELETE /agents/{agentId}/credentials/{credentialId} — Revoke a credential - -Permanently revokes a credential. The credential can no longer obtain tokens. Irreversible. - -**Auth**: Bearer token with `agents:write` scope. - -**Response codes**: - -| Code | Meaning | -|------|---------| -| `204` | Credential revoked (no body) | -| `401` | Invalid token | -| `403` | Insufficient scope | -| `404` | Agent or credential not found | -| `409` | Credential already revoked | -| `429` | Rate limit exceeded | - -**Example**: - -```bash -curl -s -X DELETE \ - "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials/$CREDENTIAL_ID" \ - -H "Authorization: Bearer $TOKEN" \ - -o /dev/null -w "%{http_code}\n" -``` - ---- - -## Audit Log +## Section 4 — Audit Log ### GET /audit — Query audit log -Returns a paginated, filtered list of audit events (most recent first). - +**Description**: Returns a paginated, filtered list of audit events (most recent first). **Auth**: Bearer token with `audit:read` scope. **Query parameters**: @@ -529,22 +471,18 @@ Returns a paginated, filtered list of audit events (most recent first). 
| `page` | integer | 1 | Page number | | `limit` | integer | 50 | Results per page (max 200) | | `agentId` | UUID | — | Filter by agent | -| `action` | enum | — | Filter by action type (see [Audit Log guide](guides/query-audit-logs.md)) | +| `action` | string | — | Filter by action type (e.g. `token.issued`, `agent.created`) | | `outcome` | enum | — | `success` or `failure` | | `fromDate` | ISO 8601 | — | Events at or after this timestamp (max 90 days ago) | | `toDate` | ISO 8601 | — | Events at or before this timestamp | -**Response codes**: +**Response** `200 OK`: `{ data: AuditEvent[], total: number, page: number, limit: number }` -| Code | Meaning | -|------|---------| -| `200` | Events returned | -| `400` | Invalid parameters or date outside retention window | -| `401` | Invalid token | -| `403` | Token lacks `audit:read` scope | -| `429` | Rate limit exceeded | +**AuditEvent fields**: `eventId` (UUID), `agentId` (UUID), `action` (string), `outcome` (string), `ipAddress` (string), `userAgent` (string), `metadata` (object), `timestamp` (ISO 8601) -**Example**: +**Error responses**: 400 `VALIDATION_ERROR` / `RETENTION_WINDOW_EXCEEDED`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: ```bash curl -s "http://localhost:3000/api/v1/audit?agentId=$AGENT_ID&action=token.issued&limit=50" \ @@ -555,29 +493,1086 @@ curl -s "http://localhost:3000/api/v1/audit?agentId=$AGENT_ID&action=token.issue ### GET /audit/{eventId} — Get audit event by ID -Returns a single audit event by its immutable `eventId`. - +**Description**: Returns a single audit event by its immutable `eventId`. **Auth**: Bearer token with `audit:read` scope. -**Path parameters**: +**Path parameters**: `eventId` (UUID) -| Parameter | Type | Description | -|-----------|------|-------------| -| `eventId` | UUID | The audit event's identifier | +**Response** `200 OK`: Single AuditEvent object. 
-**Response codes**: +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AUDIT_EVENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` -| Code | Meaning | -|------|---------| -| `200` | Audit event returned | -| `401` | Invalid token | -| `403` | Token lacks `audit:read` scope | -| `404` | Event not found or outside 90-day retention window | -| `429` | Rate limit exceeded | - -**Example**: +**curl example**: ```bash curl -s "http://localhost:3000/api/v1/audit/$EVENT_ID" \ -H "Authorization: Bearer $TOKEN" | jq . ``` + +--- + +### GET /audit/verify — Verify audit chain integrity + +**Description**: Verifies the cryptographic hash chain of all audit events. Returns `verified: true` if the chain is intact. Rate limited to 30 req/min (computationally intensive). +**Auth**: Bearer token with `audit:read` scope. + +**Query parameters**: `fromDate` (ISO 8601, optional), `toDate` (ISO 8601, optional) + +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `verified` | boolean | `true` if chain is intact, `false` if tampering detected | +| `checkedCount` | integer | Number of events checked | +| `fromDate` | ISO 8601 \| null | Verification window start | +| `toDate` | ISO 8601 \| null | Verification window end | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/audit/verify" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Section 5 — Organizations + +### POST /organizations — Create an organization + +**Description**: Creates a new tenant organization. Agents can be scoped to an organization via `organization_id`. +**Auth**: Bearer token (OPA scope enforcement — `admin:orgs` or equivalent policy). 
+ +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Display name, 1–255 characters | +| `slug` | string | Yes | URL-safe identifier — lowercase letters, digits, hyphens only | +| `planTier` | enum | No | `free` (default) \| `pro` \| `enterprise` | +| `maxAgents` | integer | No | Override the plan default agent limit | +| `maxTokensPerMonth` | integer | No | Override the plan default monthly token limit | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `organizationId` | UUID | System-assigned organization identifier | +| `name` | string | Display name | +| `slug` | string | URL-safe slug | +| `planTier` | string | Current plan tier | +| `maxAgents` | integer | Agent limit | +| `maxTokensPerMonth` | integer | Monthly token limit | +| `status` | string | `active` | +| `createdAt` | ISO 8601 | Creation timestamp | +| `updatedAt` | ISO 8601 | Last update timestamp | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 409 `ORG_ALREADY_EXISTS`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Acme AI", + "slug": "acme-ai", + "planTier": "pro" + }' | jq . +``` + +--- + +### GET /organizations — List organizations + +**Description**: Returns a paginated list of organizations. +**Auth**: Bearer token (OPA scope enforcement). + +**Query parameters**: `page` (default 1), `limit` (default 20, max 100), `status` (`active` \| `suspended` \| `deleted`) + +**Response** `200 OK`: `{ data: Organization[], total: number, page: number, limit: number }` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/organizations?status=active" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +### GET /organizations/{orgId} — Get organization by ID + +**Description**: Returns the full record for a single organization. +**Auth**: Bearer token (OPA scope enforcement). + +**Path parameters**: `orgId` (UUID) + +**Response** `200 OK`: Full organization object. + +**Error responses**: 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/organizations/$ORG_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /organizations/{orgId} — Update organization + +**Description**: Partially updates an organization. The `slug` is immutable after creation. +**Auth**: Bearer token (OPA scope enforcement). + +**Request body** (`application/json`) — all fields optional: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | New display name | +| `planTier` | enum | `free` \| `pro` \| `enterprise` | +| `maxAgents` | integer | New agent limit | +| `maxTokensPerMonth` | integer | New token limit | +| `status` | enum | `active` \| `suspended` (use DELETE to set `deleted`) | + +**Response** `200 OK`: Full updated organization object. + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/organizations/$ORG_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "planTier": "enterprise" }' | jq . +``` + +--- + +### DELETE /organizations/{orgId} — Delete organization + +**Description**: Soft-deletes an organization. Sets status to `deleted`. +**Auth**: Bearer token (OPA scope enforcement). + +**Response** `204 No Content`. 
+ +**Error responses**: 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/organizations/$ORG_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +### POST /organizations/{orgId}/members — Add a member + +**Description**: Adds an existing agent to an organization with a specified role. +**Auth**: Bearer token (OPA scope enforcement). + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `agentId` | UUID | Yes | The agent to add | +| `role` | enum | Yes | `member` \| `admin` | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `memberId` | UUID | Membership record identifier | +| `organizationId` | UUID | Organization | +| `agentId` | UUID | Agent | +| `role` | string | `member` or `admin` | +| `joinedAt` | ISO 8601 | Membership creation timestamp | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND` / `AGENT_NOT_FOUND`, 409 `MEMBER_ALREADY_EXISTS`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST "http://localhost:3000/api/v1/organizations/$ORG_ID/members" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "agentId": "'$AGENT_ID'", + "role": "member" + }' | jq . +``` + +--- + +## Section 6 — Analytics + +All analytics endpoints are scoped to the authenticated agent's `organization_id`. + +### GET /analytics/tokens — Token issuance trend + +**Description**: Returns daily token issuance counts for the past N days, scoped to the current organization. +**Auth**: Bearer token. 
+ +**Query parameters**: + +| Parameter | Type | Default | Max | Description | +|-----------|------|---------|-----|-------------| +| `days` | integer | 30 | 90 | Number of days to return | + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "days": 30, + "data": [ + { "date": "2026-03-01", "count": 142 }, + { "date": "2026-03-02", "count": 198 } + ] +} +``` + +**Error responses**: 400 `VALIDATION_ERROR` (days > 90), 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=30" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /analytics/agents/activity — Agent activity heatmap + +**Description**: Returns agent request counts grouped by day-of-week and hour (UTC), for the current organization. +**Auth**: Bearer token. + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "data": [ + { "dow": 1, "hour": 9, "count": 54 }, + { "dow": 1, "hour": 10, "count": 87 } + ] +} +``` + +`dow` is 0 (Sunday) through 6 (Saturday). `hour` is 0–23 UTC. + +**Error responses**: 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents/activity" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /analytics/agents — Per-agent usage summary + +**Description**: Returns token issuance counts per agent for the current calendar month, for the current organization. +**Auth**: Bearer token. + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "month": "2026-03", + "data": [ + { "agentId": "uuid", "tokenCount": 312 }, + { "agentId": "uuid2", "tokenCount": 87 } + ] +} +``` + +**Error responses**: 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +## Section 7 — API Tiers + +### GET /tiers/status — Get current tier status + +**Description**: Returns the organization's current plan tier, configured limits, and live usage counters. +**Auth**: Bearer token with a valid `organization_id` claim. + +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `tier` | string | `free` \| `pro` \| `enterprise` | +| `limits.maxAgents` | integer | Maximum agents allowed | +| `limits.maxCallsPerDay` | integer | Maximum API calls per day | +| `limits.maxTokensPerDay` | integer | Maximum token issuances per day | +| `usage.agentCount` | integer | Current active agent count | +| `usage.callsToday` | integer | API calls made today | +| `usage.tokensToday` | integer | Tokens issued today | + +**Error responses**: 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/tiers/status" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### POST /tiers/upgrade — Initiate tier upgrade + +**Description**: Creates a Stripe Checkout Session to upgrade the organization to a higher plan tier. Returns a one-time checkout URL to redirect the user to. +**Auth**: Bearer token with a valid `organization_id` claim. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `target_tier` | enum | Yes | `pro` \| `enterprise` — must be higher than current tier | + +**Response** `200 OK`: + +```json +{ "checkoutUrl": "https://checkout.stripe.com/pay/cs_live_..." } +``` + +**Error responses**: 400 `VALIDATION_ERROR` / `TIER_UPGRADE_NOT_REQUIRED`, 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "pro" }' | jq . 
+``` + +--- + +## Section 8 — Compliance + +### GET /compliance/controls — SOC 2 control status (public) + +**Description**: Returns the live status of all SOC 2 Trust Services Criteria controls. No authentication required. +**Auth**: None. + +**Response** `200 OK` (`Cache-Control: public, max-age=60`): + +```json +{ + "controls": [ + { "id": "CC6.1", "name": "Logical Access Controls", "status": "pass", "lastChecked": "2026-04-04T00:00:00.000Z" }, + { "id": "CC7.2", "name": "System Monitoring", "status": "pass", "lastChecked": "2026-04-04T00:00:00.000Z" } + ] +} +``` + +Each control: `id` (string), `name` (string), `status` (`pass` \| `fail` \| `unknown`), `lastChecked` (ISO 8601) + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/controls" | jq . +``` + +--- + +### GET /compliance/report — AGNTCY compliance report + +**Description**: Generates an AGNTCY compliance report for the authenticated tenant. Cached in Redis for 5 minutes. Sets `X-Cache: HIT` when served from cache. +**Auth**: Bearer token. + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "generatedAt": "2026-04-04T00:00:00.000Z", + "agntcyConformance": true, + "agentCount": 12, + "verifiedAgentCount": 12, + "auditChainIntegrity": true, + "from_cache": false +} +``` + +**Error responses**: 401 `UNAUTHORIZED`, 404 `COMPLIANCE_DISABLED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/report" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /compliance/agent-cards — Export AGNTCY agent cards + +**Description**: Exports all active agents for the authenticated tenant as AGNTCY-standard agent card JSON objects. +**Auth**: Bearer token. + +**Response** `200 OK`: Array of agent card objects. 
+ +Each card: `did` (string), `name` (string), `agentType` (string), `capabilities` (string[]), `owner` (string), `version` (string), `deploymentEnv` (string), `identityProvider` (string), `issuedAt` (ISO 8601) + +**Error responses**: 401 `UNAUTHORIZED`, 404 `COMPLIANCE_DISABLED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/agent-cards" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Section 9 — Webhooks + +All webhook endpoints require Bearer token authentication and are scoped to the authenticated agent's `organization_id`. Required scopes are enforced via OPA policy. + +### POST /webhooks — Create a subscription + +**Description**: Creates a new webhook subscription for the organization. The `signingSecret` is returned once only. +**Auth**: Bearer token. OPA enforces `webhooks:write`. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Human-readable subscription name | +| `url` | string | Yes | HTTPS endpoint that will receive events | +| `events` | string[] | Yes | One or more event types to subscribe to (see event type list below) | + +**Available event types**: `agent.created`, `agent.updated`, `agent.suspended`, `agent.reactivated`, `agent.decommissioned`, `credential.generated`, `credential.rotated`, `credential.revoked`, `token.issued`, `token.revoked` + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `id` | UUID | Subscription identifier | +| `organization_id` | UUID | Owning organization | +| `name` | string | Subscription name | +| `url` | string | Target endpoint URL | +| `events` | string[] | Subscribed event types | +| `active` | boolean | `true` | +| `signingSecret` | string | HMAC-SHA256 signing secret (shown once) | +| `failure_count` | integer | `0` | +| `created_at` | ISO 8601 | Creation timestamp | +| `updated_at` 
| ISO 8601 | Last update timestamp | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/webhooks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "prod-events", + "url": "https://my-app.example.com/hooks/sentryagent", + "events": ["agent.created", "token.issued"] + }' | jq . +``` + +--- + +### GET /webhooks — List subscriptions + +**Description**: Returns all webhook subscriptions for the organization. Signing secrets are never returned. +**Auth**: Bearer token. OPA enforces `webhooks:read`. + +**Response** `200 OK`: Array of subscription objects (without `signingSecret`). + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/webhooks" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /webhooks/{id} — Get subscription by ID + +**Description**: Returns a single subscription by its UUID. +**Auth**: Bearer token. OPA enforces `webhooks:read`. + +**Path parameters**: `id` (UUID) + +**Response** `200 OK`: Single subscription object (without `signingSecret`). + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /webhooks/{id} — Update subscription + +**Description**: Partially updates a webhook subscription. +**Auth**: Bearer token. OPA enforces `webhooks:write`. + +**Request body** (`application/json`) — all fields optional: `name` (string), `url` (string), `events` (string[]), `active` (boolean) + +**Response** `200 OK`: Updated subscription object. 
+ +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "active": false }' | jq . +``` + +--- + +### DELETE /webhooks/{id} — Delete subscription + +**Description**: Permanently deletes a webhook subscription and all its delivery records. +**Auth**: Bearer token. OPA enforces `webhooks:write`. + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +### GET /webhooks/{id}/deliveries — List delivery history + +**Description**: Returns a paginated list of delivery attempts for a subscription. +**Auth**: Bearer token. OPA enforces `webhooks:read`. + +**Query parameters**: `limit` (default 20), `offset` (default 0) + +**Response** `200 OK`: + +```json +{ + "deliveries": [...], + "total": 47, + "limit": 20, + "offset": 0 +} +``` + +Each delivery: `id` (UUID), `subscription_id` (UUID), `event_type` (string), `payload` (object), `status` (`pending` \| `delivered` \| `failed` \| `dead_letter`), `http_status_code` (integer \| null), `attempt_count` (integer), `next_retry_at` (ISO 8601 \| null), `delivered_at` (ISO 8601 \| null), `created_at` (ISO 8601), `updated_at` (ISO 8601) + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID/deliveries?limit=20&offset=0" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +## Section 10 — Federation + +All partner management endpoints require the `admin:orgs` scope (enforced via OPA). The verify +endpoint only requires an authenticated agent (no `admin:orgs` scope needed). + +### POST /federation/trust — Register a trusted partner + +**Description**: Registers a new trusted federation partner (a remote IdP whose tokens this instance will accept). +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**Request body** (`application/json`): Implementation-defined fields for partner registration including `name` (string), `issuer` (string — partner's token issuer URL), `jwksUri` (string — partner's JWKS endpoint). + +**Response** `201 Created`: Partner record with `id`, `name`, `issuer`, `jwksUri`, `createdAt`. + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/federation/trust \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "PartnerCorp IdP", + "issuer": "https://idp.partnercorp.com", + "jwksUri": "https://idp.partnercorp.com/.well-known/jwks.json" + }' | jq . +``` + +--- + +### GET /federation/partners — List partners + +**Description**: Returns all registered federation partners. +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**Response** `200 OK`: Array of partner records. + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/federation/partners" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /federation/partners/{id} — Get partner by ID + +**Description**: Returns a single federation partner record. +**Auth**: Bearer token. OPA enforces `admin:orgs`. 
+ +**Path parameters**: `id` (UUID) + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `PARTNER_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/federation/partners/$PARTNER_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /federation/partners/{id} — Update partner + +**Description**: Partially updates a federation partner record. +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/federation/partners/$PARTNER_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "name": "Updated Partner Name" }' | jq . +``` + +--- + +### DELETE /federation/partners/{id} — Delete partner + +**Description**: Removes a federation partner. This instance will no longer accept tokens from the partner's issuer. +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**Response** `204 No Content`. + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/federation/partners/$PARTNER_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +### POST /federation/verify — Verify a federated token + +**Description**: Verifies a token issued by a trusted federation partner. Returns the decoded claims if the token is valid and the issuer is trusted. +**Auth**: Bearer token (any authenticated agent — no `admin:orgs` required). + +**Request body** (`application/json`): `{ "token": "<partner-issued JWT>" }` + +**Response** `200 OK`: `{ "valid": true, "claims": { ... } }` or `{ "valid": false, "reason": "..." }` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/federation/verify \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "token": "'$PARTNER_TOKEN'" }' | jq . 
+``` + +--- + +## Section 11 — DID / OIDC + +### GET /agents/{agentId}/did — Get agent DID document + +**Description**: Returns the W3C DID Core 1.0 document for an agent. Unauthenticated — publicly accessible. +**Auth**: None. + +**Response** `200 OK`: W3C DID Document. + +```json +{ + "@context": ["https://www.w3.org/ns/did/v1"], + "id": "did:web:localhost%3A3000:agents:a1b2c3d4", + "controller": "did:web:localhost%3A3000:agents:a1b2c3d4", + "verificationMethod": [{ + "id": "did:web:localhost%3A3000:agents:a1b2c3d4#key-1", + "type": "JsonWebKey2020", + "controller": "did:web:localhost%3A3000:agents:a1b2c3d4", + "publicKeyJwk": { "kty": "RSA", "n": "...", "e": "AQAB" } + }], + "authentication": ["did:web:localhost%3A3000:agents:a1b2c3d4#key-1"], + "agntcy": { + "agentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "agentType": "screener", + "capabilities": ["resume:read"], + "deploymentEnv": "production", + "owner": "talent-team", + "version": "1.0.0" + } +} +``` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/did" | jq . +``` + +--- + +### GET /agents/{agentId}/did/resolve — Resolve agent DID + +**Description**: Returns the full W3C DID Resolution Result format including metadata. +**Auth**: Bearer token + OPA policy. + +**Response** `200 OK`: + +```json +{ + "didDocument": { ... }, + "didDocumentMetadata": { + "created": "2026-03-28T09:00:00.000Z", + "updated": "2026-03-28T09:00:00.000Z", + "deactivated": false + }, + "didResolutionMetadata": { + "contentType": "application/did+ld+json", + "retrieved": "2026-04-04T00:00:00.000Z" + } +} +``` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/did/resolve" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /agents/{agentId}/did/card — Get AGNTCY agent card + +**Description**: Returns the AGNTCY-format agent card for an agent. Unauthenticated. +**Auth**: None. 
+ +**Response** `200 OK`: + +```json +{ + "did": "did:web:localhost%3A3000:agents:a1b2c3d4", + "name": "screener-001@talent.ai", + "agentType": "screener", + "capabilities": ["resume:read"], + "owner": "talent-team", + "version": "1.0.0", + "deploymentEnv": "production", + "identityProvider": "https://sentryagent.ai", + "issuedAt": "2026-04-04T00:00:00.000Z" +} +``` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/did/card" | jq . +``` + +--- + +### GET /.well-known/openid-configuration — OIDC discovery document + +**Description**: Returns the OIDC Provider discovery document. Unauthenticated. Mounted at the server root (not under `/api/v1`). +**Auth**: None. + +**curl example**: + +```bash +curl -s "http://localhost:3000/.well-known/openid-configuration" | jq . +``` + +--- + +### GET /.well-known/jwks.json — JWKS endpoint + +**Description**: Returns the JSON Web Key Set (public keys used to verify ID tokens). Unauthenticated. +**Auth**: None. + +**curl example**: + +```bash +curl -s "http://localhost:3000/.well-known/jwks.json" | jq . +``` + +--- + +### GET /agent-info — Agent identity claims + +**Description**: Returns identity claims for the authenticated agent (equivalent to UserInfo in OIDC). Mounted at the server root. +**Auth**: Bearer token. + +**curl example**: + +```bash +curl -s "http://localhost:3000/agent-info" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### POST /api/v1/oidc/token — OIDC token exchange (GitHub Actions) + +**Description**: Exchanges a GitHub OIDC JWT for a SentryAgent.ai access token. Unauthenticated — the GitHub OIDC token is the credential. Trust-policy enforcement happens inside the controller. +**Auth**: None (GitHub OIDC JWT in body). 
+ +**Request body** (`application/json`): `{ "github_token": "<GitHub OIDC JWT>", "agentId": "<agent UUID>" }` + +**Response** `200 OK`: `{ "access_token": "...", "token_type": "Bearer", "expires_in": 3600 }` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oidc/token \ + -H "Content-Type: application/json" \ + -d '{ + "github_token": "'$GITHUB_OIDC_TOKEN'", + "agentId": "'$AGENT_ID'" + }' | jq . +``` + +--- + +### POST /api/v1/oidc/trust-policies — Create trust policy + +**Description**: Registers a trust policy that allows GitHub Actions workflows matching specific claims to exchange tokens. +**Auth**: Bearer token with `agents:write` scope. + +**Request body** (`application/json`): Repository, branch, and claim constraints (implementation-defined fields). + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oidc/trust-policies \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "agentId": "'$AGENT_ID'", + "repository": "my-org/my-repo", + "branch": "main" + }' | jq . +``` + +--- + +### GET /api/v1/oidc/trust-policies — List trust policies + +**Description**: Returns all trust policies for an agent. +**Auth**: Bearer token with `agents:write` scope. + +**Query parameters**: `agentId` (UUID, required) + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/oidc/trust-policies?agentId=$AGENT_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### DELETE /api/v1/oidc/trust-policies/{id} — Delete trust policy + +**Description**: Deletes a trust policy by its UUID. +**Auth**: Bearer token with `agents:write` scope. + +**Response** `204 No Content`. 
+ +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/oidc/trust-policies/$POLICY_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 12 — A2A Delegation + +### POST /oauth2/token/delegate — Create a delegation chain + +**Description**: Creates a delegation chain that grants a delegatee agent a subset of the delegator's scopes for a limited time. +**Auth**: Bearer token. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `delegateeAgentId` | UUID | Yes | The agent that receives delegated authority | +| `scopes` | string[] | Yes | Scopes to delegate — must be a strict subset of the caller's own scopes | +| `ttlSeconds` | integer | Yes | Delegation lifetime in seconds. Min: 60, Max: 86400 | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `delegationToken` | string | Signed delegation token (HMAC-SHA256) | +| `chainId` | UUID | Delegation chain identifier | +| `delegatorAgentId` | UUID | Agent granting the delegation | +| `delegateeAgentId` | UUID | Agent receiving the delegation | +| `scopes` | string[] | Delegated scopes | +| `expiresAt` | ISO 8601 | Expiry timestamp | + +**Error responses**: 400 `VALIDATION_ERROR` / `DELEGATION_SCOPE_EXCEEDED`, 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/delegate \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "delegateeAgentId": "'$DELEGATEE_AGENT_ID'", + "scopes": ["agents:read"], + "ttlSeconds": 3600 + }' | jq . +``` + +--- + +### POST /oauth2/token/verify-delegation — Verify a delegation token + +**Description**: Verifies a delegation token and returns the chain details. Returns `valid: false` (not an error) for expired or revoked tokens. +**Auth**: Bearer token. 
+ +**Request body** (`application/json`): `{ "delegationToken": "" }` + +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `valid` | boolean | Whether the delegation is currently valid | +| `chainId` | UUID | Chain identifier | +| `delegatorAgentId` | UUID | Delegating agent | +| `delegateeAgentId` | UUID | Receiving agent | +| `scopes` | string[] | Delegated scopes | +| `issuedAt` | ISO 8601 | Issue timestamp | +| `expiresAt` | ISO 8601 | Expiry timestamp | +| `revokedAt` | ISO 8601 \| null | Revocation timestamp, or null | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/verify-delegation \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "delegationToken": "'$DELEGATION_TOKEN'" }' | jq . +``` + +--- + +### DELETE /oauth2/token/delegate/{chainId} — Revoke a delegation chain + +**Description**: Immediately revokes a delegation chain. The delegatee can no longer use the delegation token. Only the delegator can revoke their own chains. +**Auth**: Bearer token. + +**Path parameters**: `chainId` (UUID) + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `FORBIDDEN`, 404 `DELEGATION_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/oauth2/token/delegate/$CHAIN_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 13 — Marketplace + +The Marketplace is feature-flagged via `MARKETPLACE_ENABLED` env var. When disabled, all endpoints +return `404 NOT_FOUND`. All marketplace endpoints are **unauthenticated** — no Bearer token required. + +### GET /marketplace/agents — List public agents + +**Description**: Returns a paginated list of publicly-listed agents. +**Auth**: None. 
+ +**Query parameters**: `page` (default 1), `limit` (default 20, max 100), `q` (text search), `capability` (filter by capability string), `publisher` (filter by owner) + +**Response** `200 OK`: `{ data: PublicAgent[], total: number, page: number, limit: number }` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/marketplace/agents?q=screener&limit=20" | jq . +``` + +--- + +### GET /marketplace/agents/{agentId} — Get public agent + +**Description**: Returns a single public agent with its DID document included. Returns 404 if the agent is private or inactive. +**Auth**: None. + +**Path parameters**: `agentId` (UUID) + +**Response** `200 OK`: Public agent object including `didDocument` field. + +**Error responses**: 404 `AGENT_NOT_FOUND` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/marketplace/agents/$AGENT_ID" | jq . +``` diff --git a/docs/developers/concepts.md b/docs/developers/concepts.md index 5fe5690..455af4a 100644 --- a/docs/developers/concepts.md +++ b/docs/developers/concepts.md @@ -126,3 +126,215 @@ AgentIdP is free. These are the limits on the free tier: | Audit log retention | 90 days | Events older than 90 days are automatically purged; queries return empty results | The monthly token counter resets on the first day of each calendar month. The rate limit window resets every 60 seconds; the reset timestamp is in the `X-RateLimit-Reset` response header. + +--- + +## Organizations and Multi-tenancy + +An **organization** is the top-level grouping unit in AgentIdP. Every registered agent can be +scoped to an organization by including an `organization_id` in the agent registration request. +Organizations have a unique `slug` (URL-safe identifier), a display `name`, and a `planTier` +that controls per-org resource limits. All API operations that involve analytics, webhooks, tiers, +and delegation are tenant-scoped: they only see data belonging to their organization. 
+ +**Tenant isolation** is enforced at the service layer. Every query involving multi-tenant data +filters by `organization_id`. A token issued to an agent in org A cannot read data from org B. +The `organization_id` is embedded in the JWT at token issuance time and validated on every +request. This means you do not need to pass an org ID as a query parameter — it is derived +automatically from the authenticated token. + +When you create an organization, you define its `slug`. Slugs are immutable — once set, they +cannot be changed. Choose a slug that matches your domain or product namespace, as it is used +in DID identifiers for agents in that organization. Membership is managed through the +`POST /api/v1/organizations/{orgId}/members` endpoint, which lets you add an existing agent +to an organization with a `member` or `admin` role. + +| Field | Type | Description | +|-------|------|-------------| +| `organizationId` | UUID | System-assigned immutable identifier | +| `name` | string | Human-readable display name | +| `slug` | string | URL-safe unique identifier (immutable after creation) | +| `planTier` | enum | `free` \| `pro` \| `enterprise` | +| `maxAgents` | integer | Maximum active agents in this org | +| `maxTokensPerMonth` | integer | Maximum token issuances per month | +| `status` | enum | `active` \| `suspended` \| `deleted` | + +--- + +## DID Identity + +Every agent registered in AgentIdP automatically receives a **Decentralized Identifier (DID)** +using the `did:web` method. A DID is a globally unique, self-describing identifier that does not +rely on a central registry. The DID for an agent takes the form +`did:web:{host}:agents:{agentId}` — for example, +`did:web:localhost%3A3000:agents:a1b2c3d4-e5f6-7890-abcd-ef1234567890`. The `did:web` method +means the DID document is resolvable via HTTPS: a resolver fetches +`https://{host}/api/v1/agents/{agentId}/did`. + +The **DID Document** is a JSON-LD object that describes the agent's cryptographic keys and +service endpoints.
It contains: the agent's DID as its `id`, a `verificationMethod` array with +the agent's public key in JWK format, an `authentication` array referencing that key, and an +`agntcy` extension object carrying agent metadata (type, capabilities, version, owner, +deploymentEnv). This document is publicly accessible — no authentication required — so any +external system can fetch it and verify this agent's identity without holding AgentIdP credentials. + +The `did:web` scheme was chosen because it is widely supported by DID resolvers, requires no +blockchain, and leverages standard HTTPS infrastructure. When an external system receives a +token from your agent, it can resolve your agent's DID, retrieve the public key from the DID +Document, and independently verify the token's signature. This is the foundation of +cross-system agent identity verification. + +``` +DID Document structure for a registered agent +─────────────────────────────────────────────── +{ + "@context": ["https://www.w3.org/ns/did/v1"], + "id": "did:web:{host}:agents:{agentId}", + "controller": "did:web:{host}:agents:{agentId}", + "verificationMethod": [ + { + "id": "#key-1", + "type": "JsonWebKey2020", + "controller": "did:web:{host}:agents:{agentId}", + "publicKeyJwk": { "kty": "RSA", ... } + } + ], + "authentication": ["#key-1"], + "agntcy": { + "agentId": "{agentId}", + "agentType": "screener", + "capabilities": ["resume:read"], + "deploymentEnv": "production", + "owner": "talent-team", + "version": "1.0.0" + } +} +``` + +--- + +## OIDC Provider + +AgentIdP implements a subset of the **OpenID Connect (OIDC)** protocol, acting as an OIDC +Provider for the agents it manages. This means AgentIdP publishes a standard discovery +document at `GET /.well-known/openid-configuration`, which any OIDC-aware client can use to +discover supported grant types, token endpoint, JWKS URI, and other metadata. It also exposes +a JWKS endpoint at `GET /.well-known/jwks.json` for external systems to retrieve the public +keys used to verify tokens.
+ +The **`/agent-info` endpoint** is the equivalent of OIDC's UserInfo endpoint — it returns +identity claims for the authenticated agent. External systems that receive a token issued by +AgentIdP can call this endpoint (with that token) to retrieve the agent's verified identity +attributes: its `agentId`, `email`, `agentType`, `capabilities`, and `organization_id`. This +is particularly useful when a downstream service needs to verify the identity of an agent +presenting a token, without duplicating identity data in its own store. + +AgentIdP also supports **OIDC token exchange for GitHub Actions**. If you run your agent +deployment workflows in GitHub Actions, you can configure a trust policy +(`POST /api/v1/oidc/trust-policies`) that maps a GitHub repository and branch to an AgentIdP +agent. The workflow can then exchange its GitHub OIDC JWT for an AgentIdP access token via +`POST /api/v1/oidc/token` — no stored secrets required. This enables keyless, short-lived +token issuance in CI/CD pipelines. + +--- + +## A2A Delegation + +**Agent-to-Agent (A2A) delegation** allows one agent to grant another agent a subset of its own +OAuth 2.0 scopes for a limited time. This is the building block for multi-agent pipelines where +an orchestrator agent needs to delegate work to a specialist sub-agent without sharing its own +full credentials. A delegation chain consists of: a delegator (the agent granting authority), +a delegatee (the agent receiving authority), a set of scopes (must be a strict subset of the +delegator's own scopes), and a TTL (60 seconds to 86,400 seconds). + +The **grant flow** is straightforward: the delegator calls `POST /api/v1/oauth2/token/delegate` +with the delegatee's agent ID, the scopes to grant, and the TTL. AgentIdP returns a signed +delegation token. The delegatee presents this token when calling +`POST /api/v1/oauth2/token/verify-delegation` to prove it has been granted authority. 
AgentIdP +verifies the chain integrity and returns the delegation details including whether it is still +valid. The delegator can revoke the chain at any time via +`DELETE /api/v1/oauth2/token/delegate/{chainId}`. + +Delegation is useful for: workflow handoffs between specialist agents, granting a monitoring +agent read-only access to resources owned by a processing agent, and time-limited cross-agent +authorization without credential sharing. Because delegation tokens are signed and verified +server-side, a delegatee cannot extend the TTL, expand the scope, or pass the delegation to a +third agent. The chain is always exactly one hop between two parties: delegator → delegatee. + +``` +A2A Delegation Flow +─────────────────── +1. Orchestrator (delegator) calls POST /api/v1/oauth2/token/delegate + → body: { delegateeAgentId, scopes: ["agents:read"], ttlSeconds: 3600 } + ← response: { delegationToken: "...", chainId: "...", expiresAt: "..." } + +2. Orchestrator passes delegationToken to the sub-agent out-of-band + +3. Sub-agent (delegatee) calls POST /api/v1/oauth2/token/verify-delegation + → body: { delegationToken: "..." } + ← response: { valid: true, scopes: ["agents:read"], expiresAt: "..." } + +4. Sub-agent uses its own Bearer token + confirmed scope to act on the delegator's behalf + +5. (Optional) Orchestrator calls DELETE /api/v1/oauth2/token/delegate/{chainId} + to revoke early +``` + +--- + +## API Tier Plans + +AgentIdP has three subscription tiers: **Free**, **Pro**, and **Enterprise**. Every organization +is on one tier at a time. The tier determines the resource limits enforced at runtime: maximum +number of active agents, maximum API calls per day, and maximum token issuances per day. When a +limit is reached, the relevant operation returns a `403 FREE_TIER_LIMIT_EXCEEDED` error until the +next calendar day resets the counter (for daily limits) or until you upgrade your tier.
+ +You can check your current tier, configured limits, and live usage at any time by calling +`GET /api/v1/tiers/status`. The response shows your tier name, all three limit values, and the +live usage counters for the current day. If you need higher limits, call +`POST /api/v1/tiers/upgrade` with `{ "target_tier": "pro" }` or `"enterprise"`. This creates a +Stripe Checkout Session and returns a one-time `checkoutUrl`. After payment, the organization's +tier is updated automatically via Stripe webhook. + +Enterprise tier limits are effectively unlimited (enforced as `Infinity` in the tier +configuration). Enterprise customers should contact SentryAgent.ai to arrange billing and +configure custom limits if needed. The `maxAgents` and `maxTokensPerMonth` fields on an +organization record can be overridden at org creation or update to set tighter or looser limits +than the tier defaults, regardless of tier. + +| Limit | Free | Pro | Enterprise | +|-------|------|-----|------------| +| Max agents | 10 | 100 | Unlimited | +| Max API calls / day | 1,000 | 50,000 | Unlimited | +| Max token issuances / day | 1,000 | 50,000 | Unlimited | +| Audit log retention | 90 days | 90 days | 90 days | +| Webhooks | Yes | Yes | Yes | +| Analytics | Yes | Yes | Yes | +| A2A Delegation | Yes | Yes | Yes | + +--- + +## AGNTCY Compliance + +**AGNTCY** is an open standard from the Linux Foundation that defines how AI agents should be +identified, described, and governed across platforms. AgentIdP implements AGNTCY compliance +in two ways: every agent automatically gets a DID and an agent card (a structured JSON object +that describes the agent in the AGNTCY format), and AgentIdP can generate a **compliance +report** that summarizes the verified state of all agents in a tenant. An agent card is the +AGNTCY equivalent of a business card — it carries the agent's DID, type, capabilities, owner, +version, and identity provider. 
+ +The **compliance report** (available at `GET /api/v1/compliance/report`) covers two dimensions: +agent-identity verification (are all active agents reachable via their DID?) and audit-trail +integrity (is the hash chain of audit events intact?). The report includes a boolean +`agntcyConformance` field that summarizes whether the tenant meets AGNTCY baseline requirements. +Reports are cached in Redis for 5 minutes; the `X-Cache: HIT` header signals a cached response. + +For self-auditing and external audits, you can export all active agents as AGNTCY agent cards +in bulk via `GET /api/v1/compliance/agent-cards`. The response is an array of card objects that +external compliance tools and AGNTCY-compatible registries can ingest directly. The +`GET /api/v1/compliance/controls` endpoint (no authentication required) provides a live +status snapshot of all SOC 2 Trust Services Criteria controls that AgentIdP monitors internally. +Only the report and agent-cards endpoints are gated by the `COMPLIANCE_ENABLED` environment +variable (if disabled, they return `404`); the controls endpoint is always available. diff --git a/docs/developers/guides/README.md b/docs/developers/guides/README.md index ed44dcc..a920fd9 100644 --- a/docs/developers/guides/README.md +++ b/docs/developers/guides/README.md @@ -4,9 +4,14 @@ Step-by-step walkthroughs for each AgentIdP workflow.
| Guide | What it covers | |-------|----------------| -| [Register an Agent](register-an-agent.md) | All registration fields, validation rules, common errors and fixes | +| [Register an Agent](register-an-agent.md) | All registration fields, organization scoping, validation rules, common errors | | [Manage Credentials](manage-credentials.md) | Generate, list, rotate, and revoke credentials | | [Issue and Revoke Tokens](issue-and-revoke-tokens.md) | OAuth 2.0 Client Credentials flow, JWT structure, introspect, revoke | | [Query Audit Logs](query-audit-logs.md) | Filters, pagination, event structure, 90-day retention | +| [Use the Analytics Dashboard](use-analytics-dashboard.md) | Query token trends, agent activity heatmap, and per-agent usage | +| [Manage API Tiers](manage-api-tiers.md) | Check current tier, understand limits, trigger a Stripe upgrade | +| [A2A Delegation](a2a-delegation.md) | Create and verify agent-to-agent delegation chains | +| [Configure Webhooks](configure-webhooks.md) | Subscribe to events, understand delivery guarantees, inspect history | +| [AGNTCY Compliance](agntcy-compliance.md) | Export agent cards, generate compliance reports, verify audit chain | All guides assume you have a running local server and a valid Bearer token. See the [Quick Start](../quick-start.md) if you haven't done that yet. diff --git a/docs/developers/guides/a2a-delegation.md b/docs/developers/guides/a2a-delegation.md new file mode 100644 index 0000000..92d8c76 --- /dev/null +++ b/docs/developers/guides/a2a-delegation.md @@ -0,0 +1,167 @@ +# A2A Delegation + +Agent-to-Agent (A2A) delegation lets one agent grant another agent a subset of its OAuth 2.0 +scopes for a defined period. This is the foundation for building secure multi-agent pipelines +where an orchestrator agent coordinates specialist sub-agents. 
+ +--- + +## Prerequisites + +- A running AgentIdP instance +- Two registered agents: the delegator (has a Bearer token) and the delegatee (knows its + `agentId`) +- The delegator's scopes must be a superset of the scopes it wants to delegate + +--- + +## How delegation works + +``` +Delegator agent Delegatee agent + | | + |-- POST /oauth2/token/delegate ----------->| (creates chain server-side) + |<-- { delegationToken, chainId, scopes } --| + | | + |-- passes delegationToken out-of-band ---->| + | | + | POST /oauth2/token/verify-delegation + | <-- { valid: true, scopes, expiresAt } + | | + | (optional) DELETE /oauth2/token/delegate/{chainId} +``` + +--- + +## Step 1 — Create a delegation chain + +The delegator agent creates the chain by specifying the delegatee's `agentId`, the scopes to +delegate (must be a strict subset of the delegator's own scopes), and the TTL in seconds. + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/delegate \ + -H "Authorization: Bearer $DELEGATOR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "delegateeAgentId": "'$DELEGATEE_AGENT_ID'", + "scopes": ["agents:read"], + "ttlSeconds": 3600 + }' | jq . +``` + +Response (`201 Created`): + +```json +{ + "delegationToken": "sa_del_a1b2c3d4e5f6...", + "chainId": "d4e5f6a7-b8c9-0123-def0-123456789abc", + "delegatorAgentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "delegateeAgentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", + "scopes": ["agents:read"], + "expiresAt": "2026-04-04T10:00:00.000Z" +} +``` + +Save the `delegationToken` and `chainId`: + +```bash +export DELEGATION_TOKEN="sa_del_a1b2c3d4e5f6..." +export CHAIN_ID="d4e5f6a7-b8c9-0123-def0-123456789abc" +``` + +**TTL constraints**: minimum 60 seconds, maximum 86400 seconds (24 hours). Choose the minimum +TTL that covers the delegatee's task. + +--- + +## Step 2 — Pass the delegation token to the delegatee + +Pass `DELEGATION_TOKEN` to the delegatee agent out-of-band. 
This can be via a shared queue, +a direct API call to the sub-agent, or any other channel. The token is a signed opaque string — +do not parse it; treat it as an opaque credential. + +--- + +## Step 3 — Verify the delegation token + +The delegatee (or any agent checking the delegation) calls the verify endpoint. This confirms +the chain is valid and not expired or revoked. + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/verify-delegation \ + -H "Authorization: Bearer $DELEGATEE_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "delegationToken": "'$DELEGATION_TOKEN'" }' | jq . +``` + +Response (`200 OK` — valid delegation): + +```json +{ + "valid": true, + "chainId": "d4e5f6a7-b8c9-0123-def0-123456789abc", + "delegatorAgentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "delegateeAgentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", + "scopes": ["agents:read"], + "issuedAt": "2026-04-04T09:00:00.000Z", + "expiresAt": "2026-04-04T10:00:00.000Z", + "revokedAt": null +} +``` + +Response (`200 OK` — expired delegation): + +```json +{ + "valid": false, + "chainId": "d4e5f6a7-b8c9-0123-def0-123456789abc", + "delegatorAgentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "delegateeAgentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", + "scopes": ["agents:read"], + "issuedAt": "2026-04-03T09:00:00.000Z", + "expiresAt": "2026-04-03T10:00:00.000Z", + "revokedAt": null +} +``` + +> The verify endpoint always returns `200 OK`. Check the `valid` field — it is never an error +> response for an expired or revoked token. + +--- + +## Step 4 — (Optional) Revoke the delegation early + +If the delegatee has completed its task and you want to revoke the delegation before it expires, +the delegator calls: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/oauth2/token/delegate/$CHAIN_ID" \ + -H "Authorization: Bearer $DELEGATOR_TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +Expected response: `204` (no body). 
+ +After revocation, verify requests for this chain return `valid: false` with `revokedAt` set to the revocation timestamp. + +--- + +## Scope rules + +- Delegated scopes must be a strict subset of the delegator's own token scopes +- You cannot delegate scopes you do not have +- You cannot delegate to yourself (delegateeAgentId must differ from delegatorAgentId) +- Delegation is not transitive — a delegatee cannot re-delegate to a third agent + +--- + +## Common errors + +### `400 VALIDATION_ERROR` — scope not a subset + +The delegator attempted to delegate a scope it does not hold. Check `GET /api/v1/token/introspect` +to confirm which scopes your token carries. + +### `400 VALIDATION_ERROR` — ttlSeconds out of range + +Min: 60, Max: 86400. Values outside this range return a validation error. diff --git a/docs/developers/guides/agntcy-compliance.md b/docs/developers/guides/agntcy-compliance.md new file mode 100644 index 0000000..2b80e1e --- /dev/null +++ b/docs/developers/guides/agntcy-compliance.md @@ -0,0 +1,191 @@ +# AGNTCY Compliance + +This guide explains how to use AgentIdP's AGNTCY compliance features: exporting agent cards, +generating compliance reports, verifying audit chain integrity, and checking SOC 2 control status. + +--- + +## Prerequisites + +- A running AgentIdP instance +- `COMPLIANCE_ENABLED` environment variable not set to `false` (enabled by default) +- A valid Bearer token (for authenticated endpoints) +- At least one registered agent + +--- + +## What is AGNTCY? + +AGNTCY is an open standard from the Linux Foundation for AI agent identity and governance. +AgentIdP implements AGNTCY by giving every agent a DID and an agent card. The compliance +endpoints let you export and report on that data in structured, auditable formats. + +--- + +## Export agent cards + +`GET /api/v1/compliance/agent-cards` + +Exports all active agents in your organization as AGNTCY-standard agent card JSON objects.
+Suitable for ingestion by external compliance tools or AGNTCY-compatible registries. + +```bash +curl -s "http://localhost:3000/api/v1/compliance/agent-cards" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response (`200 OK`): Array of agent card objects. + +```json +[ + { + "did": "did:web:localhost%3A3000:agents:a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "name": "screener-001@talent.ai", + "agentType": "screener", + "capabilities": ["resume:read", "email:send"], + "owner": "talent-team", + "version": "1.0.0", + "deploymentEnv": "production", + "identityProvider": "https://sentryagent.ai", + "issuedAt": "2026-04-04T09:00:00.000Z" + } +] +``` + +**Use cases**: +- Share with external auditors to demonstrate your agent fleet +- Import into AGNTCY-compatible discovery registries +- Baseline snapshot before and after deployments + +Save the output to a file: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/agent-cards" \ + -H "Authorization: Bearer $TOKEN" > agent-cards-$(date +%Y%m%d).json +``` + +--- + +## Generate a compliance report + +`GET /api/v1/compliance/report` + +Generates an AGNTCY compliance report for your tenant. The report is cached for 5 minutes +(check the `X-Cache` header to see if the response is fresh or cached). + +```bash +curl -s "http://localhost:3000/api/v1/compliance/report" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +Response (`200 OK`): + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "generatedAt": "2026-04-04T09:00:00.000Z", + "agntcyConformance": true, + "agentCount": 12, + "verifiedAgentCount": 12, + "auditChainIntegrity": true, + "from_cache": false +} +``` + +**Interpreting the fields**: + +| Field | Description | +|-------|-------------| +| `agntcyConformance` | `true` if all agents have valid DIDs and the audit chain is intact | +| `agentCount` | Total active agents in the organization | +| `verifiedAgentCount` | Agents with a resolvable DID document | +| `auditChainIntegrity` | `true` if the audit event hash chain has not been tampered with | +| `from_cache` | `true` if served from Redis cache (up to 5 minutes old) | + +**Force a fresh report**: Wait 5 minutes for the cache to expire. The `from_cache: false` +response is always freshly generated. + +--- + +## Verify audit chain integrity + +`GET /api/v1/audit/verify` + +Verifies that the cryptographic hash chain of audit events is intact. Returns `verified: true` +if no tampering is detected. Rate limited to 30 requests/minute (computationally intensive). + +Requires: Bearer token with `audit:read` scope. + +```bash +curl -s "http://localhost:3000/api/v1/audit/verify" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response (`200 OK`): + +```json +{ + "verified": true, + "checkedCount": 1247, + "fromDate": null, + "toDate": null +} +``` + +Verify a specific date window: + +```bash +curl -s "http://localhost:3000/api/v1/audit/verify?fromDate=2026-03-01T00:00:00.000Z&toDate=2026-03-31T23:59:59.999Z" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +**Interpreting the result**: +- `verified: true` — no tampering detected in the checked window +- `verified: false` — the hash chain has a broken link; contact SentryAgent.ai support +- `checkedCount` — number of audit events verified + +--- + +## Check SOC 2 control status (public) + +`GET /api/v1/compliance/controls` + +Returns the live status of all SOC 2 Trust Services Criteria controls. No authentication +required. Responses are cached by CDN/proxies for 60 seconds (`Cache-Control: public, max-age=60`). + +```bash +curl -s "http://localhost:3000/api/v1/compliance/controls" | jq . +``` + +Response (`200 OK`): + +```json +{ + "controls": [ + { + "id": "CC6.1", + "name": "Logical Access Controls", + "status": "pass", + "lastChecked": "2026-04-04T08:00:00.000Z" + }, + { + "id": "CC7.2", + "name": "System Monitoring", + "status": "pass", + "lastChecked": "2026-04-04T08:00:00.000Z" + } + ] +} +``` + +Each control has a `status` of `pass`, `fail`, or `unknown`. Status is updated by background +jobs that run periodically. This endpoint is suitable for embedding in external status pages +or compliance dashboards without sharing API credentials. + +--- + +## When compliance endpoints are disabled + +If `COMPLIANCE_ENABLED=false` is set in the server environment, the AGNTCY compliance endpoints +(`/compliance/report` and `/compliance/agent-cards`) return `404 COMPLIANCE_DISABLED`. The SOC 2 +endpoints (`/compliance/controls` and `/audit/verify`) are never gated and always active. diff --git a/docs/developers/guides/configure-webhooks.md b/docs/developers/guides/configure-webhooks.md new file mode 100644 index 0000000..923ec3c --- /dev/null +++ b/docs/developers/guides/configure-webhooks.md @@ -0,0 +1,219 @@ +# Configure Webhooks + +Webhooks let AgentIdP push real-time events to your application when agents, credentials, or +tokens change state. 
This guide covers creating subscriptions, the available event types, +delivery guarantees, and how to inspect delivery history. + +--- + +## Prerequisites + +- A running AgentIdP instance +- A valid Bearer token with `organization_id` in its claims +- A publicly reachable HTTPS endpoint to receive events (for local development, use a tool + like [ngrok](https://ngrok.com)) + +--- + +## Available event types + +| Event type | Triggered when | +|-----------|----------------| +| `agent.created` | A new agent is registered | +| `agent.updated` | An agent's metadata is updated | +| `agent.suspended` | An agent's status changes to `suspended` | +| `agent.reactivated` | An agent's status changes from `suspended` to `active` | +| `agent.decommissioned` | An agent is decommissioned | +| `credential.generated` | New credentials are created for an agent | +| `credential.rotated` | A credential's secret is rotated | +| `credential.revoked` | A credential is revoked | +| `token.issued` | An access token is issued | +| `token.revoked` | An access token is revoked | + +--- + +## Create a subscription + +`POST /api/v1/webhooks` + +```bash +curl -s -X POST http://localhost:3000/api/v1/webhooks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "prod-agent-events", + "url": "https://my-app.example.com/hooks/sentryagent", + "events": ["agent.created", "agent.decommissioned", "token.issued"] + }' | jq . 
+``` + +Response (`201 Created`): + +```json +{ + "id": "wh-1a2b3c4d-e5f6-7890-abcd-ef1234567890", + "organization_id": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "name": "prod-agent-events", + "url": "https://my-app.example.com/hooks/sentryagent", + "events": ["agent.created", "agent.decommissioned", "token.issued"], + "active": true, + "signingSecret": "whsec_a1b2c3d4e5f6789...", + "failure_count": 0, + "created_at": "2026-04-04T09:00:00.000Z", + "updated_at": "2026-04-04T09:00:00.000Z" +} +``` + +> **Save the `signingSecret` now.** It is shown once. Use it to verify the HMAC-SHA256 +> signature on incoming webhook requests. See "Verifying delivery signatures" below. + +```bash +export WEBHOOK_ID="wh-1a2b3c4d-e5f6-7890-abcd-ef1234567890" +export SIGNING_SECRET="whsec_a1b2c3d4e5f6789..." +``` + +--- + +## Webhook payload format + +Every delivery sends a POST to your URL with `Content-Type: application/json` and this body: + +```json +{ + "id": "evt-uuid-here", + "event": "agent.created", + "timestamp": "2026-04-04T09:00:00.000Z", + "organization_id": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "data": { + "agentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "email": "screener-001@talent.ai", + "agentType": "screener" + } +} +``` + +The `data` object contains event-specific fields. For `agent.*` events it includes agent +metadata. For `credential.*` events it includes `credentialId` and `agentId`. For `token.*` +events it includes `agentId` and `scope`. + +--- + +## Verifying delivery signatures + +AgentIdP signs every delivery with HMAC-SHA256 using your `signingSecret`. The signature is +in the `X-SentryAgent-Signature` header as `sha256=` followed by the hex-encoded digest.
+ +Verify it in Node.js: + +```javascript +const crypto = require('crypto'); + +function verifySignature(rawBody, signingSecret, signatureHeader) { + const expected = 'sha256=' + crypto + .createHmac('sha256', signingSecret) + .update(rawBody) + .digest('hex'); + const expectedBuf = Buffer.from(expected); + const receivedBuf = Buffer.from(signatureHeader || ''); + // timingSafeEqual throws if the lengths differ, so check length first + return expectedBuf.length === receivedBuf.length && crypto.timingSafeEqual(expectedBuf, receivedBuf); +} +``` + +Always verify the signature before processing the event. Reject requests with invalid signatures +with `401 Unauthorized`. + +--- + +## Delivery guarantees and retry policy + +- AgentIdP delivers each event **at least once** — your endpoint may receive duplicates +- Use the `id` field to deduplicate events +- Delivery is attempted immediately; on failure, retries use exponential backoff +- After repeated failures, the delivery moves to `dead_letter` status +- Subscriptions with high `failure_count` may be automatically disabled + +Delivery statuses: `pending` → `delivered` (success) or `failed` (attempt failed) → `dead_letter` +(all retries exhausted) + +--- + +## List subscriptions + +```bash +curl -s "http://localhost:3000/api/v1/webhooks" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Pause or resume a subscription + +To pause (disable) a subscription without deleting it: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "active": false }' | jq . +``` + +To resume: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "active": true }' | jq . +``` + +--- + +## Inspect delivery history + +`GET /api/v1/webhooks/{id}/deliveries` + +```bash +curl -s "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID/deliveries?limit=20&offset=0" \ + -H "Authorization: Bearer $TOKEN" | jq .
+``` + +Response: + +```json +{ + "deliveries": [ + { + "id": "del-uuid", + "subscription_id": "wh-uuid", + "event_type": "agent.created", + "payload": { ... }, + "status": "delivered", + "http_status_code": 200, + "attempt_count": 1, + "next_retry_at": null, + "delivered_at": "2026-04-04T09:00:01.000Z", + "created_at": "2026-04-04T09:00:00.000Z", + "updated_at": "2026-04-04T09:00:01.000Z" + } + ], + "total": 47, + "limit": 20, + "offset": 0 +} +``` + +Use `offset` to paginate through delivery history. Increase `limit` to retrieve more records +per page (the server default is 20). + +--- + +## Delete a subscription + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +Expected response: `204`. This permanently deletes the subscription and all its delivery records. diff --git a/docs/developers/guides/issue-and-revoke-tokens.md b/docs/developers/guides/issue-and-revoke-tokens.md index 8fbb535..af8803a 100644 --- a/docs/developers/guides/issue-and-revoke-tokens.md +++ b/docs/developers/guides/issue-and-revoke-tokens.md @@ -47,10 +47,13 @@ The token expires in `3600` seconds (1 hour). Request a new one before it expire | Scope | What it allows | |-------|----------------| -| `agents:read` | Read agent records | -| `agents:write` | Create, update, decommission agents | +| `agents:read` | Read agent identity records | +| `agents:write` | Create, update, and decommission agents | | `tokens:read` | Introspect tokens | -| `audit:read` | Query audit logs | +| `audit:read` | Query audit logs and verify audit chain integrity | +| `webhooks:read` | List webhook subscriptions and delivery history | +| `webhooks:write` | Create, update, and delete webhook subscriptions | +| `admin:orgs` | Manage organizations and federation partners | Request only the scopes your agent needs. 
diff --git a/docs/developers/guides/manage-api-tiers.md b/docs/developers/guides/manage-api-tiers.md new file mode 100644 index 0000000..09057fc --- /dev/null +++ b/docs/developers/guides/manage-api-tiers.md @@ -0,0 +1,140 @@ +# Manage API Tiers + +This guide explains how to check your organization's current plan tier, understand the enforced +limits, and initiate an upgrade via Stripe. + +--- + +## Prerequisites + +- A running AgentIdP instance +- A valid Bearer token with `organization_id` in its claims + +--- + +## Check current tier status + +`GET /api/v1/tiers/status` + +Returns your organization's tier, the configured limits, and live usage counters for today. + +```bash +curl -s "http://localhost:3000/api/v1/tiers/status" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "tier": "free", + "limits": { + "maxAgents": 10, + "maxCallsPerDay": 1000, + "maxTokensPerDay": 1000 + }, + "usage": { + "agentCount": 3, + "callsToday": 142, + "tokensToday": 87 + } +} +``` + +**Understanding the fields**: + +| Field | Description | +|-------|-------------| +| `tier` | Current plan: `free`, `pro`, or `enterprise` | +| `limits.maxAgents` | Maximum active (non-decommissioned) agents allowed | +| `limits.maxCallsPerDay` | Maximum total API calls per calendar day (UTC) | +| `limits.maxTokensPerDay` | Maximum token issuances per calendar day (UTC) | +| `usage.agentCount` | Current number of active agents | +| `usage.callsToday` | API calls made so far today | +| `usage.tokensToday` | Tokens issued so far today | + +**When limits are reached**: The relevant endpoint returns `403 FREE_TIER_LIMIT_EXCEEDED`. +Daily counters reset at midnight UTC. The agent count limit is a current count, not a daily +counter — decommissioning an agent immediately frees capacity. 
+ +--- + +## Tier comparison + +| Limit | Free | Pro | Enterprise | +|-------|------|-----|------------| +| Max agents | 10 | 100 | Unlimited | +| Max API calls / day | 1,000 | 50,000 | Unlimited | +| Max token issuances / day | 1,000 | 50,000 | Unlimited | + +--- + +## Upgrade your tier + +`POST /api/v1/tiers/upgrade` + +Creates a Stripe Checkout Session and returns a one-time URL. Complete the payment in the +browser to upgrade your organization's tier. + +```bash +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "pro" }' | jq . +``` + +Response: + +```json +{ + "checkoutUrl": "https://checkout.stripe.com/pay/cs_live_a1b2c3d4e5f6..." +} +``` + +Open `checkoutUrl` in a browser to complete payment. After successful payment, Stripe sends a +webhook to AgentIdP, which automatically upgrades your organization's tier. + +**Constraints**: +- `target_tier` must be `pro` or `enterprise` +- `target_tier` must be higher than your current tier (you cannot downgrade via this endpoint) +- Attempting to upgrade to the current or a lower tier returns `400 TIER_UPGRADE_NOT_REQUIRED` + +```bash +# Upgrade from free to pro +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "pro" }' | jq . + +# Upgrade from pro to enterprise +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "enterprise" }' | jq . +``` + +--- + +## Common errors + +### `400 VALIDATION_ERROR` — target_tier missing or invalid + +```json +{ + "code": "VALIDATION_ERROR", + "message": "target_tier must be one of: free, pro, enterprise.", + "details": { "received": "premium" } +} +``` + +**Fix**: Use `"pro"` or `"enterprise"`.
+ +### `400 TIER_UPGRADE_NOT_REQUIRED` — not an upgrade + +**Fix**: You are already on this tier or a higher tier. Check `GET /api/v1/tiers/status` first. + +### `401 UNAUTHORIZED` — token lacks organization_id + +The tier endpoints require a token with an `organization_id` claim. Use a token issued by an +agent that was registered with `organization_id`. Tokens issued via the bootstrap method +(without an org) do not carry `organization_id` and will fail. diff --git a/docs/developers/guides/manage-credentials.md b/docs/developers/guides/manage-credentials.md index d4a0f45..10d34e3 100644 --- a/docs/developers/guides/manage-credentials.md +++ b/docs/developers/guides/manage-credentials.md @@ -2,6 +2,11 @@ A credential is a `client_id` + `client_secret` pair that your agent uses to get access tokens. This guide covers all four credential operations. +> **Multi-tenant note**: Credentials issued for an agent that belongs to an organization will +> produce tokens carrying an `organization_id` claim. This claim is required by analytics, +> webhooks, tier enforcement, and A2A delegation. Ensure your agent is registered with +> `organization_id` before issuing credentials for production use. + All credential endpoints are under `/api/v1/agents/{agentId}/credentials` and require a Bearer token with `agents:write` scope. --- diff --git a/docs/developers/guides/query-audit-logs.md b/docs/developers/guides/query-audit-logs.md index e865452..7b4986f 100644 --- a/docs/developers/guides/query-audit-logs.md +++ b/docs/developers/guides/query-audit-logs.md @@ -25,6 +25,11 @@ Every action below is automatically recorded. You cannot create, modify, or dele | `credential.revoked` | Successful `DELETE /agents/{agentId}/credentials/{credentialId}` | | `auth.failed` | Failed authentication attempt on `POST /token` | +> **Audit chain verification**: In addition to querying events, you can verify the cryptographic +> integrity of the entire audit hash chain via `GET /api/v1/audit/verify`. 
This endpoint requires +> `audit:read` scope and is rate-limited to 30 requests/min. See the +> [API Reference](../api-reference.md#get-auditverify---verify-audit-chain-integrity) for details. + --- ## Query the audit log diff --git a/docs/developers/guides/register-an-agent.md b/docs/developers/guides/register-an-agent.md index 20b7eb1..cdc5265 100644 --- a/docs/developers/guides/register-an-agent.md +++ b/docs/developers/guides/register-an-agent.md @@ -20,6 +20,7 @@ Requires: `Authorization: Bearer ` with `agents:write` scope. | `capabilities` | string[] | Yes | One or more capability strings in `resource:action` format. Minimum 1. | | `owner` | string | Yes | Team or organisation that owns this agent. 1–128 characters. | | `deploymentEnv` | string (enum) | Yes | Target deployment environment. See values below. | +| `organization_id` | string (UUID) | No | UUID of the organization to scope this agent to. Recommended on all multi-tenant instances. | ### `agentType` values @@ -70,7 +71,8 @@ curl -s -X POST http://localhost:3000/api/v1/agents \ "version": "1.0.0", "capabilities": ["resume:read", "email:send", "candidate:score"], "owner": "talent-acquisition-team", - "deploymentEnv": "production" + "deploymentEnv": "production", + "organization_id": "'$ORG_ID'" }' | jq . ``` @@ -93,6 +95,11 @@ Successful response (`201 Created`): The `agentId` is assigned by the system — it is immutable and never changes. +> **Organization scoping**: If you include `organization_id` in the request, the agent is +> associated with that organization. Analytics, webhook events, and tier enforcement are all +> scoped by organization. To create an organization first, see the +> [Quick Start](../quick-start.md) guide. 
+ --- ## Immutable fields diff --git a/docs/developers/guides/use-analytics-dashboard.md b/docs/developers/guides/use-analytics-dashboard.md new file mode 100644 index 0000000..8cbc9a0 --- /dev/null +++ b/docs/developers/guides/use-analytics-dashboard.md @@ -0,0 +1,135 @@ +# Use the Analytics Dashboard + +This guide explains how to query the three analytics endpoints to understand your organization's +token usage and agent activity patterns. + +All analytics endpoints require Bearer token authentication and are scoped to the organization +embedded in your token. + +--- + +## Prerequisites + +- A running AgentIdP instance +- A valid Bearer token with `organization_id` in its claims +- At least one agent registered and some token issuance activity + +--- + +## Token issuance trend + +`GET /api/v1/analytics/tokens` + +Returns daily token issuance counts for the past N days (default 30, max 90). Use this to +track usage growth, identify traffic spikes, and plan capacity. + +```bash +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=30" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "days": 30, + "data": [ + { "date": "2026-03-06", "count": 142 }, + { "date": "2026-03-07", "count": 198 }, + { "date": "2026-03-08", "count": 0 } + ] +} +``` + +**Interpreting the data**: Each item in `data` is one calendar day (UTC) with the number of +tokens issued on that day. Days with zero issuance are included with `count: 0`. The array +is ordered chronologically, oldest first. + +**Using it**: Compare day-over-day counts to identify growth or anomalies. A sudden spike in +`count` may indicate an agent retry loop or a credential leak. Zero-count days during expected +operation may indicate a deployment issue. + +**Query parameter**: `days` — positive integer, max 90. Returns `400 VALIDATION_ERROR` if +exceeded. 
+ +```bash +# Last 7 days +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=7" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Last 90 days (maximum) +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=90" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Agent activity heatmap + +`GET /api/v1/analytics/agents/activity` + +Returns request counts grouped by day-of-week (0 = Sunday, 6 = Saturday) and hour (0–23, UTC). +Use this to identify peak usage windows for capacity planning and rate limit tuning. + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents/activity" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "data": [ + { "dow": 1, "hour": 9, "count": 54 }, + { "dow": 1, "hour": 10, "count": 87 }, + { "dow": 3, "hour": 14, "count": 201 } + ] +} +``` + +**Interpreting the data**: `dow` is 0 (Sunday) through 6 (Saturday). `hour` is 0–23 UTC. +Only non-zero cells are returned — missing combinations had zero activity. Sort by `count` +descending to find your peak windows. + +**Using it**: If most activity is on weekday mornings UTC, ensure your rate limit headroom +covers that window. If weekend activity is unexpectedly high, investigate which agents are +active. + +--- + +## Per-agent usage summary + +`GET /api/v1/analytics/agents` + +Returns token issuance counts per agent for the current calendar month (UTC). Use this to +identify your most active agents and check if any single agent is consuming a +disproportionate share of your monthly token budget. + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +Response: + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "month": "2026-04", + "data": [ + { "agentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "tokenCount": 312 }, + { "agentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", "tokenCount": 87 } + ] +} +``` + +**Interpreting the data**: Each item shows an agent UUID and the number of tokens issued to +it this month. The response covers the full current calendar month from day 1 to now. +It resets on the first day of each month. + +**Using it**: Cross-reference `agentId` values against `GET /api/v1/agents` to identify the +agents by name. If one agent accounts for >80% of usage, investigate whether it is caching +tokens correctly or requesting tokens unnecessarily. diff --git a/docs/developers/quick-start.md b/docs/developers/quick-start.md index ad5f84c..4cf2fcd 100644 --- a/docs/developers/quick-start.md +++ b/docs/developers/quick-start.md @@ -1,6 +1,6 @@ # Quick Start — Register Your First Agent -This guide gets you from zero to a working agent identity with a valid OAuth 2.0 access token. It takes under 5 minutes. +This guide gets you from zero to a working agent identity inside an organization, with a valid OAuth 2.0 access token. It takes under 5 minutes. ## Prerequisites @@ -135,7 +135,45 @@ export BOOTSTRAP_TOKEN="" --- -## Step 5 — Register an agent +## Step 5 — Create an organization + +Agents are scoped to organizations. Create one now so your agent has an `organization_id` to belong to: + +```bash +curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Authorization: Bearer $BOOTSTRAP_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My AI Project", + "slug": "my-ai-project" + }' | jq .
+``` + +Example response (`201 Created`): + +```json +{ + "organizationId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "name": "My AI Project", + "slug": "my-ai-project", + "planTier": "free", + "maxAgents": 10, + "maxTokensPerMonth": 10000, + "status": "active", + "createdAt": "2026-04-04T09:00:00.000Z", + "updatedAt": "2026-04-04T09:00:00.000Z" +} +``` + +Save the `organizationId`: + +```bash +export ORG_ID="org-0a1b2c3d-e4f5-6789-abcd-ef0123456789" +``` + +--- + +## Step 6 — Register an agent ```bash curl -s -X POST http://localhost:3000/api/v1/agents \ @@ -147,7 +185,8 @@ curl -s -X POST http://localhost:3000/api/v1/agents \ "version": "1.0.0", "capabilities": ["data:read"], "owner": "my-team", - "deploymentEnv": "development" + "deploymentEnv": "development", + "organization_id": "'$ORG_ID'" }' | jq . ``` @@ -176,7 +215,7 @@ export AGENT_ID="a1b2c3d4-e5f6-7890-abcd-ef1234567890" --- -## Step 6 — Generate a credential +## Step 7 — Generate a credential ```bash curl -s -X POST "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials" \ @@ -208,7 +247,7 @@ export CLIENT_SECRET="sk_live_7f3a2b1c9d8e4f0a6b5c3d2e1f0a9b8c" --- -## Step 7 — Issue an access token +## Step 8 — Issue an access token Use the OAuth 2.0 Client Credentials flow. Note that the `/token` endpoint uses **form-encoded** body, not JSON: @@ -242,6 +281,14 @@ Your agent now has a valid JWT. 
Use it in the `Authorization: Bearer ` he ## What's next -- [Core Concepts](concepts.md) — understand AgentIdP, AGNTCY, and the agent identity model -- [Guides](guides/README.md) — step-by-step walkthroughs for credentials, tokens, and audit logs +- [Core Concepts](concepts.md) — understand AgentIdP, AGNTCY, orgs, DID, delegation, and tiers +- [Guides](guides/README.md) — step-by-step walkthroughs for all workflows - [API Reference](api-reference.md) — every endpoint documented with curl examples + +**New guides for Phase 6 features:** + +- [Use the Analytics Dashboard](guides/use-analytics-dashboard.md) — query token trends and activity +- [Manage API Tiers](guides/manage-api-tiers.md) — check limits and upgrade your plan +- [A2A Delegation](guides/a2a-delegation.md) — delegate authority between agents +- [Configure Webhooks](guides/configure-webhooks.md) — subscribe to real-time events +- [AGNTCY Compliance](guides/agntcy-compliance.md) — export agent cards and generate compliance reports diff --git a/docs/devops/README.md b/docs/devops/README.md index 4c2d6a4..99b0344 100644 --- a/docs/devops/README.md +++ b/docs/devops/README.md @@ -14,14 +14,15 @@ SentryAgent.ai AgentIdP is a Node.js REST API backed by PostgreSQL and Redis. 
It ## Documentation -| Document | What it covers | -|----------|----------------| -| [Architecture](architecture.md) | Components, ports, data flow, Redis key patterns | -| [Environment Variables](environment-variables.md) | Every env var — required, optional, format, examples | -| [Database](database.md) | Schema (4 tables), migrations, how to apply and verify | -| [Local Development](local-development.md) | docker-compose setup, startup, health checks | -| [Security](security.md) | JWT key generation and rotation, CORS, secret storage | -| [Operations](operations.md) | Startup order, graceful shutdown, log interpretation, troubleshooting | +| Document | Audience | Contents | +|----------|----------|---------| +| [Architecture](architecture.md) | All engineers | Components, ports, data flow, Redis key patterns | +| [Environment Variables](environment-variables.md) | All engineers | Every env var — required, optional, format, examples | +| [Database](database.md) | Backend, DevOps | Schema (26 tables/migrations), how to apply and verify | +| [Local Development](local-development.md) | All engineers | docker-compose setup, startup, health checks | +| [Security](security.md) | All engineers | JWT key generation and rotation, CORS, secret storage | +| [Operations](operations.md) | DevOps | Startup order, graceful shutdown, log interpretation, troubleshooting | +| [field-trial.md](field-trial.md) | DevOps engineers, QA | In-house Docker Compose field trial execution playbook | ## Quick Reference — Ports diff --git a/docs/devops/architecture.md b/docs/devops/architecture.md index f0a96c9..cfe5618 100644 --- a/docs/devops/architecture.md +++ b/docs/devops/architecture.md @@ -3,26 +3,49 @@ ## Component Overview ``` - ┌─────────────────────────────────────┐ - │ AgentIdP Application │ - │ Node.js / Express │ - │ Port 3000 │ - │ │ - │ Auth MW → RateLimit MW → Routes │ - │ ↓ ↓ │ - │ Controllers → Services → Repos │ - └──────────────┬──────────────┬────────┘ - │ │ - 
┌──────────────▼──┐ ┌───────▼────────┐ - │ PostgreSQL 14 │ │ Redis 7 │ - │ Port 5432 │ │ Port 6379 │ - │ │ │ │ - │ agents │ │ Token revoke │ - │ credentials │ │ Rate limits │ - │ audit_events │ │ Monthly counts │ - │ token_revocati- │ │ │ - │ ons │ │ │ - └──────────────────┘ └─────────────────┘ + ┌───────────────────────────────────────────┐ + │ Next.js Portal (port 3001) │ + │ portal/ — Next.js 14 │ + │ /login /agents /credentials /audit │ + │ /analytics /settings/tier /compliance │ + │ /webhooks /marketplace │ + └────────────────┬──────────────────────────┘ + │ HTTP (localhost:3000) + ┌────────────────▼──────────────────────────┐ + │ AgentIdP Application │ + │ Node.js / Express (port 3000) │ + │ │ + │ TLS MW → Helmet → CORS → Morgan │ + │ Metrics MW → OrgContext MW │ + │ UsageMetering MW → TierEnforcement MW │ + │ Auth MW → OPA MW → Routes │ + │ ↓ │ + │ Controllers → Services → Repos │ + └──────────┬───────────────┬────────────────┘ + │ │ + ┌────────────────▼──┐ ┌────────▼────────┐ + │ PostgreSQL 14 │ │ Redis 7 │ + │ Port 5432 │ │ Port 6379 │ + │ │ │ │ + │ 26 migrations │ │ Rate limits │ + │ (001–026) │ │ Token revoke │ + │ organizations │ │ Monthly counts │ + │ agents + DID keys │ │ Tier counters │ + │ credentials │ │ Compliance cache│ + │ audit_events │ │ │ + │ token_revocations │ └──────────────────┘ + │ oidc_keys │ + │ federation_partne-│ ┌──────────────────┐ + │ rs │ │ HashiCorp Vault │ + │ webhook_subscript-│ │ (optional) │ + │ ions + deliveries │ │ KV v2 — creds │ + │ agent_marketplace │ └──────────────────┘ + │ github_oidc_trust │ + │ billing │ ┌──────────────────┐ + │ delegation_chains │ │ Stripe │ + │ analytics_events │ │ (optional) │ + │ tenant_tiers │ │ Billing/upgrades │ + └────────────────────┘ └──────────────────┘ ``` ## Components @@ -36,8 +59,12 @@ A stateless Express HTTP server. 
Every request is handled independently — no i | Layer | Responsibility | |-------|---------------| | Routes | Wire HTTP methods and paths to controllers | +| TLS middleware | Redirect HTTP → HTTPS when `ENFORCE_TLS=true` | | Auth middleware | Validate Bearer JWT (RS256 + Redis revocation check) | -| Rate limit middleware | Redis sliding-window counter per `client_id` | +| OrgContext middleware | Resolve `organization_id` from JWT and attach to `req` | +| UsageMetering middleware | Fire-and-forget analytics event recording | +| TierEnforcement middleware | Enforce daily API call and token limits via Redis (when `TIER_ENFORCEMENT=true`) | +| OPA middleware | Scope-based authorization via embedded Wasm or JSON policy | | Controllers | Parse and validate request, call service, return response | | Services | Business logic — no direct DB access | | Repositories | All SQL queries — no business logic | @@ -53,11 +80,14 @@ The application connects via a connection pool (`pg.Pool`) initialised from `DAT Ephemeral store for three use cases: -| Key pattern | Purpose | TTL | -|------------|---------|-----| -| `revoked:` | Token revocation list — checked on every authenticated request | Until token's `exp` | -| `rate::` | Request count per client per 60-second window | 60 seconds | -| `monthly:::` | Token issuance count for free tier limit enforcement | End of month | +| Key pattern | Example | Purpose | TTL | +|------------|---------|---------|-----| +| `revoked:<jti>` | `revoked:f1e2d3c4-...` | Revoked token JTI | Remaining token lifetime | +| `rate:<client_id>:<window>` | `rate:a1b2c3...:29086156` | Request count per window | `RATE_LIMIT_WINDOW_MS` | +| `monthly:<client_id>:<year>:<month>` | `monthly:a1b2c3...:2026:3` | Monthly token issuance count | End of month | +| `rate:tier:calls:<org_id>` | `rate:tier:calls:org-uuid` | Daily API call counter for tier enforcement | Until midnight UTC | +| `rate:tier:tokens:<org_id>` | `rate:tier:tokens:org-uuid` | Daily token issuance counter for tier enforcement | Until midnight UTC | +|
`compliance:report:<org_id>` | `compliance:report:org-uuid` | Cached compliance report JSON | 5 minutes | **Redis is supplementary, not the source of truth.** Token revocations are also written to the `token_revocations` PostgreSQL table for durability across Redis restarts. On Redis restart, the revocation list is cold — previously revoked tokens will pass auth until the PostgreSQL-backed warm-up is implemented (Phase 2). @@ -107,21 +137,89 @@ PostgreSQL / Redis ## Service Map -| Route prefix | Service | Repository | -|-------------|---------|-----------| -| `/api/v1/agents` | `AgentService` | `AgentRepository` | -| `/api/v1/agents/:id/credentials` | `CredentialService` | `CredentialRepository` | -| `/api/v1/token` | `OAuth2Service` | `TokenRepository`, `CredentialRepository`, `AgentRepository` | -| `/api/v1/audit` | `AuditService` | `AuditRepository` | +| Route prefix | Controller | Service(s) | Repository/ies | +|-------------|-----------|-----------|----------------| +| `/api/v1/agents` | `AgentController` | `AgentService` | `AgentRepository` | +| `/api/v1/credentials` | `CredentialController` | `CredentialService` | `CredentialRepository` | +| `/api/v1/token` | `TokenController` | `OAuth2Service` | `TokenRepository`, `CredentialRepository`, `AgentRepository` | +| `/api/v1/audit` | `AuditController` | `AuditService` | `AuditRepository` | +| `/api/v1/organizations` | `OrgController` | `OrgService` | `OrgRepository` | +| `/api/v1/compliance/*` | `ComplianceController` | `ComplianceService` | `AuditRepository` | +| `/api/v1/analytics/*` | `AnalyticsController` | `AnalyticsService` | direct pool queries | +| `/api/v1/tiers/*` | `TierController` | `TierService` | pool queries, Stripe SDK | +| `/api/v1/webhooks` | `WebhookController` | `WebhookService` | `WebhookRepository` | +| `/api/v1/federation` | `FederationController` | `FederationService` | direct pool queries | +| `/api/v1/marketplace` | `MarketplaceController` | `MarketplaceService` | direct pool queries | +|
`/api/v1/billing` | `BillingController` | `BillingService` | direct pool queries | +| `/.well-known/did.json`, `/api/v1/did/*` | `DIDController` | `DIDService` | `AgentRepository` | +| `/.well-known/openid-configuration`, `/api/v1/oidc/*` | `OIDCController` | `OIDCKeyService`, `IDTokenService` | direct pool queries | +| `/api/v1/oidc/trust-policies` | `OIDCTrustPolicyController` | `OIDCTrustPolicyService` | direct pool queries | +| `/api/v1/delegation` | `DelegationController` | `DelegationService` | direct pool queries | +| `/api/v1/scaffold` | `ScaffoldController` | `ScaffoldService` | — | +| `/health` | inline | — | pool, redis | +| `/metrics` | inline | — | prom-client | + +## New Services (Phases 3–6) + +| Service | Source file | Responsibility | +|---------|------------|----------------| +| `AnalyticsService` | `src/services/AnalyticsService.ts` | Fire-and-forget `recordEvent`, time-series `getTokenTrend`, heatmap `getAgentActivity`, per-agent `getAgentUsageSummary` | +| `TierService` | `src/services/TierService.ts` | `getStatus` (reads `tenant_tiers`), `initiateUpgrade` (creates Stripe Checkout Session), `applyUpgrade` (handles Stripe webhook), `enforceAgentLimit` | +| `ComplianceService` | `src/services/ComplianceService.ts` | `generateReport` (Redis-cached 5 min), `exportAgentCards` (AGNTCY format) | +| `DelegationService` | `src/services/DelegationService.ts` | A2A delegation chain creation and verification | +| `DIDService` | `src/services/DIDService.ts` | `did:web` identifier generation and DID document management | +| `OIDCKeyService` | `src/services/OIDCKeyService.ts` | OIDC key rotation, JWKS endpoint | +| `IDTokenService` | `src/services/IDTokenService.ts` | OIDC ID token issuance | +| `FederationService` | `src/services/FederationService.ts` | Cross-tenant agent identity federation | +| `WebhookService` | `src/services/WebhookService.ts` | Event subscriptions, delivery with retry, dead-letter queue | +| `VaultService` | 
`src/services/VaultService.ts` | HashiCorp Vault KV v2 read/write for credential storage | +| `BillingService` | `src/services/BillingService.ts` | Stripe customer and subscription management | +| `MarketplaceService` | `src/services/MarketplaceService.ts` | Agent listing and discovery | +| `OIDCTrustPolicyService` | `src/services/OIDCTrustPolicyService.ts` | GitHub OIDC trust policy management | +| `EventPublisher` | `src/services/EventPublisher.ts` | Routes domain events to webhook delivery and Kafka (if configured) | ## Ports | Service | Internal port | Exposed port (local dev) | |---------|--------------|--------------------------| | AgentIdP app | 3000 | 3000 | +| Next.js portal | 3001 | 3001 | | PostgreSQL | 5432 | 5432 | | Redis | 6379 | 6379 | +## API Routes (Phase 6 complete) + +Base path: `/api/v1` + +| Route | Method(s) | Auth | Feature flag | +|-------|----------|------|-------------| +| `/api/v1/agents` | GET, POST, PATCH, DELETE | Bearer JWT | always on | +| `/api/v1/credentials` | GET, POST, DELETE | Bearer JWT | always on | +| `/api/v1/token` | POST | none (client credentials) | always on | +| `/api/v1/audit` | GET | Bearer JWT | always on | +| `/api/v1/audit/verify` | GET | Bearer JWT | always on | +| `/api/v1/organizations` | GET, POST | Bearer JWT | always on | +| `/api/v1/compliance/controls` | GET | none | always on | +| `/api/v1/compliance/report` | GET | Bearer JWT | `COMPLIANCE_ENABLED=true` | +| `/api/v1/compliance/agent-cards` | GET | Bearer JWT | `COMPLIANCE_ENABLED=true` | +| `/api/v1/analytics/token-trend` | GET | Bearer JWT | `ANALYTICS_ENABLED=true` | +| `/api/v1/analytics/agent-activity` | GET | Bearer JWT | `ANALYTICS_ENABLED=true` | +| `/api/v1/analytics/usage-summary` | GET | Bearer JWT | `ANALYTICS_ENABLED=true` | +| `/api/v1/tiers/status` | GET | Bearer JWT | always on | +| `/api/v1/tiers/upgrade` | POST | Bearer JWT | always on | +| `/api/v1/webhooks` | GET, POST, DELETE | Bearer JWT | always on | +| `/api/v1/federation` | GET, 
POST | Bearer JWT | always on | +| `/api/v1/delegation` | GET, POST | Bearer JWT | always on | +| `/api/v1/marketplace` | GET | none | always on | +| `/api/v1/billing` | GET, POST | Bearer JWT | always on | +| `/api/v1/did/*` | GET | none | always on | +| `/api/v1/oidc/*` | GET, POST | mixed | always on | +| `/.well-known/openid-configuration` | GET | none | always on | +| `/.well-known/jwks.json` | GET | none | always on | +| `/.well-known/did.json` | GET | none | always on | +| `/health` | GET | none | always on | +| `/metrics` | GET | none | always on | + ## Graceful Shutdown The server listens for `SIGTERM` and `SIGINT`. On receipt: diff --git a/docs/devops/database.md b/docs/devops/database.md index 2182483..0bbaec8 100644 --- a/docs/devops/database.md +++ b/docs/devops/database.md @@ -1,18 +1,28 @@ # Database -AgentIdP uses PostgreSQL 14+ as its primary data store. The schema consists of four tables managed by a custom migration runner. +AgentIdP uses PostgreSQL 14+ as its primary data store. The schema consists of 26 migrations managed by a custom migration runner. 
--- ## Schema Overview ``` -agents - └── credentials (FK: client_id → agents.agent_id, CASCADE DELETE) - -audit_events (no FK — append-only, agent_id is informational) +organizations + ├── agents (FK: organization_id → organizations.org_id) + │ ├── credentials (FK: client_id → agents.agent_id, CASCADE DELETE) + │ └── agent_did_keys (FK: agent_id → agents.agent_id) + └── audit_events (FK: organization_id — informational, no cascade) token_revocations (no FK — independent revocation store) +oidc_keys (standalone — OIDC signing key rotation) +federation_partners (standalone — cross-tenant identity) +webhook_subscriptions → webhook_deliveries (FK: subscription_id) +agent_marketplace (standalone — agent discovery catalog) +github_oidc_trust_policies (standalone — CI/CD trust) +billing (FK: org_id → organizations.org_id — one row per org) +delegation_chains (standalone — A2A delegation records) +analytics_events (FK: organization_id — append-only) +tenant_tiers (FK: org_id → organizations.org_id — one row per org) ``` --- @@ -134,6 +144,234 @@ Durable record of revoked JWT tokens. Supplements Redis for durability across Re --- +### `organizations` + +Created by migration `006_create_organizations_table.sql`. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `org_id` | `UUID` | No | Primary key | +| `name` | `VARCHAR(255)` | No | Organisation display name | +| `slug` | `VARCHAR(64)` | No | URL-safe unique identifier | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `agent_did_keys` + +Created by migration `012_create_agent_did_keys_table.sql`. + +Stores the DID document key material for each agent. One agent may have multiple keys for +rotation purposes. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `agent_id` | `UUID` | No | FK → `agents.agent_id` | +| `key_id` | `VARCHAR(255)` | No | DID key fragment identifier | +| `public_key_jwk` | `JSONB` | No | Public key in JWK format | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### DID columns on `agents` + +Added by migration `013_add_did_columns_to_agents.sql`: + +- `did` — `VARCHAR(512)` nullable — the `did:web` identifier for this agent +- `did_document` — `JSONB` nullable — full DID document + +--- + +### `oidc_keys` + +Created by migration `014_create_oidc_keys_table.sql`. + +Stores RSA key pairs used for OIDC ID token signing. Supports key rotation — active key is +determined by the most recently created row. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `kid` | `VARCHAR(128)` | No | Key ID — referenced in JWKS | +| `private_key_pem` | `TEXT` | No | Encrypted RSA private key (pgcrypto) | +| `public_key_pem` | `TEXT` | No | RSA public key | +| `algorithm` | `VARCHAR(16)` | No | Always `RS256` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `federation_partners` + +Created by migration `015_create_federation_partners_table.sql`. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | Owning organisation | +| `partner_name` | `VARCHAR(255)` | No | Display name | +| `partner_jwks_url` | `TEXT` | No | URL to partner's JWKS endpoint | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `webhook_subscriptions` + +Created by migration `016_create_webhook_subscriptions_table.sql`. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | Owning organisation | +| `event_type` | `VARCHAR(128)` | No | Event type filter (e.g. `agent.created`) | +| `target_url` | `TEXT` | No | HTTPS delivery endpoint | +| `secret` | `VARCHAR(255)` | Yes | HMAC signing secret for delivery verification | +| `active` | `BOOLEAN` | No | Default: `true` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `webhook_deliveries` + +Created by migration `017_create_webhook_deliveries_table.sql`. + +Records each delivery attempt for a webhook event, including the dead-letter queue entries. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `subscription_id` | `UUID` | No | FK → `webhook_subscriptions.id` | +| `event_type` | `VARCHAR(128)` | No | Event type delivered | +| `payload` | `JSONB` | No | Full event payload | +| `status` | `VARCHAR(32)` | No | `pending`, `delivered`, `failed`, `dead_letter` | +| `response_status` | `INTEGER` | Yes | HTTP status from delivery endpoint | +| `attempt_count` | `INTEGER` | No | Default: `0` | +| `last_attempted_at` | `TIMESTAMPTZ` | Yes | | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +**Dead-letter queue:** After 3 failed delivery attempts, the row status is set to `dead_letter` +and the `agentidp_webhook_dead_letters_total` Prometheus counter is incremented. The Prometheus +metric label is `event_type`. + +--- + +### pgcrypto extension + +Enabled by migration `018_enable_pgcrypto.sql`. Used for encrypting sensitive columns in +`oidc_keys` and credential data. + +--- + +### `agent_marketplace` + +Created by migration `021_add_agent_marketplace.sql`. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `agent_id` | `UUID` | No | FK → `agents.agent_id` | +| `listing_name` | `VARCHAR(255)` | No | Display name in marketplace | +| `description` | `TEXT` | Yes | Markdown description | +| `tags` | `TEXT[]` | No | Searchable tags. Default: `{}` | +| `published` | `BOOLEAN` | No | Default: `false` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `github_oidc_trust_policies` + +Created by migration `022_add_github_oidc_trust_policies.sql`. + +Maps GitHub Actions OIDC claims to agent identities for CI/CD token exchange. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | Owning organisation | +| `repository` | `VARCHAR(512)` | No | GitHub repository slug (`owner/repo`) | +| `branch` | `VARCHAR(255)` | Yes | Branch filter (null = any branch) | +| `agent_id` | `UUID` | No | Agent to issue a token for on match | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `billing` + +Created by migration `023_add_billing.sql`. + +One row per organisation. Tracks the org's Stripe customer and subscription state. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | FK → `organizations.org_id` (UNIQUE) | +| `stripe_customer_id` | `VARCHAR(255)` | Yes | Stripe Customer ID | +| `stripe_subscription_id` | `VARCHAR(255)` | Yes | Stripe Subscription ID | +| `status` | `VARCHAR(64)` | No | Stripe subscription status or `none` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `delegation_chains` + +Created by migration `024_add_delegation_chains.sql`. + +Records A2A delegation grants created via the delegation API. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `delegator_agent_id` | `UUID` | No | Agent granting the delegation | +| `delegate_agent_id` | `UUID` | No | Agent receiving the delegation | +| `scopes` | `TEXT[]` | No | Scopes being delegated | +| `expires_at` | `TIMESTAMPTZ` | Yes | Optional expiry | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +### `analytics_events` + +Created by migration `025_add_analytics_events.sql`. + +Append-only event store for analytics. Supports token trend, agent activity, and usage summary +queries. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `organization_id` | `UUID` | No | Owning organisation | +| `date` | `DATE` | No | Calendar date of the event (UTC) | +| `metric_type` | `VARCHAR(64)` | No | e.g. `token_issued`, `agent_called` | +| `count` | `INTEGER` | No | Event count for this date+type | + +**Index:** `(organization_id, date DESC)` for fast time-series queries. + +--- + +### `tenant_tiers` + +Created by migration `026_add_tenant_tiers.sql`. + +One row per organisation. Stores the current tier and enforces tier limits via the +`tierEnforcement` middleware. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | FK → `organizations.org_id` (UNIQUE) | +| `tier` | `ENUM('free','pro','enterprise')` | No | Current tier. Default: `free` | +| `updated_at` | `TIMESTAMPTZ` | No | Last tier change. 
Default: `NOW()` | + +**Tier limits** (from `src/config/tiers.ts`): + +| Tier | Max Agents | Max API Calls/Day | Max Tokens/Day | +|------|-----------|-------------------|----------------| +| free | 10 | 1,000 | 1,000 | +| pro | 100 | 50,000 | 50,000 | +| enterprise | unlimited | unlimited | unlimited | + +--- + ## Migration Runner Migrations are managed by `scripts/migrate.ts`. It reads `.sql` files from `src/db/migrations/` in alphabetical order, tracks applied migrations in a `schema_migrations` table, and executes only unapplied migrations — each in its own transaction. @@ -160,10 +398,11 @@ Expected output (first run): Running database migrations... ✓ Applied: 001_create_agents.sql ✓ Applied: 002_create_credentials.sql - ✓ Applied: 003_create_audit_events.sql - ✓ Applied: 004_create_tokens.sql + ... + ✓ Applied: 025_add_analytics_events.sql + ✓ Applied: 026_add_tenant_tiers.sql -Migrations complete. 4 migration(s) applied. +Migrations complete. 26 migration(s) applied. ``` Expected output (already applied): @@ -191,9 +430,10 @@ Expected output: -----------------------------------+------------------------------- 001_create_agents.sql | 2026-03-28 09:00:00.000000+00 002_create_credentials.sql | 2026-03-28 09:00:00.000000+00 - 003_create_audit_events.sql | 2026-03-28 09:00:00.000000+00 - 004_create_tokens.sql | 2026-03-28 09:00:00.000000+00 -(4 rows) + ... + 025_add_analytics_events.sql | 2026-04-04 09:00:00.000000+00 + 026_add_tenant_tiers.sql | 2026-04-04 09:00:00.000000+00 +(26 rows) ``` ### Adding a new migration @@ -214,6 +454,15 @@ There is no automated rollback. To undo a migration: ## Connection Pool -The application uses `pg.Pool` with default settings (max 10 connections). The pool is a singleton — one pool per process instance. +The application uses `pg.Pool` with settings read from environment variables. The pool is a +singleton — one pool per process instance. -To override pool size, modify `src/db/pool.ts`. 
In production, ensure `DATABASE_URL` includes connection pool parameters if using PgBouncer or a managed connection pooler. +| Variable | Default | Description | +|----------|---------|-------------| +| `DB_POOL_MAX` | `20` | Maximum connections | +| `DB_POOL_MIN` | `2` | Minimum idle connections | +| `DB_POOL_IDLE_TIMEOUT_MS` | `30000` | Idle eviction timeout (ms) | +| `DB_POOL_CONNECTION_TIMEOUT_MS` | `5000` | Acquisition timeout (ms) | + +Pool size is exposed as Prometheus metrics: `agentidp_db_pool_active_connections` and +`agentidp_db_pool_waiting_requests`. Monitor these in production to detect pool exhaustion. diff --git a/docs/devops/deployment.md b/docs/devops/deployment.md index aec5e1e..11ab79c 100644 --- a/docs/devops/deployment.md +++ b/docs/devops/deployment.md @@ -543,6 +543,24 @@ All environment variables injected into the AgentIdP container are documented in | `VAULT_ADDR` | No | Task definition env var | Cloud Run env var | | `VAULT_TOKEN` | No | Secrets Manager: `///vault-token` | Secret Manager: `-vault-token` | | `VAULT_MOUNT` | No | Task definition env var (default: `secret`) | Cloud Run env var (default: `secret`) | +| `BILLING_ENABLED` | No | Task definition env var | Cloud Run env var | +| `STRIPE_SECRET_KEY` | Only if billing enabled | Secrets Manager: `///stripe-secret-key` | Secret Manager: `-stripe-secret-key` | +| `STRIPE_WEBHOOK_SECRET` | Only if billing enabled | Secrets Manager: `///stripe-webhook-secret` | Secret Manager: `-stripe-webhook-secret` | +| `STRIPE_PRICE_ID` | Only if billing enabled | Task definition env var | Cloud Run env var | +| `ANALYTICS_ENABLED` | No | Task definition env var (default: `true`) | Cloud Run env var | +| `TIER_ENFORCEMENT` | No | Task definition env var (default: `true`) | Cloud Run env var | +| `COMPLIANCE_ENABLED` | No | Task definition env var (default: `true`) | Cloud Run env var | +| `REDIS_RATE_LIMIT_ENABLED` | No | Task definition env var | Cloud Run env var | +| `RATE_LIMIT_WINDOW_MS` | No | 
Task definition env var (default: `60000`) | Cloud Run env var | +| `RATE_LIMIT_MAX_REQUESTS` | No | Task definition env var (default: `100`) | Cloud Run env var | +| `DB_POOL_MAX` | No | Task definition env var (default: `20`) | Cloud Run env var | +| `DB_POOL_MIN` | No | Task definition env var (default: `2`) | Cloud Run env var | +| `DB_POOL_IDLE_TIMEOUT_MS` | No | Task definition env var (default: `30000`) | Cloud Run env var | +| `DB_POOL_CONNECTION_TIMEOUT_MS` | No | Task definition env var (default: `5000`) | Cloud Run env var | +| `KAFKA_BROKERS` | No | Task definition env var | Cloud Run env var | +| `ENFORCE_TLS` | No | Task definition env var | Cloud Run env var | +| `OPA_URL` | No | Task definition env var | Cloud Run env var | +| `VAULT_KV_MOUNT` | No | Task definition env var (default: `secret`) | Cloud Run env var | ### Updating a Secret diff --git a/docs/devops/environment-variables.md b/docs/devops/environment-variables.md index d090886..62d9ea7 100644 --- a/docs/devops/environment-variables.md +++ b/docs/devops/environment-variables.md @@ -20,7 +20,7 @@ PostgreSQL connection string. | **Format** | `postgresql://:@:/` | | **Example** | `postgresql://sentryagent:sentryagent@localhost:5432/sentryagent_idp` | -The application uses `pg.Pool` with this connection string. Connection pool size uses the `pg` default (10 connections). +The application uses `pg.Pool` with this connection string. Pool sizing is controlled by the optional `DB_POOL_*` variables documented below. --- @@ -72,6 +72,10 @@ Every authenticated request verifies the JWT signature using this key. If this k --- +> **Note on Billing:** `STRIPE_SECRET_KEY`, `STRIPE_WEBHOOK_SECRET`, and `STRIPE_PRICE_ID` are +> required when `BILLING_ENABLED=true`. For local development, set `BILLING_ENABLED=false` and +> use placeholder values. + ## Optional Variables These variables have defaults and do not need to be set for local development. @@ -117,6 +121,257 @@ KV v2 secrets engine mount path. 
--- +### `BILLING_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `false` | +| **Values** | `true`, `false` | +| **Example** | `BILLING_ENABLED=false` | + +Gates Stripe billing integration and free-tier agent limit enforcement. When `false`, no Stripe +API calls are made and the free-tier agent limit is not enforced (daily API call and token limits are governed separately by `TIER_ENFORCEMENT`). Set to `false` for in-house testing. + +--- + +### `STRIPE_SECRET_KEY` + +| | | +|-|-| +| **Required** | Only when `BILLING_ENABLED=true` | +| **Format** | Stripe secret key string (`sk_live_*` or `sk_test_*`) | +| **Example** | `STRIPE_SECRET_KEY=sk_test_placeholder` | + +Stripe API key used to create Checkout Sessions for tier upgrades. Never use a live key in +development. + +--- + +### `STRIPE_WEBHOOK_SECRET` + +| | | +|-|-| +| **Required** | Only when `BILLING_ENABLED=true` | +| **Format** | Stripe webhook signing secret (`whsec_*`) | +| **Example** | `STRIPE_WEBHOOK_SECRET=whsec_placeholder` | + +Used to verify the HMAC signature on incoming Stripe webhook events. Without this, the billing +webhook endpoint will reject all events. + +--- + +### `STRIPE_PRICE_ID` + +| | | +|-|-| +| **Required** | Only when `BILLING_ENABLED=true` | +| **Format** | Stripe Price ID string (`price_*`) | +| **Example** | `STRIPE_PRICE_ID=price_placeholder` | + +The Stripe Price object used when creating a Checkout Session for the Pro tier upgrade. + +--- + +### `ANALYTICS_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `true` | +| **Values** | `true`, `false` | +| **Example** | `ANALYTICS_ENABLED=true` | + +Feature flag that gates the `/api/v1/analytics/*` routes. When `false`, the analytics router is +not mounted and all analytics endpoints return 404. Events are still recorded internally +regardless of this flag. + +--- + +### `TIER_ENFORCEMENT` + +| | | +|-|-| +| **Required** | No | +| **Default** | `true` | +| **Values** | `true`, `false` | +| **Example** | `TIER_ENFORCEMENT=true` | + +Enables Redis-backed tier limit enforcement per tenant.
When `true`, the `tierEnforcement` +middleware checks daily API call and token counts against per-tier limits defined in +`src/config/tiers.ts`. Enterprise tenants with `maxCallsPerDay: Infinity` bypass enforcement. +When `false`, no tier limits are enforced. + +--- + +### `COMPLIANCE_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `true` | +| **Values** | `true`, `false` | +| **Example** | `COMPLIANCE_ENABLED=true` | + +Feature flag that gates the report and agent-card export endpoints under +`/api/v1/compliance/*`. When `false`, those endpoints return 404. The SOC2 controls endpoint +(`/api/v1/compliance/controls`) and audit chain verification (`/api/v1/audit/verify`) are +always enabled regardless of this flag. + +--- + +### `REDIS_RATE_LIMIT_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `false` | +| **Values** | `true`, `false` | +| **Example** | `REDIS_RATE_LIMIT_ENABLED=true` | + +When `true`, rate limiting uses a Redis-backed sliding-window counter per `client_id`. When +`false`, rate limiting uses an in-process `RateLimiterMemory` store (does not share state +across multiple app instances). + +--- + +### `RATE_LIMIT_WINDOW_MS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `60000` | +| **Format** | Integer (milliseconds) | +| **Example** | `RATE_LIMIT_WINDOW_MS=60000` | + +Duration of the sliding-window rate limit period in milliseconds. Only effective when +`REDIS_RATE_LIMIT_ENABLED=true`. + +--- + +### `RATE_LIMIT_MAX_REQUESTS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `100` | +| **Format** | Integer | +| **Example** | `RATE_LIMIT_MAX_REQUESTS=100` | + +Maximum number of requests allowed per `client_id` within `RATE_LIMIT_WINDOW_MS`. Requests +exceeding this limit receive `429 RATE_LIMIT_EXCEEDED`. 
+ +--- + +### `DB_POOL_MAX` + +| | | +|-|-| +| **Required** | No | +| **Default** | `20` | +| **Format** | Integer | +| **Example** | `DB_POOL_MAX=20` | + +Maximum number of PostgreSQL connections in the pool. Increase for high-throughput production +deployments. Ensure your PostgreSQL instance's `max_connections` is set to at least +`DB_POOL_MAX × number_of_app_instances + 5`. + +--- + +### `DB_POOL_MIN` + +| | | +|-|-| +| **Required** | No | +| **Default** | `2` | +| **Format** | Integer | +| **Example** | `DB_POOL_MIN=2` | + +Minimum number of idle connections kept alive in the pool. + +--- + +### `DB_POOL_IDLE_TIMEOUT_MS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `30000` | +| **Format** | Integer (milliseconds) | +| **Example** | `DB_POOL_IDLE_TIMEOUT_MS=30000` | + +Milliseconds a connection can sit idle before being evicted from the pool. + +--- + +### `DB_POOL_CONNECTION_TIMEOUT_MS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `5000` | +| **Format** | Integer (milliseconds) | +| **Example** | `DB_POOL_CONNECTION_TIMEOUT_MS=5000` | + +Milliseconds the pool waits for a connection to become available before throwing a connection +timeout error. + +--- + +### `VAULT_KV_MOUNT` + +| | | +|-|-| +| **Required** | No | +| **Default** | `secret` | +| **Format** | String (no leading or trailing slash) | +| **Example** | `VAULT_KV_MOUNT=agentidp` | + +KV v2 secrets engine mount path used by `VaultService`. Equivalent to the existing `VAULT_MOUNT` +variable — note that `.env.example` uses `VAULT_KV_MOUNT`; the underlying service reads either. + +--- + +### `OPA_URL` + +| | | +|-|-| +| **Required** | No | +| **Format** | URL string | +| **Example** | `OPA_URL=http://localhost:8181` | + +URL of a running OPA server for external policy evaluation. When unset, the application falls +back to the embedded Wasm or JSON policy in `POLICY_DIR`. Used for health check reporting. 
+ +--- + +### `KAFKA_BROKERS` + +| | | +|-|-| +| **Required** | No | +| **Format** | Comma-separated broker addresses | +| **Example** | `KAFKA_BROKERS=localhost:9092` | + +When set, the `KafkaAdapter` publishes domain events to Kafka. When unset, Kafka publishing is +disabled and events are only delivered via the `WebhookService`. + +--- + +### `ENFORCE_TLS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `false` | +| **Values** | `true`, `false` | +| **Example** | `ENFORCE_TLS=true` | + +When `true`, the `tlsEnforcementMiddleware` redirects all HTTP requests to HTTPS. Enable in +production deployments where TLS termination is handled at the application layer. + +--- + ### `POLICY_DIR` Directory containing OPA policy files (`authz.rego`, `authz.wasm`, `data/scopes.json`). @@ -178,33 +433,53 @@ In production, set this to the specific origin(s) that should be permitted to ca ## Complete `.env` Example ``` -# Database -DATABASE_URL=postgresql://sentryagent:sentryagent@localhost:5432/sentryagent_idp - -# Redis -REDIS_URL=redis://localhost:6379 - -# Application -PORT=3000 +# ── Server ────────────────────────────────────────────────────────────────── NODE_ENV=development -CORS_ORIGIN=* +PORT=3000 +CORS_ORIGIN=http://localhost:3001 -# JWT Keys (generate with openssl — see docs/devops/security.md) -JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- -MIIEowIBAAKCAQEA... ------END RSA PRIVATE KEY-----" +# ── Database ───────────────────────────────────────────────────────────────── +DATABASE_URL=postgresql://sentryagent:sentryagent@localhost:5432/sentryagent_idp +DB_POOL_MAX=20 +DB_POOL_MIN=2 +DB_POOL_IDLE_TIMEOUT_MS=30000 +DB_POOL_CONNECTION_TIMEOUT_MS=5000 -JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY----- -MIIBIjANBgkq... 
------END PUBLIC KEY-----" +# ── Redis ──────────────────────────────────────────────────────────────────── +REDIS_URL=redis://localhost:6379 +REDIS_RATE_LIMIT_ENABLED=true +RATE_LIMIT_WINDOW_MS=60000 +RATE_LIMIT_MAX_REQUESTS=100 -# HashiCorp Vault (Phase 2 — optional, omit to use bcrypt mode) +# ── JWT Keys (generate with openssl — see docs/devops/security.md) ────────── +JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\nMIIEow...\n-----END RSA PRIVATE KEY-----" +JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\nMIIBIj...\n-----END PUBLIC KEY-----" + +# ── Billing (Stripe) — set BILLING_ENABLED=false for local/in-house testing ─ +BILLING_ENABLED=false +STRIPE_SECRET_KEY=sk_test_placeholder +STRIPE_WEBHOOK_SECRET=whsec_placeholder +STRIPE_PRICE_ID=price_placeholder + +# ── Phase 6 Feature Flags ───────────────────────────────────────────────────── +ANALYTICS_ENABLED=true +TIER_ENFORCEMENT=true +COMPLIANCE_ENABLED=true + +# ── HashiCorp Vault (optional) ──────────────────────────────────────────────── # VAULT_ADDR=http://127.0.0.1:8200 # VAULT_TOKEN=hvs.XXXXXXXXXXXXXXXXXXXXXX -# VAULT_MOUNT=secret +# VAULT_KV_MOUNT=secret -# OPA Policy Engine (Phase 2 — optional, defaults to /policies) +# ── OPA (optional) ─────────────────────────────────────────────────────────── # POLICY_DIR=/etc/sentryagent/policies +# OPA_URL=http://localhost:8181 + +# ── Kafka (optional) ───────────────────────────────────────────────────────── +# KAFKA_BROKERS=localhost:9092 + +# ── TLS ────────────────────────────────────────────────────────────────────── +# ENFORCE_TLS=true ``` > Do not commit `.env` to version control. Add it to `.gitignore`. @@ -220,3 +495,8 @@ The application validates required variables at startup in this order: 3. `REDIS_URL` — checked when `getRedisClient()` is first called (during `createApp()`) If any required variable is missing, the process exits with an error before binding to any port. 
+ +> **Feature flags** (`BILLING_ENABLED`, `ANALYTICS_ENABLED`, `TIER_ENFORCEMENT`, +> `COMPLIANCE_ENABLED`) are read at startup. `ANALYTICS_ENABLED` and `COMPLIANCE_ENABLED` +> determine whether their respective routers are mounted — changing these values requires a +> process restart. diff --git a/docs/devops/field-trial.md b/docs/devops/field-trial.md new file mode 100644 index 0000000..9f4c875 --- /dev/null +++ b/docs/devops/field-trial.md @@ -0,0 +1,946 @@ +# SentryAgent.ai AgentIdP — In-House Field Trial Guide + +This guide is the execution playbook for in-house Docker Compose field trials of SentryAgent.ai +AgentIdP. Follow each phase in order. All commands are exact — copy and paste them directly. + +Estimated time to complete all phases: 45–60 minutes. + +Prerequisites must be satisfied before Section 0. + +## Prerequisites + +**Docker 24+ and Docker Compose 2.20+** + +```bash +docker --version +# Expected: Docker version 24.x.x or higher + +docker compose version +# Expected: Docker Compose version v2.20.x or higher +``` + +**Node.js 18+ via nvm** + +```bash +export NVM_DIR="$HOME/.nvm" && source "$NVM_DIR/nvm.sh" +node --version +# Expected: v18.x.x or higher +``` + +**openssl** + +```bash +openssl version +# Expected: OpenSSL 1.1.x or higher (any version) +``` + +**Git repo cloned** + +```bash +git clone https://git.sentryagent.ai/vijay_admin/sentryagent-idp.git +cd sentryagent-idp +``` + +**Ports free** + +The following ports must be free on the machine before starting: + +| Port | Service | +|------|---------| +| 3000 | AgentIdP backend | +| 3001 | Next.js portal | +| 5432 | PostgreSQL | +| 6379 | Redis | + +Check all ports: + +```bash +lsof -i :3000 -i :3001 -i :5432 -i :6379 +# Expected: no output (all ports free) +``` + +If any port is in use, kill the occupying process (replace `<PORT>` with the port number reported above): + +```bash +lsof -ti:<PORT> | xargs kill +``` + +--- + +## Section 0 — Environment Setup + +This section guides the engineer through creating a valid `.env` file for field trial use.
+ +**Step 0.1 — Copy `.env.example`** + +```bash +cp .env.example .env +``` + +**Step 0.2 — Generate RSA-2048 keypair** + +Generate the JWT signing keys: + +```bash +openssl genrsa -out private.pem 2048 +openssl rsa -in private.pem -pubout -out public.pem +``` + +Verify the keys are valid: + +```bash +openssl rsa -in private.pem -check -noout +# Expected: RSA key ok + +openssl rsa -in public.pem -pubin -noout -text 2>&1 | head -3 +# Expected: Public-Key: (2048 bit) +``` + +**Step 0.3 — Write keys into `.env`** + +Write the private key as a single-line PEM with `\n` separators: + +```bash +PRIVATE_KEY_LINE=$(awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}' private.pem) +sed -i "s|JWT_PRIVATE_KEY=.*|JWT_PRIVATE_KEY=\"${PRIVATE_KEY_LINE}\"|" .env +``` + +Write the public key: + +```bash +PUBLIC_KEY_LINE=$(awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}' public.pem) +sed -i "s|JWT_PUBLIC_KEY=.*|JWT_PUBLIC_KEY=\"${PUBLIC_KEY_LINE}\"|" .env +``` + +Verify both keys are present and non-empty (OpenSSL 3.x writes the private key with a `BEGIN PRIVATE KEY` header unless `genrsa -traditional` is used — adjust the first pattern if so): + +```bash +grep -c "BEGIN RSA PRIVATE KEY" .env +# Expected: 1 + +grep -c "BEGIN PUBLIC KEY" .env +# Expected: 1 +``` + +**Step 0.4 — Configure field trial values** + +Set the following values in `.env`.
These are the correct values for an in-house field trial +(no real Stripe, no Kafka, no Vault): + +```bash +# Disable real Stripe billing for field trial +sed -i "s|BILLING_ENABLED=.*|BILLING_ENABLED=false|" .env +sed -i "s|STRIPE_SECRET_KEY=.*|STRIPE_SECRET_KEY=sk_test_placeholder|" .env +sed -i "s|STRIPE_WEBHOOK_SECRET=.*|STRIPE_WEBHOOK_SECRET=whsec_placeholder|" .env +sed -i "s|STRIPE_PRICE_ID=.*|STRIPE_PRICE_ID=price_placeholder|" .env + +# Keep feature flags at defaults +sed -i "s|ANALYTICS_ENABLED=.*|ANALYTICS_ENABLED=true|" .env +sed -i "s|TIER_ENFORCEMENT=.*|TIER_ENFORCEMENT=true|" .env +sed -i "s|COMPLIANCE_ENABLED=.*|COMPLIANCE_ENABLED=true|" .env + +# Allow portal CORS +sed -i "s|CORS_ORIGIN=.*|CORS_ORIGIN=http://localhost:3001|" .env +``` + +**Step 0.5 — Verify final `.env`** + +```bash +grep -E "^(DATABASE_URL|REDIS_URL|JWT_PRIVATE_KEY|JWT_PUBLIC_KEY|BILLING_ENABLED|ANALYTICS_ENABLED|TIER_ENFORCEMENT|COMPLIANCE_ENABLED|CORS_ORIGIN)=" .env +``` + +Expected output (values abbreviated): + +``` +DATABASE_URL=postgresql://agentidp:password@localhost:5432/agentidp +REDIS_URL=redis://localhost:6379 +JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\n... +JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\n... +BILLING_ENABLED=false +ANALYTICS_ENABLED=true +TIER_ENFORCEMENT=true +COMPLIANCE_ENABLED=true +CORS_ORIGIN=http://localhost:3001 +``` + +--- + +## Phase A — Stack Startup + +**Step A.1 — Build and start the full stack** + +```bash +docker compose up --build -d +``` + +This builds the `app` container image and starts all three services. The `app` service waits +for `postgres` and `redis` to pass their health checks before starting. 
+ +**Step A.2 — Verify all services are healthy** + +```bash +docker compose ps +``` + +Expected output — all three services must show `healthy`: + +``` +NAME IMAGE STATUS +sentryagent-idp-app-1 sentryagent-idp-app running (healthy) +sentryagent-idp-postgres-1 postgres:14-alpine running (healthy) +sentryagent-idp-redis-1 redis:7-alpine running (healthy) +``` + +If any service shows `starting` or `unhealthy`, wait 15 seconds and run `docker compose ps` +again. If a service remains unhealthy after 60 seconds, see Troubleshooting. + +**Step A.3 — Run database migrations** + +```bash +docker compose exec app npm run db:migrate +``` + +Expected output: + +``` +Running database migrations... + ✓ Applied: 001_create_agents.sql + ✓ Applied: 002_create_credentials.sql + ... + ✓ Applied: 025_add_analytics_events.sql + ✓ Applied: 026_add_tenant_tiers.sql + +Migrations complete. 26 migration(s) applied. +``` + +All 26 migrations must apply without error before proceeding. + +**Step A.4 — Verify application health** + +```bash +curl -s http://localhost:3000/health | jq . +``` + +Expected response: + +```json +{"status":"ok"} +``` + +**Step A.5 — Verify Prometheus metrics** + +```bash +curl -s http://localhost:3000/metrics | head -20 +``` + +Expected: Prometheus text output beginning with `# HELP` lines. Verify these specific metrics +are present: + +```bash +curl -s http://localhost:3000/metrics | grep -E "^# HELP agentidp_" +``` + +Expected: at least 19 lines matching `# HELP agentidp_*`. + +--- + +## Phase B — Core Product Journeys + +This phase tests the end-to-end agent identity lifecycle. Run each step in order. Each step +depends on the output of the previous step. + +> **Note on tokens:** The steps below use shell variables to pass values between commands. Run +> all commands in the same terminal session. 
+ +**Step B.1 — Create an organisation** + +```bash +ORG_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Content-Type: application/json" \ + -d '{"name":"Field Trial Org","slug":"field-trial"}') + +echo $ORG_RESPONSE | jq . +ORG_ID=$(echo $ORG_RESPONSE | jq -r '.org_id') +echo "ORG_ID: $ORG_ID" +``` + +Expected: HTTP 201 response body containing an `org_id` UUID. `ORG_ID` must be a non-empty UUID. + +**Step B.2 — Register an agent** + +```bash +AGENT_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Content-Type: application/json" \ + -d "{ + \"email\": \"trial-agent@field-trial.sentryagent.ai\", + \"agent_type\": \"classifier\", + \"version\": \"1.0.0\", + \"capabilities\": [\"documents:read\", \"documents:classify\"], + \"owner\": \"field-trial-team\", + \"deployment_env\": \"development\", + \"organization_id\": \"$ORG_ID\" + }") + +echo $AGENT_RESPONSE | jq . +AGENT_ID=$(echo $AGENT_RESPONSE | jq -r '.agent_id') +echo "AGENT_ID: $AGENT_ID" +``` + +Expected: HTTP 201 response body containing an `agent_id` UUID. + +**Step B.3 — Generate credentials** + +```bash +CRED_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/credentials \ + -H "Content-Type: application/json" \ + -d "{\"agent_id\": \"$AGENT_ID\"}") + +echo $CRED_RESPONSE | jq . +CLIENT_ID=$(echo $CRED_RESPONSE | jq -r '.client_id') +CLIENT_SECRET=$(echo $CRED_RESPONSE | jq -r '.client_secret') +echo "CLIENT_ID: $CLIENT_ID" +echo "CLIENT_SECRET: $CLIENT_SECRET" +``` + +Expected: HTTP 201 response body containing `client_id` and `client_secret`. The `client_secret` +is only returned once — save it now. + +**Step B.4 — Issue an OAuth 2.0 access token** + +```bash +TOKEN_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$CLIENT_ID&client_secret=$CLIENT_SECRET&scope=read") + +echo $TOKEN_RESPONSE | jq . 
+ACCESS_TOKEN=$(echo $TOKEN_RESPONSE | jq -r '.access_token') +echo "ACCESS_TOKEN obtained: ${ACCESS_TOKEN:0:30}..." +``` + +Expected: HTTP 200 response body with `access_token`, `token_type: "Bearer"`, `expires_in: 3600`, +`scope: "read"`. + +**Step B.5 — Use the token on a protected endpoint** + +```bash +curl -s -H "Authorization: Bearer $ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents | jq . +``` + +Expected: HTTP 200 with a JSON array of agents including the agent registered in Step B.2. + +**Step B.6 — Inspect JWT claims** + +Decode and inspect the access token structure (without verifying signature): + +```bash +echo $ACCESS_TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq . +``` + +Expected claims: + +```json +{ + "sub": "<subject identifier>", + "iss": "https://sentryagent.ai", + "aud": "sentryagent-api", + "scope": "read", + "agent_id": "<AGENT_ID>", + "organization_id": "<ORG_ID>", + "iat": "<issued-at, Unix seconds>", + "exp": "<iat + 3600>", + "jti": "<unique token ID>" +} +``` + +Verify `exp - iat = 3600` (1 hour TTL). + +**Step B.7 — Rotate credentials and verify old token is rejected** + +Rotate the credentials (generates a new client_secret, revokes the old one): + +```bash +ROTATE_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/credentials \ + -H "Content-Type: application/json" \ + -d "{\"agent_id\": \"$AGENT_ID\"}") + +NEW_CLIENT_ID=$(echo $ROTATE_RESPONSE | jq -r '.client_id') +NEW_CLIENT_SECRET=$(echo $ROTATE_RESPONSE | jq -r '.client_secret') +echo "New credential: $NEW_CLIENT_ID" +``` + +Attempt to use the old token (must be rejected): + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents +# Expected: 401 +``` + +Issue a new token with the new credentials: + +```bash +NEW_TOKEN_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$NEW_CLIENT_ID&client_secret=$NEW_CLIENT_SECRET&scope=read") + +NEW_ACCESS_TOKEN=$(echo
$NEW_TOKEN_RESPONSE | jq -r '.access_token') +echo "New token obtained." +``` + +Verify the new token works: + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents +# Expected: 200 +``` + +**Step B.8 — Check audit log** + +```bash +curl -s -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + "http://localhost:3000/api/v1/audit?limit=10" | jq . +``` + +Expected: JSON array of audit events. Verify these action types are present from Steps B.1–B.7: +`agent.created`, `credential.generated`, `token.issued`, `credential.rotated`, `token.revoked`. + +--- + +## Phase C — Guardrails + +This phase tests security boundaries. Each test case must be run with the exact command shown +and must produce the specified HTTP status code. + +> **Setup:** Ensure `$NEW_ACCESS_TOKEN` is still set from Phase B. Use `export NEW_ACCESS_TOKEN` +> if switching terminals. + +**Test C.1 — No Authorization header → 401** + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + http://localhost:3000/api/v1/agents +``` + +Expected HTTP status: `401` + +**Test C.2 — Malformed JWT → 401** + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer notavalidjwt" \ + http://localhost:3000/api/v1/agents +``` + +Expected HTTP status: `401` + +**Test C.3 — Expired JWT → 401** + +Use a known-expired token. Generate one with a 1-second TTL (requires a test helper or +manually craft an expired JWT). 
For field trial purposes, use this pre-constructed expired token +(signed with a different key — will fail signature verification and return 401): + +```bash +EXPIRED_TOKEN="eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0IiwiZXhwIjoxfQ.invalid" + +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $EXPIRED_TOKEN" \ + http://localhost:3000/api/v1/agents +``` + +Expected HTTP status: `401` + +**Test C.4 — Valid JWT, wrong scope → 403** + +Issue a token with scope `read`, then attempt to access an endpoint requiring scope `write`: + +```bash +# The NEW_ACCESS_TOKEN has scope "read" +# Attempt an action requiring "write" scope (create agent) +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + -H "Content-Type: application/json" \ + -X POST http://localhost:3000/api/v1/agents \ + -d '{"email":"scope-test@example.com","agent_type":"custom","version":"1.0.0","capabilities":[],"owner":"test","deployment_env":"development"}' +``` + +Expected HTTP status: `403` + +**Test C.5 — Rate limit: 101 requests → 429 on the 101st** + +Send 101 requests in rapid succession. The 101st must return 429. + +```bash +for i in $(seq 1 101); do + STATUS=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents) + if [ "$STATUS" = "429" ]; then + echo "Request $i returned 429 (PASS)" + break + fi +done +``` + +Expected: Output shows `Request 101 returned 429 (PASS)` (or earlier if previous requests in +the session have already counted toward the window). + +After this test, wait 60 seconds for the rate limit window to reset, or use a fresh +`client_id` for subsequent tests. + +**Test C.6 — Tier limit: exceed free-tier API call limit → 429 with `tier_limit_exceeded`** + +The free tier allows 1,000 API calls per day. 
For field trial, manually set the counter to the +limit value to trigger the guard without making 1,000 real requests: + +```bash +# Get the org_id from the token +ORG_ID=$(echo $NEW_ACCESS_TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.organization_id') + +# Force the counter to the limit via Redis CLI +docker compose exec redis redis-cli SET "rate:tier:calls:$ORG_ID" 1001 EX 86400 + +# The next API call must be rejected +TIER_RESPONSE=$(curl -s -w "\n%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents) + +echo "$TIER_RESPONSE" +``` + +Expected: HTTP status `429`. Response body must contain `"code":"tier_limit_exceeded"`. + +Reset the counter after this test: + +```bash +docker compose exec redis redis-cli DEL "rate:tier:calls:$ORG_ID" +``` + +**Test C.7 — Tenant isolation: Org A token cannot access Org B agents → 403** + +Create a second organisation and agent: + +```bash +ORG_B_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Content-Type: application/json" \ + -d '{"name":"Org B","slug":"org-b"}') + +ORG_B_ID=$(echo $ORG_B_RESPONSE | jq -r '.org_id') +echo "ORG_B_ID: $ORG_B_ID" + +AGENT_B_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Content-Type: application/json" \ + -d "{ + \"email\": \"org-b-agent@org-b.sentryagent.ai\", + \"agent_type\": \"monitor\", + \"version\": \"1.0.0\", + \"capabilities\": [], + \"owner\": \"org-b\", + \"deployment_env\": \"development\", + \"organization_id\": \"$ORG_B_ID\" + }") + +AGENT_B_ID=$(echo $AGENT_B_RESPONSE | jq -r '.agent_id') +echo "AGENT_B_ID: $AGENT_B_ID" +``` + +Attempt to access Org B's agent using Org A's token: + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents/$AGENT_B_ID +``` + +Expected HTTP status: `403` + +--- + +## Phase D — Portal + +**Step D.1 — Install portal dependencies** + +```bash +cd portal && npm 
install && cd .. +``` + +**Step D.2 — Start the portal development server** + +```bash +cd portal && npm run dev & +``` + +Wait 5 seconds for Next.js to compile, then verify it is listening: + +```bash +curl -s -o /dev/null -w "%{http_code}" http://localhost:3001 +# Expected: 200 or 307 (redirect to /login) +``` + +**Step D.3 — Verify each portal route loads** + +Open a browser and navigate to each of the following URLs. Each must load without a JavaScript +error in the browser console: + +| URL | Expected | +|-----|---------| +| `http://localhost:3001/login` | Login page renders | +| `http://localhost:3001/agents` | Agent list renders (may be empty or show auth redirect) | +| `http://localhost:3001/credentials` | Credentials page renders | +| `http://localhost:3001/audit` | Audit log page renders | +| `http://localhost:3001/analytics` | Analytics dashboard renders | +| `http://localhost:3001/settings/tier` | Tier status page renders | +| `http://localhost:3001/compliance` | Compliance report page renders | +| `http://localhost:3001/webhooks` | Webhooks page renders | +| `http://localhost:3001/marketplace` | Marketplace page renders | + +All 9 routes must load without a blank page or unhandled error. + +**Step D.4 — Verify analytics charts render** + +Navigate to `http://localhost:3001/analytics`. + +Verify that both chart components (`TokenTrendChart` and `AgentHeatmap`) are present in the +served page HTML: + +```bash +curl -s http://localhost:3001/analytics | grep -c "recharts" +# Expected: 1 or more (recharts is used for TokenTrendChart and AgentHeatmap) +``` + +**Step D.5 — Verify tier status page** + +Navigate to `http://localhost:3001/settings/tier`. + +The page must display the current tier (expected: `free` for a new organisation). 
+ +**Step D.6 — Stop the portal** + +```bash +kill $(lsof -ti:3001) +``` + +--- + +## Phase E — AGNTCY Conformance + +**Step E.1 — Activate nvm** + +```bash +export NVM_DIR="$HOME/.nvm" && source "$NVM_DIR/nvm.sh" +``` + +**Step E.2 — Run the AGNTCY conformance suite** + +```bash +npm run test:agntcy-conformance +``` + +**Step E.3 — Expected output** + +``` +AGNTCY Conformance Suite + Agent Card Export + ✓ exports valid AGNTCY agent card format + ✓ agent card contains required identity fields + Compliance Report + ✓ generates SOC2-aligned compliance report + ✓ compliance report includes all required control domains + +4 passing (Xs) +``` + +All 4 tests must pass. A failure indicates a regression in AGNTCY conformance. + +**What each test validates:** + +| Test | What it validates | +|------|------------------| +| `exports valid AGNTCY agent card format` | The `/api/v1/compliance/agent-cards` endpoint returns an array where each card has `id`, `name`, `version`, `capabilities`, `did` fields in AGNTCY format | +| `agent card contains required identity fields` | Each agent card's `identity` block includes `agent_id`, `organization_id`, `did`, and `deployment_env` | +| `generates SOC2-aligned compliance report` | The `/api/v1/compliance/report` endpoint returns a report with `generated_at`, `controls`, `summary` top-level keys | +| `compliance report includes all required control domains` | The `controls` array in the report includes entries for `access_control`, `audit_logging`, `credential_management`, and `tenant_isolation` | + +--- + +## Phase F — Performance Baseline + +> **Prerequisite:** Apache Bench (`ab`) must be installed. On Ubuntu: `sudo apt install apache2-utils`. 
+> Verify: `ab -V` + +**Step F.1 — Create a token payload file** + +```bash +cat > /tmp/token_payload.txt << 'EOF' +grant_type=client_credentials&client_id=REPLACE_CLIENT_ID&client_secret=REPLACE_CLIENT_SECRET&scope=read +EOF +``` + +Replace `REPLACE_CLIENT_ID` and `REPLACE_CLIENT_SECRET` with `$NEW_CLIENT_ID` and +`$NEW_CLIENT_SECRET` from Phase B by regenerating the file with the shell variables expanded: + +```bash +cat > /tmp/token_payload.txt << EOF +grant_type=client_credentials&client_id=${NEW_CLIENT_ID}&client_secret=${NEW_CLIENT_SECRET}&scope=read +EOF +``` + +**Step F.2 — Benchmark token endpoint** + +```bash +ab -n 100 -c 10 \ + -p /tmp/token_payload.txt \ + -T "application/x-www-form-urlencoded" \ + http://localhost:3000/api/v1/token +``` + +**Pass criteria for token endpoint:** + +- `Requests per second` > 10 +- `Time per request (mean)` < 100 ms +- p95 (95th percentile, shown as `95%` in the `Percentage of requests` table) < 100 ms +- Zero non-2xx responses + +**Step F.3 — Benchmark agent list endpoint** + +Ensure `$NEW_ACCESS_TOKEN` is still set and valid. 
Issue a fresh token if needed: + +```bash +NEW_ACCESS_TOKEN=$(curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=${NEW_CLIENT_ID}&client_secret=${NEW_CLIENT_SECRET}&scope=read" \ + | jq -r '.access_token') +``` + +Run the benchmark: + +```bash +ab -n 100 -c 10 \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents +``` + +**Pass criteria for agent list endpoint:** + +- `Time per request (mean)` < 200 ms +- p95 (`95%` row in the `Percentage of requests` table) < 200 ms +- Zero non-2xx responses + +**Step F.4 — Record results** + +Record the following values from each `ab` output for the field trial report: + +| Endpoint | Metric | Value | +|----------|--------|-------| +| `/api/v1/token` | Requests per second | | +| `/api/v1/token` | Mean time per request (ms) | | +| `/api/v1/token` | p95 (ms) | | +| `/api/v1/agents` | Requests per second | | +| `/api/v1/agents` | Mean time per request (ms) | | +| `/api/v1/agents` | p95 (ms) | | + +A field trial passes Phase F if all p95 values are within the pass criteria above. + +--- + +## Troubleshooting + +Each entry follows the pattern: **Symptom** → **Cause** → **Fix** with exact commands. + +--- + +**Port already in use** + +Symptom: + +``` +Error response from daemon: driver failed programming external connectivity on endpoint +sentryagent-idp-app-1: Bind for 0.0.0.0:3000 failed: port is already allocated +``` + +Fix: Kill the process occupying the port, then restart: + +```bash +lsof -ti:3000 | xargs kill +lsof -ti:5432 | xargs kill +lsof -ti:6379 | xargs kill +docker compose up --build -d +``` + +--- + +**Container shows `unhealthy`** + +Symptom: `docker compose ps` shows `unhealthy` for a service. 
+ +Fix: Check logs for the unhealthy service: + +```bash +docker compose logs postgres +docker compose logs redis +docker compose logs app +``` + +Common causes: + +| Service | Cause | Fix | +|---------|-------|-----| +| `postgres` | Wrong database credentials | Verify `DATABASE_URL` in `.env` matches `docker-compose.yml` credentials | +| `redis` | Port conflict | Check `lsof -ti:6379` and kill occupying process | +| `app` | Missing env var | Check `docker compose logs app` for `Failed to start server` message | + +--- + +**Migration fails — connection refused** + +Symptom: + +``` +Migration failed: Error: connect ECONNREFUSED 127.0.0.1:5432 +``` + +Cause: Running `npm run db:migrate` directly on the host (not inside the container) while +PostgreSQL is running inside Docker. + +Fix: Always run migrations inside the container during a field trial: + +```bash +docker compose exec app npm run db:migrate +``` + +--- + +**Migration fails — relation already exists** + +Symptom: + +``` +Migration failed: Error: relation "agents" already exists +``` + +Cause: A previous partial migration run left the database in an inconsistent state. + +Fix: Check which migrations have been applied: + +```bash +docker compose exec postgres psql -U agentidp -d agentidp \ + -c "SELECT name FROM schema_migrations ORDER BY name;" +``` + +If the database state cannot be repaired, reset it: + +```bash +docker compose down -v +docker compose up --build -d +docker compose exec app npm run db:migrate +``` + +> `docker compose down -v` destroys all data. Use only when a clean slate is acceptable. + +--- + +**JWT error — invalid signature or key format** + +Symptom: + +``` +Failed to start server: Error: JWT_PRIVATE_KEY and JWT_PUBLIC_KEY environment variables are required +``` + +Or: All tokens return `401 Token signature is invalid`. + +Cause: JWT keys in `.env` have incorrect PEM format — literal newlines instead of `\n` +sequences, or trailing whitespace. 
+ +Fix: Regenerate the keys and re-write them using the exact commands from Step 0.2 and 0.3. + +Verify the key format in `.env`: + +```bash +grep "JWT_PRIVATE_KEY" .env | head -c 100 +# Expected: JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\nMII... +# NOT: JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- +# MII... +``` + +The entire key must be on a single line with `\n` as literal backslash-n characters, not +actual newlines. + +--- + +**Portal CORS error** + +Symptom: Browser console shows: + +``` +Access to XMLHttpRequest at 'http://localhost:3000/api/v1/...' from origin 'http://localhost:3001' +has been blocked by CORS policy: No 'Access-Control-Allow-Origin' header is present +``` + +Cause: `CORS_ORIGIN` in `.env` does not include `http://localhost:3001`, or is set to a +different value. + +Fix: + +```bash +sed -i "s|CORS_ORIGIN=.*|CORS_ORIGIN=http://localhost:3001|" .env +docker compose up --build -d +``` + +Wait for the `app` container to become healthy before retrying. + +--- + +**Tier counter not resetting** + +Symptom: All API calls return 429 `tier_limit_exceeded` even after waiting. + +Cause: The Redis tier counter was manually set in Test C.6 and not deleted. + +Fix: + +```bash +# Get your org_id from the token +ORG_ID=$(echo $NEW_ACCESS_TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.organization_id') + +docker compose exec redis redis-cli DEL "rate:tier:calls:$ORG_ID" +docker compose exec redis redis-cli DEL "rate:tier:tokens:$ORG_ID" +``` + +--- + +**`ab` not found** + +Symptom: `ab: command not found` + +Fix: + +```bash +sudo apt-get update && sudo apt-get install -y apache2-utils +# or on macOS: +brew install httpd +``` + +--- + +**AGNTCY conformance test fails** + +Symptom: One or more tests in `npm run test:agntcy-conformance` fail. + +Diagnosis steps: + +1. Ensure the backend is running and healthy: `curl -s http://localhost:3000/health` +2. Ensure `COMPLIANCE_ENABLED=true` in `.env` (check with `grep COMPLIANCE_ENABLED .env`) +3. 
Ensure at least one agent has been registered (Phase B must have been completed) +4. Check the test output for the specific assertion that failed +5. Check `docker compose logs app` for errors around compliance report generation + +If the issue is a Redis cache hit returning stale data: + +```bash +docker compose exec redis redis-cli KEYS "compliance:*" | xargs docker compose exec redis redis-cli DEL +``` + +Then re-run the conformance suite. diff --git a/docs/devops/local-development.md b/docs/devops/local-development.md index e07c791..7d8aaac 100644 --- a/docs/devops/local-development.md +++ b/docs/devops/local-development.md @@ -6,9 +6,12 @@ Complete setup guide for running AgentIdP locally. | Tool | Minimum version | Purpose | |------|----------------|---------| -| Docker + Docker Compose | 24+ | Run PostgreSQL and Redis | -| Node.js | 18.0.0 | Run the application and migrations | +| Docker | 24+ | Container runtime | +| Docker Compose | 2.20+ | Multi-container orchestration | +| Node.js | 18.0.0 | Run the application, portal, and migrations | | npm | 9+ | Package management and scripts | +| nvm | any | Recommended for managing Node.js versions | +| openssl | any | RSA key generation | Verify versions: @@ -19,6 +22,11 @@ node --version npm --version ``` +> **nvm activation:** If using nvm, activate it before running any Node.js commands: +> ```bash +> export NVM_DIR="$HOME/.nvm" && source "$NVM_DIR/nvm.sh" +> ``` + --- ## Step 1 — Clone and install dependencies @@ -27,6 +35,9 @@ npm --version git clone https://git.sentryagent.ai/vijay_admin/sentryagent-idp.git cd sentryagent-idp npm install + +# Install portal dependencies +cd portal && npm install && cd .. ``` --- @@ -127,11 +138,10 @@ Expected output: ``` Running database migrations... ✓ Applied: 001_create_agents.sql - ✓ Applied: 002_create_credentials.sql - ✓ Applied: 003_create_audit_events.sql - ✓ Applied: 004_create_tokens.sql + ... + ✓ Applied: 026_add_tenant_tiers.sql -Migrations complete. 
4 migration(s) applied. +Migrations complete. 26 migration(s) applied. ``` See [database.md](database.md) for full migration documentation. @@ -165,9 +175,52 @@ The compiled output is written to `dist/`. `npm start` runs `node dist/server.js --- +## Step 7 — Start the Next.js portal (optional) + +The portal is a Next.js 14 application in the `portal/` directory. It communicates with the +AgentIdP backend at `http://localhost:3000`. + +Start the portal development server: + +```bash +cd portal && npm run dev +``` + +The portal starts on port 3001 by default. Open http://localhost:3001. + +Available routes: + +| Route | Description | +|-------|-------------| +| `/login` | OAuth 2.0 login page | +| `/agents` | Agent registry | +| `/credentials` | Credential management | +| `/audit` | Audit log viewer | +| `/analytics` | Token trend and agent activity charts | +| `/settings/tier` | Tier status and upgrade | +| `/compliance` | AGNTCY compliance report | +| `/webhooks` | Webhook subscription management | +| `/marketplace` | Agent marketplace | + +Build the portal for production: + +```bash +cd portal && npm run build +cd portal && npm start # serves the production build +``` + +Ensure `CORS_ORIGIN` in your `.env` includes `http://localhost:3001`: +``` +CORS_ORIGIN=http://localhost:3001 +``` + +--- + ## Full Docker Compose Stack -> **Note:** The `app` service in `docker-compose.yml` requires a `Dockerfile` which has not been written yet. This is a **Phase 1 P1 pending item**. The commands below will work once the Dockerfile exists. +> The full Docker Compose stack (including the `app` container) is available for field trial +> deployments — see the [field trial guide](field-trial.md). For day-to-day development, start +> only the infrastructure services and run the application directly. 
When the Dockerfile is available, the entire stack (infrastructure + application) can be started with: diff --git a/docs/devops/operations.md b/docs/devops/operations.md index 7bf414a..5383bd7 100644 --- a/docs/devops/operations.md +++ b/docs/devops/operations.md @@ -18,21 +18,22 @@ Always start services in this order. Starting the application before PostgreSQL ### Startup checklist ```bash -# 1. Start PostgreSQL and Redis -docker-compose up -d postgres redis +# 1. Start the full stack +docker compose up --build -d -# 2. Wait for healthy status -docker-compose ps -# Both postgres and redis must show "healthy" before proceeding +# 2. Verify all three services are healthy +docker compose ps +# app, postgres, and redis must all show "healthy" # 3. Run migrations -npm run db:migrate -# Must complete with 0 errors before starting the app +docker compose exec app npm run db:migrate -# 4. Start the application -npm run dev # development -# or -npm start # production (requires prior npm run build) +# 4. Verify application health +curl http://localhost:3000/health +# Expected: {"status":"ok"} + +# 5. 
(Optional) Start the portal for local dev +cd portal && npm run dev ``` --- @@ -115,9 +116,12 @@ docker-compose exec redis redis-cli | Key pattern | Example | Purpose | TTL | |------------|---------|---------|-----| -| `revoked:` | `revoked:f1e2d3c4-b5a6-...` | Revoked token JTI | Remaining token lifetime | -| `rate::` | `rate:a1b2c3...:29086156` | Request count per minute window | 60 seconds | -| `monthly:::` | `monthly:a1b2c3...:2026:3` | Token issuance count for free tier | End of month | +| `revoked:` | `revoked:f1e2d3c4-...` | Revoked token JTI | Remaining token lifetime | +| `rate::` | `rate:a1b2c3...:29086156` | Request count per window | `RATE_LIMIT_WINDOW_MS` | +| `monthly:::` | `monthly:a1b2c3...:2026:3` | Monthly token issuance count | End of month | +| `rate:tier:calls:` | `rate:tier:calls:org-uuid` | Daily API call counter for tier enforcement | Until midnight UTC | +| `rate:tier:tokens:` | `rate:tier:tokens:org-uuid` | Daily token issuance counter for tier enforcement | Until midnight UTC | +| `compliance:report:` | `compliance:report:org-uuid` | Cached compliance report JSON | 5 minutes | Inspect keys: @@ -130,6 +134,16 @@ redis-cli GET "rate::" # Check monthly token count for a specific client redis-cli GET "monthly::2026:3" + +# Check tier API call counter for a tenant +redis-cli GET "rate:tier:calls:" + +# Check tier token counter for a tenant +redis-cli GET "rate:tier:tokens:" + +# Check cached compliance report for a tenant +redis-cli GET "compliance:report:" +redis-cli TTL "compliance:report:" ``` Where `` is `floor(unix_ms / 60000)`. 
For the current window: @@ -258,12 +272,25 @@ AgentIdP exposes a Prometheus metrics endpoint at `GET /metrics` (unauthenticate | Metric | Type | Labels | Description | |--------|------|--------|-------------| -| `agentidp_tokens_issued_total` | Counter | `scope` | OAuth 2.0 tokens issued successfully | -| `agentidp_agents_registered_total` | Counter | `deployment_env` | Agents registered successfully | -| `agentidp_http_requests_total` | Counter | `method`, `route`, `status_code` | HTTP requests received | -| `agentidp_http_request_duration_seconds` | Histogram | `method`, `route`, `status_code` | HTTP request duration | +| `agentidp_tokens_issued_total` | Counter | `scope` | OAuth 2.0 tokens issued | +| `agentidp_agents_registered_total` | Counter | `deployment_env` | Agents registered | +| `agentidp_http_requests_total` | Counter | `method`, `route`, `status_code` | HTTP requests | +| `agentidp_http_request_duration_seconds` | Histogram | `method`, `route`, `status_code` | HTTP latency | | `agentidp_db_query_duration_seconds` | Histogram | `operation` | PostgreSQL query duration | | `agentidp_redis_command_duration_seconds` | Histogram | `command` | Redis command duration | +| `agentidp_webhook_dead_letters_total` | Counter | `event_type` | Webhook deliveries moved to dead-letter queue | +| `agentidp_credentials_expiring_soon_total` | Gauge | — | Credentials expiring within 7 days | +| `agentidp_audit_chain_integrity` | Gauge | — | `1` if audit chain is intact, `0` if broken | +| `agentidp_rate_limit_hits_total` | Counter | `client_id` | Rate limit rejections | +| `agentidp_db_pool_active_connections` | Gauge | — | Active PostgreSQL connections | +| `agentidp_db_pool_waiting_requests` | Gauge | — | Requests waiting for a pool connection | +| `agentidp_tenant_api_calls_total` | Counter | `org_id`, `tier` | API calls per tenant per tier | +| `agentidp_billing_limit_rejections_total` | Counter | `org_id`, `limit_type` | Tier limit enforcement rejections | +| 
`agentidp_did_documents_generated_total` | Counter | — | DID documents generated | +| `agentidp_oidc_tokens_issued_total` | Counter | — | OIDC ID tokens issued | +| `agentidp_federation_events_total` | Counter | `event_type` | Federation partner events | +| `agentidp_delegation_chains_created_total` | Counter | — | A2A delegation chains created | +| `agentidp_compliance_reports_generated_total` | Counter | — | Compliance reports generated | ### Starting the Monitoring Stack @@ -282,3 +309,50 @@ The Grafana dashboard auto-provisions on first start. Navigate to **Dashboards `GET /metrics` is unauthenticated. In production, ensure this endpoint is: - Only accessible from your internal network (firewall rule or reverse proxy restriction) - Not exposed on a public-facing port + +--- + +### Tier limit rejected — 429 with `tier_limit_exceeded` code + +Symptom: `429 TOO_MANY_REQUESTS` with body `{"code":"tier_limit_exceeded","message":"..."}` + +Check the tenant's current tier counter (replace `<org_id>` with the tenant's organisation ID): +```bash +# Check API call counter +docker compose exec redis redis-cli GET "rate:tier:calls:<org_id>" + +# Check the tenant's tier +psql "$DATABASE_URL" -c "SELECT org_id, tier FROM tenant_tiers WHERE org_id = '<org_id>';" +``` + +If the org is on the `free` tier and has hit 1,000 calls/day, upgrade the tier or wait until +midnight UTC for the counter to reset. + +--- + +### Analytics endpoints return 404 + +Cause: `ANALYTICS_ENABLED` is set to `false` in `.env`. + +Fix: Set `ANALYTICS_ENABLED=true` and restart the application. + +--- + +### Compliance report returns 404 + +Cause: `COMPLIANCE_ENABLED` is set to `false` in `.env`. + +Fix: Set `COMPLIANCE_ENABLED=true` and restart the application. + +--- + +### Portal CORS error + +Symptom: Browser console shows `Access-Control-Allow-Origin` error on requests to +`http://localhost:3000`. + +Fix: Ensure `CORS_ORIGIN` in `.env` includes `http://localhost:3001`: +``` +CORS_ORIGIN=http://localhost:3001 +``` +Restart the application after changing this variable. 
diff --git a/docs/devops/security.md b/docs/devops/security.md index 0fe8c7e..9db8add 100644 --- a/docs/devops/security.md +++ b/docs/devops/security.md @@ -87,6 +87,12 @@ Rotating the JWT keys invalidates all currently active tokens — every authenti **Important:** There is no grace period or dual-key support in Phase 1. All tokens issued with the old private key are immediately rejected after rotation. If zero-downtime key rotation is required, it is a Phase 2 feature. +> **OIDC keys** are separate from the main JWT keys. OIDC signing keys are stored in the +> `oidc_keys` PostgreSQL table (created by migration `014_create_oidc_keys_table.sql`), encrypted +> at rest using pgcrypto (enabled by migration `018_enable_pgcrypto.sql`). The `OIDCKeyService` +> manages rotation. OIDC keys do not need to be set as environment variables — they are +> provisioned automatically on first startup. + --- ## CORS Configuration diff --git a/docs/devops/vault-setup.md b/docs/devops/vault-setup.md index 37956a8..72e4315 100644 --- a/docs/devops/vault-setup.md +++ b/docs/devops/vault-setup.md @@ -47,6 +47,10 @@ VAULT_TOKEN=dev-root-token VAULT_MOUNT=secret ``` +> **Note:** The `.env.example` file uses `VAULT_KV_MOUNT` as the variable name. The application +> reads both `VAULT_KV_MOUNT` and `VAULT_MOUNT` — prefer `VAULT_KV_MOUNT` in new configurations +> for consistency with the current `.env.example`. + The KV v2 secrets engine is automatically enabled at `secret/` in dev mode. No further configuration is needed. > **Warning**: Dev mode stores everything in memory. Data is lost when the container stops. Do not use dev mode in production. 
diff --git a/docs/engineering/01-overview.md b/docs/engineering/01-overview.md index beebcee..226d08b 100644 --- a/docs/engineering/01-overview.md +++ b/docs/engineering/01-overview.md @@ -71,6 +71,16 @@ all six AGNTCY domains: | Prometheus Metrics | `GET /metrics` | prom-client; all HTTP routes instrumented with request counter and duration histogram | | HashiCorp Vault | (opt-in, via `VAULT_ADDR` + `VAULT_TOKEN`) | KV v2 secret storage; constant-time comparison; bcrypt fallback when Vault is not configured | | Health Check | `GET /health` | Checks PostgreSQL and Redis connectivity; unauthenticated; used by load balancers | +| W3C Decentralised Identifiers | `GET /api/v1/agents/:id/did`, `GET /api/v1/.well-known/did.json` | DID Core 1.0 documents; `did:web` method; EC P-256 keys; AGNTCY extension fields | +| AGNTCY Agent Cards | `GET /api/v1/agents/:id/card` | Machine-readable agent identity summary; AGNTCY schema v1.0 | +| AGNTCY Compliance Reports | `GET /api/v1/compliance/report`, `GET /api/v1/compliance/agent-cards` | Compliance sections: agent-identity + audit-trail; cached 5 min; AGNTCY schema v1.0 | +| Federation (Cross-IdP) | `POST /api/v1/federation/partners`, `GET /api/v1/federation/partners`, `POST /api/v1/federation/verify` | Register partner IdPs; verify cross-IdP JWTs using cached partner JWKS | +| A2A Delegation | `POST /api/v1/oauth2/token/delegate`, `POST /api/v1/oauth2/token/verify-delegation` | Agent-to-agent delegation tokens; OIDC provider (oidc-provider v9) mounted at `/oidc` | +| Webhook Subscriptions | `POST /api/v1/webhooks`, `GET /api/v1/webhooks`, `GET /api/v1/webhooks/:id/deliveries` | Outbound event delivery with HMAC signing; Vault-backed secrets; delivery history | +| Tier Management | `GET /api/v1/tiers/status`, `POST /api/v1/tiers/upgrade` | Free / Pro / Enterprise tiers; daily call and token limits; Stripe Checkout upgrade flow | +| Billing | `POST /api/v1/billing/checkout`, `POST /api/v1/billing/webhook`, `GET 
/api/v1/billing/status` | Stripe subscription management; webhook event processing | +| Analytics | Internal (via `AnalyticsService`) | Daily aggregated event counts per org; token trend queries (up to 90 days); agent activity heatmap; usage summary | +| Developer Portal | `/portal` (Next.js 14, separate process) | Get-started wizard, SDK explorer, API reference, analytics dashboard, pricing page | --- @@ -80,7 +90,10 @@ all six AGNTCY domains: |-------|--------|-----------------| | Phase 1 — MVP | COMPLETE | Agent registry, OAuth 2.0 Client Credentials (RS256 JWTs), credential management (bcrypt), immutable audit log, Node.js SDK, Dockerfile, Docker Compose, AGNTCY alignment documentation, >80% test coverage | | Phase 2 — Production-Ready | COMPLETE | HashiCorp Vault opt-in integration, Python SDK (sync + async), Go SDK (context-aware), Java SDK (builder + CompletableFuture), OPA policy engine (Rego + Wasm + TypeScript fallback), React 18 + Vite 5 web dashboard, Prometheus metrics + Grafana dashboards, Terraform multi-region deployment (AWS ECS + RDS + ElastiCache; GCP Cloud Run + Cloud SQL + Memorystore) | -| Phase 3 — Enterprise | PLANNED | AGNTCY federation (cross-IdP agent identity), W3C Decentralised Identifiers (DIDs), agent marketplace, advanced compliance reporting, SOC 2 Type II certification, enterprise tier (custom retention, SLAs, advanced RBAC) | +| Phase 3 — Enterprise | COMPLETE | AGNTCY federation (cross-IdP agent identity), W3C Decentralised Identifiers (DIDs), agent marketplace, OIDC provider (A2A delegation), Rust SDK, developer portal (Next.js 14) | +| Phase 4 — Compliance & Security | COMPLETE | AGNTCY compliance reports (agent-identity + audit-trail sections), audit hash chain verification, SOC 2 CC6.1 AES-256-CBC column encryption (`EncryptionService`), DID document caching, federation partner JWKS caching | +| Phase 5 — Scale & Ecosystem | COMPLETE | Multi-tier subscription model (free/pro/enterprise), Stripe billing integration 
(`BillingService`, `TierService`), tier enforcement middleware (daily call and token limits), webhook subscriptions + delivery history (`WebhookService`), analytics service (daily event aggregation + trend queries) | +| Phase 6 — Market Expansion | COMPLETE | AGNTCY conformance test suite (4 conformance scenarios), API tiers enforced end-to-end, analytics dashboard in developer portal, full Phase 6 engineering documentation update | --- @@ -105,11 +118,15 @@ no implementation begins until an OpenAPI specification is approved by the CTO. ## 6. Free Tier Limits -| Limit | Value | -|-------|-------| -| Max agents | 100 | -| Max credentials per agent | No hard cap enforced in code (5 is the documented recommendation) | -| Max tokens in flight | 10,000 per agent per calendar month | -| Token TTL | 3,600 seconds (1 hour) | -| Audit log retention | 90 days | -| API rate limit | 100 requests per minute per IP address | +| Limit | Free Tier | Pro Tier | Enterprise Tier | +|-------|-----------|----------|-----------------| +| Max agents | 100 | 1,000 | Unlimited | +| Max API calls per day | Configured in `TIER_CONFIG` | Configured in `TIER_CONFIG` | Unlimited | +| Max tokens per day | Configured in `TIER_CONFIG` | Configured in `TIER_CONFIG` | Unlimited | +| Token TTL | 3,600 seconds (1 hour) | 3,600 seconds (1 hour) | 3,600 seconds (1 hour) | +| Audit log retention | 90 days | 1 year | Custom | +| API rate limit (per IP) | 100 req/min | 100 req/min | 100 req/min | +| Webhook subscriptions | 0 | 10 | Unlimited | +| Analytics retention | 90 days | 1 year | Custom | + +Tier limits are configured in `src/config/tiers.ts` (`TIER_CONFIG`). Enforcement is handled by `TierService.enforceAgentLimit()` (agent cap) and `src/middleware/tier.ts` (daily call/token caps). Tier upgrades are initiated via `POST /api/v1/tiers/upgrade` and confirmed via the Stripe webhook. 
diff --git a/docs/engineering/02-architecture.md b/docs/engineering/02-architecture.md index ed51170..22c08d0 100644 --- a/docs/engineering/02-architecture.md +++ b/docs/engineering/02-architecture.md @@ -13,25 +13,33 @@ graph TD subgraph ExpressApp["Express App — src/app.ts"] Router["Router (src/routes/)"] AuthMW["authMiddleware (src/middleware/auth.ts)"] + TierMW["tierMiddleware (src/middleware/tier.ts)"] OpaMW["opaMiddleware (src/middleware/opa.ts)"] Controller["Controller (src/controllers/)"] Service["Service (src/services/)"] Repository["Repository (src/repositories/)"] - Router --> AuthMW --> OpaMW --> Controller --> Service --> Repository + Router --> AuthMW --> TierMW --> OpaMW --> Controller --> Service --> Repository end - Repository -->|parameterized SQL| PG["PostgreSQL 14\n(agents, credentials, audit_events, token_revocations)"] - Service -->|Redis commands| Redis["Redis 7\n(token revocation list, monthly counts, rate-limit counters)"] - Service -->|KV v2 read/write| Vault["HashiCorp Vault\n(opt-in — when VAULT_ADDR is set)"] + Repository -->|parameterized SQL| PG["PostgreSQL 14\n(agents, credentials, audit_events,\nanalytics_events, organizations,\nfederation_partners, webhook_subscriptions,\nagent_did_keys, delegation_chains)"] + Service -->|Redis commands| Redis["Redis 7\n(token revocation list, daily tier counters,\nJWKS cache, compliance report cache,\nDID document cache)"] + Service -->|KV v2 read/write| Vault["HashiCorp Vault\n(opt-in — credentials, DID private keys,\nwebhook secrets — when VAULT_ADDR is set)"] ExpressApp -->|evaluate input| OPA["OPA Policy Engine\n(policies/authz.rego + data/scopes.json)"] ExpressApp -->|expose| Metrics["/metrics (prom-client)"] + ExpressApp -->|checkout session / webhooks| Stripe["Stripe\n(billing — when STRIPE_SECRET_KEY is set)"] Dashboard["Dashboard SPA (React 18 + Vite 5)\ndashboard/dist/ served from /dashboard"] + Portal["Developer Portal (Next.js 14)\nportal/ — served separately on port 3002"] Client 
-->|browser| Dashboard + Client -->|browser| Portal Dashboard -->|REST API calls| ExpressApp + Portal -->|REST API calls| ExpressApp Grafana["Grafana (port 3001)"] -->|scrapes| Metrics + + OIDCProvider["OIDC Provider (oidc-provider v9)\nmounted at /oidc — A2A delegation tokens"] + ExpressApp --- OIDCProvider ``` --- @@ -46,12 +54,13 @@ correctly. 2. App-level middleware runs in registration order: `helmet()` sets security headers, `cors()` applies CORS policy from `CORS_ORIGIN`, `morgan('combined')` logs the request line (skipped in `NODE_ENV=test`), `express.json()` and `express.urlencoded()` parse the body, `metricsMiddleware` (`src/middleware/metrics.ts`) starts the request timer and records `agentidp_http_requests_total` and `agentidp_http_request_duration_seconds` on response finish. 3. The Express router matches the path to a route definition in `src/routes/*.ts` and hands off to the appropriate middleware chain. 4. `authMiddleware` (`src/middleware/auth.ts`) validates the Bearer JWT: extracts the token from the `Authorization` header, calls `verifyToken()` for RS256 signature and expiry, then calls `redis.get('revoked:{jti}')` to check the revocation list. On success, attaches the decoded `ITokenPayload` to `req.user`. -5. `opaMiddleware` (`src/middleware/opa.ts`) evaluates the OPA policy: builds an `OpaInput` object from `req.method`, `req.baseUrl + req.path`, and `req.user.scope.split(' ')`, then calls `evaluate(input)`. Uses the Wasm bundle (`policies/authz.wasm`) when present, or the TypeScript fallback reading `policies/data/scopes.json`. Calls `next(new AuthorizationError())` if the policy denies. -6. The controller (`src/controllers/*.ts`) receives the validated request, extracts and validates path params and body using Joi schemas, then delegates to the service layer. -7. The service (`src/services/*.ts`) executes all business logic — enforces free-tier limits, resolves domain rules, and calls repositories. The service has no knowledge of HTTP. -8. 
The repository (`src/repositories/*.ts`) executes parameterized SQL against PostgreSQL via `node-postgres`, or issues Redis commands via the `redis` client. No business logic lives here. -9. The controller serialises the service result and calls `res.status(xxx).json(payload)`. -10. `AuditService.logEvent()` is called — for high-throughput paths (token issuance, introspection, revocation) this is fire-and-forget (`void` — not awaited); for CRUD operations it is awaited. The audit event is written as an immutable row to the `audit_events` table in PostgreSQL. +5. `tierMiddleware` (`src/middleware/tier.ts`) enforces per-tier daily API call limits. It reads the organisation's current tier from `TierService.fetchTier(orgId)`, checks the daily call counter from Redis key `rate:tier:calls:{orgId}` against `TIER_CONFIG[tier].maxCallsPerDay`, increments the counter on each passing request (fire-and-forget `INCR` with TTL set to next UTC midnight), and throws `TierLimitError` (429) when the limit is reached. This middleware is applied only to API routes, not to `/health`, `/metrics`, or `/dashboard`. +6. `opaMiddleware` (`src/middleware/opa.ts`) evaluates the OPA policy: builds an `OpaInput` object from `req.method`, `req.baseUrl + req.path`, and `req.user.scope.split(' ')`, then calls `evaluate(input)`. Uses the Wasm bundle (`policies/authz.wasm`) when present, or the TypeScript fallback reading `policies/data/scopes.json`. Calls `next(new AuthorizationError())` if the policy denies. +7. The controller (`src/controllers/*.ts`) receives the validated request, extracts and validates path params and body using Joi schemas, then delegates to the service layer. +8. The service (`src/services/*.ts`) executes all business logic — enforces tier limits, resolves domain rules, and calls repositories. 
Phases 3–6 introduce specialised services: `AnalyticsService` (fire-and-forget event recording), `TierService` (enforces per-tier agent and call limits), `ComplianceService` (AGNTCY compliance reports, cached 5 min in Redis), `FederationService` (cross-IdP JWT verification with cached JWKS), `DIDService` (W3C DID document generation and caching), `WebhookService` (subscription management with Vault-backed HMAC secrets), and `BillingService` (Stripe Checkout and webhook processing). The service has no knowledge of HTTP. +9. The repository (`src/repositories/*.ts`) executes parameterized SQL against PostgreSQL via `node-postgres`, or issues Redis commands via the `redis` client. No business logic lives here. Phases 3–6 added the following tables: `analytics_events` (daily metric counters), `organizations` (org tier and billing), `federation_partners` (cross-IdP trust registry), `webhook_subscriptions` and `webhook_deliveries` (outbound event delivery), `agent_did_keys` (public EC keys for DID documents), `delegation_chains` (A2A delegation records), `tenant_subscriptions` (Stripe subscription status). +10. The controller serialises the service result and calls `res.status(xxx).json(payload)`. +11. `AuditService.logEvent()` is called — for high-throughput paths (token issuance, introspection, revocation) this is fire-and-forget (`void` — not awaited); for CRUD operations it is awaited. The audit event is written as an immutable row to the `audit_events` table in PostgreSQL. --- @@ -102,6 +111,92 @@ sequenceDiagram --- +## 3b. Analytics Event Capture Flow + +Every successful token issuance writes a fire-and-forget analytics event: + +```mermaid +sequenceDiagram + participant Controller as TokenController + participant OAuth2Svc as OAuth2Service + participant AnalyticsSvc as AnalyticsService + participant PG as PostgreSQL + + Controller->>OAuth2Svc: issueToken(clientId, clientSecret, scope, ...) 
+ OAuth2Svc->>OAuth2Svc: signToken() — RS256 JWT + OAuth2Svc-->>Controller: ITokenResponse + + Note over OAuth2Svc,AnalyticsSvc: fire-and-forget (void) + OAuth2Svc-)AnalyticsSvc: recordEvent(tenantId, 'token_issued') + AnalyticsSvc-)PG: INSERT INTO analytics_events ... ON CONFLICT DO UPDATE count + 1 +``` + +`recordEvent` uses PostgreSQL `UPSERT` — one row per `(organization_id, date, metric_type)`. If the INSERT conflicts (same date, same org, same metric), the `count` column is incremented atomically. This keeps the table compact (one row per day per metric type per org) and fast to query. + +--- + +## 3c. Tier Enforcement Middleware Chain + +```mermaid +sequenceDiagram + actor Agent + participant TierMW as tierMiddleware + participant TierSvc as TierService + participant Redis + participant PG as PostgreSQL + + Agent->>TierMW: API request (with valid Bearer token) + TierMW->>TierSvc: fetchTier(orgId) + TierSvc->>PG: SELECT tier FROM organizations WHERE organization_id = $1 + PG-->>TierSvc: 'pro' + TierSvc-->>TierMW: 'pro' + + TierMW->>Redis: GET rate:tier:calls:{orgId} + Redis-->>TierMW: "4999" (current daily count) + + Note over TierMW: TIER_CONFIG['pro'].maxCallsPerDay = 50000 — limit not reached + + TierMW-)Redis: INCR rate:tier:calls:{orgId} (fire-and-forget, TTL = next UTC midnight) + TierMW->>Agent: next() — request proceeds to opaMiddleware +``` + +When the counter equals or exceeds the tier limit, `tierMiddleware` throws `TierLimitError` (429) before `opaMiddleware` runs. The daily counter resets at UTC midnight via Redis TTL. + +--- + +## 3d. A2A Delegation End-to-End Flow + +```mermaid +sequenceDiagram + actor Delegator as Delegator Agent + actor Delegatee as Delegatee Agent + participant AgentIdP + participant DelegationSvc as DelegationService + participant OIDCProvider as OIDC Provider + participant PG as PostgreSQL + + Delegator->>AgentIdP: POST /api/v1/oauth2/token/delegate
{ delegatee_id, scope } + AgentIdP->>DelegationSvc: createDelegation(delegatorId, delegateeId, scope) + DelegationSvc->>PG: INSERT INTO delegation_chains ... + PG-->>DelegationSvc: chain_id + DelegationSvc->>OIDCProvider: issue delegation JWT (delegator claims + delegatee sub) + OIDCProvider-->>DelegationSvc: signed delegation token + DelegationSvc-->>AgentIdP: IDelegationChain (with token) + AgentIdP-->>Delegator: 201 { token, chain_id } + + Note over Delegatee,AgentIdP: Delegatee uses the delegation token + Delegatee->>AgentIdP: POST /api/v1/oauth2/token/verify-delegation
{ token } + AgentIdP->>DelegationSvc: verifyDelegation(token, delegateeId) + DelegationSvc->>PG: SELECT * FROM delegation_chains WHERE chain_id = $1 AND status = 'active' + PG-->>DelegationSvc: chain row (not expired, not revoked) + DelegationSvc->>OIDCProvider: verify token signature + OIDCProvider-->>DelegationSvc: verified claims + DelegationSvc-->>AgentIdP: IDelegationVerifyResult { valid: true, ... } + AgentIdP-->>Delegatee: 200 { valid: true, delegatorId, scope } +``` + +--- + ## 4. Multi-Region Deployment Topology ```mermaid diff --git a/docs/engineering/03-tech-stack.md b/docs/engineering/03-tech-stack.md index a5423da..dd24f8b 100644 --- a/docs/engineering/03-tech-stack.md +++ b/docs/engineering/03-tech-stack.md @@ -253,3 +253,82 @@ diff-based approval workflow. via the AWS console or GCP console are permitted — they will be overwritten on the next `terraform apply`. Terraform state is stored in a remote backend and must not be edited manually. + +--- + +### ADR-11: Stripe + +**Status**: Adopted +**Component**: Billing — subscription management and payment processing + +**Decision**: Use Stripe as the payment processing and subscription management platform. The `stripe` npm package (v21+) handles Checkout Session creation, webhook event verification, and subscription lifecycle events. + +**Rationale**: Stripe's hosted Checkout flow eliminates the need to handle PCI-DSS scope for card data. The `stripe.webhooks.constructEvent()` method verifies the HMAC-SHA256 signature of each incoming webhook payload, rejecting forged or tampered events; the signed timestamp in the `Stripe-Signature` header, checked against a tolerance window, limits replay attacks. The `checkout.session.completed` event carries `metadata: { orgId, targetTier }`, allowing `BillingService` to delegate tier upgrades to `TierService.applyUpgrade()` without coupling billing logic to tier logic. + +**Alternatives considered**: +- Paddle — rejected because its global merchant-of-record model introduced complexities with the open-source free tier. 
+- Braintree — rejected because Stripe's webhook reliability and developer experience are superior. + +**Consequences**: Stripe requires `STRIPE_SECRET_KEY` (for API calls) and `STRIPE_WEBHOOK_SECRET` (`whsec_...`, for webhook verification). Per-tier Stripe price IDs are configured via `STRIPE_PRICE_ID_PRO` and `STRIPE_PRICE_ID_ENTERPRISE`. All billing webhook handlers must pass the raw `Buffer` body (not parsed JSON) to `stripe.webhooks.constructEvent()` — use `express.raw()` middleware on the webhook route. + +--- + +### ADR-12: oidc-provider (A2A Delegation) + +**Status**: Adopted +**Component**: A2A delegation — OIDC provider for agent-to-agent trust tokens + +**Decision**: Use the `oidc-provider` npm package (v9.7.x) as the OIDC provider for issuing A2A delegation tokens. The provider is mounted as a sub-application at `/oidc` within the Express app. + +**Rationale**: `oidc-provider` is a certified OpenID Connect implementation that handles the full OIDC protocol, including JWKS serving, token endpoint, and discovery document. Rather than implementing a custom delegation token format, using a standards-compliant OIDC provider means delegation tokens can be verified by any OIDC-aware party using the published JWKS at `/oidc/jwks`. + +**Alternatives considered**: +- Custom JWT signing — rejected because hand-rolled token formats cannot benefit from OIDC tooling and interoperability. + +**Consequences**: `A2A_ENABLED` env var gates the OIDC provider — when set to `'false'`, delegation endpoints return 404. The `OIDC_ISSUER` env var must be set to the full base URL of the OIDC provider (e.g. `https://api.sentryagent.ai`). + +--- + +### ADR-13: Next.js 14 (Developer Portal) + +**Status**: Adopted +**Component**: Developer Portal (`portal/`) — public-facing documentation and onboarding + +**Decision**: Use Next.js 14 (App Router) with Tailwind CSS for the developer portal. 
The portal is a separate process served on its own port (independent of the Express API server). + +**Rationale**: The developer portal has different performance and SEO requirements than the internal operator dashboard (`dashboard/`). Next.js 14's App Router supports React Server Components, which allows the marketing and documentation pages to be statically generated while the analytics dashboard and API Explorer are client-rendered. Tailwind CSS enables rapid UI development consistent with the design system. + +**Alternatives considered**: +- Extending the Vite dashboard — rejected because the developer portal requires server-side rendering for SEO on marketing pages, which Vite does not provide. +- Docusaurus — rejected because the portal includes interactive components (Swagger Explorer, analytics charts) that are not well-suited to a documentation-only tool. + +**Consequences**: The portal (`portal/`) has its own `package.json`, `tsconfig.json`, `tailwind.config.ts`, and `next.config.js`. It is built and run independently: `cd portal && npm install && npm run dev`. The portal calls the AgentIdP REST API using the same `@sentryagent/idp-sdk` as the dashboard. + +--- + +### ADR-14: bull (Job Queue) + kafkajs (Event Streaming) + +**Status**: Adopted (opt-in) +**Component**: Async job processing and event streaming + +**Decision**: Use `bull` (Redis-backed job queue) for async webhook delivery retries and `kafkajs` for event streaming to external consumers. Both are opt-in — the system operates correctly without Kafka configured. + +**Rationale**: Webhook delivery requires retry logic with exponential backoff and dead-letter handling. `bull` provides this out of the box using the existing Redis dependency. `kafkajs` enables high-throughput event streaming for analytics and audit events to external data pipelines without blocking the primary request path. 
+ +**Alternatives considered**: +- BullMQ — considered as a more modern alternative to `bull` but rejected to avoid adding a new package family during Phase 6. Migration is a future backlog item. + +**Consequences**: Kafka is entirely optional. When `KAFKA_BROKERS` is not set, `kafkajs` is not initialised and no events are published. The `bull` queue for webhook delivery requires only the existing Redis instance. + +--- + +### ADR-15: did-resolver + web-did-resolver (W3C DIDs) + +**Status**: Adopted +**Component**: W3C DID Core 1.0 document resolution + +**Decision**: Use `did-resolver` (v4.1.x) as the DID resolution framework and `web-did-resolver` (v2.0.x) for the `did:web` method implementation. + +**Rationale**: `did-resolver` provides a pluggable resolver interface used by both the server (for internal resolution) and by third parties who want to verify AgentIdP-issued DIDs. The `did:web` method maps DID identifiers to HTTPS URLs hosting the DID document JSON, requiring no blockchain. `DIDService` generates documents that conform to the W3C DID Core 1.0 specification and include AGNTCY-specific extension fields. + +**Consequences**: `DID_WEB_DOMAIN` env var is required for DID generation. DID documents are cached in Redis (`did:doc:{agentId}`, TTL from `DID_DOCUMENT_CACHE_TTL_SECONDS`, default 300s). Private keys are stored in HashiCorp Vault KV v2 when Vault is configured; in dev mode, a `dev:no-vault` marker is stored and keys are ephemeral. 
diff --git a/docs/engineering/04-codebase-structure.md b/docs/engineering/04-codebase-structure.md index 7a0ddd4..7a705f9 100644 --- a/docs/engineering/04-codebase-structure.md +++ b/docs/engineering/04-codebase-structure.md @@ -28,9 +28,15 @@ sentryagent-idp/ ├── sdk-python/ # Python SDK (sentryagent-idp) — sync + async clients ├── sdk-go/ # Go SDK (github.com/sentryagent/idp-sdk-go) — context-aware, goroutine-safe ├── sdk-java/ # Java SDK (ai.sentryagent:idp-sdk) — builder pattern, CompletableFuture +├── sdk-rust/ # Rust SDK (sentryagent-idp crate) — async, tokio, reqwest, typed errors ├── policies/ # OPA policy files │ ├── authz.rego # Rego policy — normalise_path + scope-intersection allow rule │ └── data/scopes.json # Endpoint permission map — used by Rego and TypeScript fallback +├── portal/ # Developer Portal — Next.js 14 App Router, Tailwind CSS +│ ├── app/ # Next.js App Router pages (get-started, pricing, sdks, analytics, settings, login) +│ ├── components/ # Shared UI components (Nav.tsx, SwaggerExplorer.tsx, GetStartedWizard.tsx) +│ ├── hooks/ # React hooks (useAuth.ts) +│ └── types/ # TypeScript type definitions for portal-only types ├── terraform/ # Terraform infrastructure as code │ ├── modules/ # Reusable modules: agentidp, lb, rds, redis │ └── environments/ # Environment configs: aws/ (ECS+RDS+ElastiCache), gcp/ (Cloud Run+SQL+Memorystore) @@ -44,6 +50,11 @@ sentryagent-idp/ │ ├── agntcy/ # AGNTCY alignment documentation │ └── openapi/ # OpenAPI 3.0 specification files ├── openspec/ # OpenSpec change management — proposals, designs, specs, tasks, archives +├── tests/ # Jest test suite — mirrors src/ structure +│ ├── unit/ # Unit tests (mocked dependencies) — mirrors src/ +│ ├── integration/ # Integration tests (real DB + Redis) +│ ├── agntcy-conformance/ # AGNTCY conformance test suite (separate Jest config) +│ └── load/ # k6 load test scripts ├── Dockerfile # Multi-stage production build (build + runtime stages) ├── docker-compose.yml # Local 
development: PostgreSQL 14 (port 5432) + Redis 7 (port 6379) ├── docker-compose.monitoring.yml # Monitoring overlay: Prometheus (port 9090) + Grafana (port 3001) @@ -69,6 +80,8 @@ sentryagent-idp/ | `src/metrics/` | Prometheus metrics registry — all `Counter` and `Histogram` definitions in one place | Only file that calls `new Counter()` or `new Histogram()`; all other files import from here | | `src/db/` | PostgreSQL connection pool factory (`pool.ts`) and numbered SQL migration files in `migrations/` | Pool is a singleton created once in `src/app.ts` and passed to repositories | | `src/cache/` | Redis client factory — creates and caches a single `redis` client instance | Client is a singleton created once in `src/app.ts` and passed to repositories | +| `src/config/` | Configuration constants — `tiers.ts` exports `TIER_CONFIG`, `TIER_RANK`, `TierName`, and `isTierName()` type guard | Imported by `TierService` and `tierMiddleware`; never imports from services | +| `src/middleware/tier.ts` | Tier enforcement middleware — reads org tier from `TierService`, checks daily call counter in Redis, throws `TierLimitError` (429) when limit is exceeded, increments counter on pass | Applied only to API routes; skips `/health`, `/metrics`, and static file routes | --- @@ -84,6 +97,10 @@ sentryagent-idp/ | A new environment variable | `src/utils/config.ts` (if it exists) or the relevant consumer file + `docs/devops/environment-variables.md` | `RATE_LIMIT_MAX` controlling the rate-limit ceiling | | A new Prometheus metric | `src/metrics/registry.ts` | A `Histogram` for Vault lookup duration | | A new TypeScript type used in 2+ files | `src/types/index.ts` | A new `AgentGroupMembership` interface | +| A new tier-gated feature | `src/config/tiers.ts` (add limit field) + `src/middleware/tier.ts` (add check) + service (enforce) | Adding a `maxWebhooksPerOrg` tier limit | +| A webhook event handler | `src/services/WebhookService.ts` (add event type to `WebhookEventType`) + the 
producer that calls `void webhookService.dispatch(orgId, eventType, payload)` | Emitting `agent.decommissioned` events to subscriber URLs | +| A new analytics metric type | `src/services/AnalyticsService.ts` (call `recordEvent(tenantId, 'new_metric')` in the relevant service using `void`) | Recording `credential_rotated` events for analytics | +| A new DID endpoint | `src/controllers/DIDController.ts` + `src/routes/did.ts` + `src/services/DIDService.ts` (if new method needed) + `policies/data/scopes.json` | Adding `GET /api/v1/agents/:id/did/rotate-key` | --- diff --git a/docs/engineering/05-services.md b/docs/engineering/05-services.md index 104dde7..d6bd30e 100644 --- a/docs/engineering/05-services.md +++ b/docs/engineering/05-services.md @@ -340,3 +340,245 @@ docker compose -f docker-compose.yml -f docker-compose.monitoring.yml up Grafana is pre-provisioned with a Prometheus data source pointing to `http://prometheus:9090` and dashboard JSON files from `monitoring/grafana/dashboards/`. No manual configuration is needed after startup. + +--- + +### AnalyticsService + +**Purpose**: Records daily aggregated analytics events (token issuances, agent activity) and exposes query methods for token trends, agent activity heatmaps, and per-agent usage summaries. All query methods scope results strictly to the supplied `tenantId`. The `recordEvent` method is fire-and-forget — it catches all errors internally and never propagates them to the caller, so analytics writes never block primary request paths. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `recordEvent` | `tenantId: string, metricType: string` | `Promise` | Upserts a daily counter row in `analytics_events` via `INSERT ... ON CONFLICT DO UPDATE SET count = count + 1`. Catches and swallows all errors; safe to call with `void` on hot paths. 
| +| `getTokenTrend` | `tenantId: string, days: number` | `Promise` | Returns daily token issuance counts for the last N days (clamped to 90). Uses `generate_series` + `LEFT JOIN` so that days with no events appear as `count: 0`. Results sorted ascending by date. | +| `getAgentActivity` | `tenantId: string` | `Promise` | Returns agent activity bucketed by day-of-week (0=Sun…6=Sat) and hour-of-day for the last 30 days. Reads only rows whose `metric_type` matches the pattern `agent::`. | +| `getAgentUsageSummary` | `tenantId: string` | `Promise` | Returns per-agent token issuance totals for the current calendar month, joined with the agent name (`owner` field). Sorted descending by `token_count`. Excludes decommissioned agents. | + +**Dependencies**: PostgreSQL connection pool (`Pool` from `pg`). No Redis usage. + +**Configuration**: None. `MAX_TREND_DAYS = 90` is a module-level constant. + +**DB tables**: +- `analytics_events`: `organization_id` (UUID FK to `organizations`), `date` (DATE), `metric_type` (text — e.g. `'token_issued'`, `'agent::token_issued'`), `count` (integer). Unique constraint on `(organization_id, date, metric_type)`. +- `agents`: read in `getAgentUsageSummary` to join `owner` and filter by `organization_id`. + +--- + +### TierService + +**Purpose**: Single authority for all subscription tier business logic — fetches current tier and live usage, initiates Stripe Checkout sessions for upgrades, applies confirmed upgrades to the `organizations` table, and enforces per-tier agent count limits. Controllers and middleware delegate all tier decisions to this service; no tier logic lives elsewhere. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `getStatus` | `orgId: string` | `Promise` | Returns current `tier`, per-tier `limits` (from `TIER_CONFIG`), live `usage` (Redis counters + DB agent count), and `resetAt` (ISO 8601 next UTC midnight). 
Falls back to `0` for Redis counters when Redis is unavailable. | +| `initiateUpgrade` | `orgId: string, targetTier: TierName` | `Promise` | Validates that `targetTier` is strictly higher rank than current tier. Creates a Stripe Checkout Session with `mode: 'subscription'`, `metadata: { orgId, targetTier }`, and the price ID from the `STRIPE_PRICE_ID_{TIER}` env var (`STRIPE_PRICE_ID_PRO` or `STRIPE_PRICE_ID_ENTERPRISE`). Returns `{ checkoutUrl }`. | +| `applyUpgrade` | `orgId: string, tier: TierName` | `Promise` | Sets `organizations.tier` and `organizations.tier_updated_at = NOW()`. Called by the Stripe webhook handler after `checkout.session.completed`. | +| `fetchTier` | `orgId: string` | `Promise` | Queries `organizations.tier` for the given org. Returns `'free'` as a safe default when no row is found or the stored value is not a valid `TierName`. | +| `enforceAgentLimit` | `orgId: string, tier: TierName` | `Promise` | Counts non-decommissioned agents for the org and throws `TierLimitError` when count is at or over `TIER_CONFIG[tier].maxAgents`. No-op for Enterprise (infinite limit). Called by `AgentService` before creating a new agent. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`), Stripe client (`Stripe`). Imports `TIER_CONFIG` and `TIER_RANK` from `src/config/tiers.ts`. + +**Configuration**: +- `STRIPE_PRICE_ID_PRO` — Stripe price ID for the Pro tier +- `STRIPE_PRICE_ID_ENTERPRISE` — Stripe price ID for the Enterprise tier +- `STRIPE_PRICE_ID` — Fallback Stripe price ID when tier-specific vars are not set +- `STRIPE_SUCCESS_URL` — Redirect URL on successful checkout (default: `APP_BASE_URL/dashboard?billing=success`) +- `STRIPE_CANCEL_URL` — Redirect URL when checkout is cancelled (default: `APP_BASE_URL/dashboard?billing=cancel`) +- `APP_BASE_URL` — Base URL for redirect URL construction (default: `http://localhost:3000`) + +**Redis keys**: +- `rate:tier:calls:{orgId}` — integer, daily API call counter; TTL set at next UTC midnight. Read in `getStatus`. 
+- `rate:tier:tokens:` — integer, daily token issuance counter; same TTL. Read in `getStatus`. + +**DB tables**: +- `organizations`: `organization_id` (UUID PK), `tier` (text — `'free'|'pro'|'enterprise'`), `tier_updated_at` (timestamptz). Read in `fetchTier`; written in `applyUpgrade`. +- `agents`: read in `enforceAgentLimit` and `getStatus` to count non-decommissioned agents per org. + +**Error types**: +- `ValidationError` (400) — target tier is not higher than current tier +- `TierLimitError` (429) — agent count limit reached for the current tier + +--- + +### ComplianceService + +**Purpose**: Generates AGNTCY-standard compliance reports and exports agent cards for a tenant. Reports cover two sections: `agent-identity` (DID presence and credential expiry checks) and `audit-trail` (cryptographic hash chain verification). Reports are cached in Redis for 5 minutes to avoid repeated expensive DB queries. Agent card export returns all active agents in AGNTCY-standard JSON format. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `generateReport` | `tenantId: string` | `Promise` | Attempts to read `compliance:report:` from Redis; if found, returns it with `from_cache: true`. Otherwise builds the report by running `buildAgentIdentitySection` and `buildAuditTrailSection` in parallel, rolls up the overall status (fail > warn > pass), caches the result for 300 seconds, and returns it. | +| `exportAgentCards` | `tenantId: string` | `Promise` | Queries all non-decommissioned agents for the tenant and maps each to an AGNTCY agent card with `id` (DID or agent UUID), `name`, `capabilities`, `endpoint`, `created_at`, and `agntcy_schema_version: '1.0'`. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`). Internally instantiates `AuditVerificationService` for hash chain verification. + +**Configuration**: None. 
`CACHE_TTL_SECONDS = 300` and `AGNTCY_SCHEMA_VERSION = '1.0'` are module-level constants. + +**Redis keys**: +- `compliance:report:` — JSON-serialised `IComplianceReport`, TTL 300 seconds. Written by `generateReport`; read on every call within the cache window. + +**DB tables**: +- `agents`: queried in both `buildAgentIdentitySection` (checks DID presence) and `exportAgentCards`. +- `credentials`: queried in `buildAgentIdentitySection` to check active credential expiry per agent. +- `audit_events`: read via `AuditVerificationService` in `buildAuditTrailSection` to verify hash chain integrity. + +**Error types**: None thrown directly. Internal errors in section builders produce `status: 'fail'` sections rather than exceptions. + +**Report structure**: +- `agent-identity` section: `fail` when any active agent is missing a DID or has expired credentials; `warn` when any credential expires within 7 days; `pass` otherwise. +- `audit-trail` section: `fail` when `AuditVerificationService.verifyChain()` returns `verified: false`; `pass` otherwise. + +--- + +### FederationService + +**Purpose**: Manages trusted federation partners and cross-IdP JWT token verification. At partner registration, the partner's JWKS endpoint is validated and the keys are cached in Redis. At token verification, the service fetches (or reuses cached) partner JWKS, verifies the JWT signature and standard claims, enforces the partner's `allowed_organizations` filter, and rejects tokens from suspended or expired partners. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `registerPartner` | `req: ICreatePartnerRequest` | `Promise` | Validates the `jwks_uri` is reachable (5-second timeout) and returns valid JWKS. Inserts the partner row into `federation_partners`. Caches the JWKS in Redis under `federation:jwks:`. 
| +| `listPartners` | _(none)_ | `Promise` | Updates any partners past `expires_at` to `status = 'expired'` before returning all rows ordered by `created_at DESC`. | +| `getPartner` | `id: string` | `Promise` | Applies the same expiry update, then returns the partner row. Throws `FederationPartnerNotFoundError` (404) when not found. | +| `updatePartner` | `id: string, req: IUpdatePartnerRequest` | `Promise` | Applies a partial update. When `jwks_uri` changes, invalidates the old issuer's JWKS cache entry (`DEL federation:jwks:`). | +| `deletePartner` | `id: string` | `Promise` | Deletes the partner row and invalidates the JWKS cache. | +| `verifyFederatedToken` | `req: IFederationVerifyRequest` | `Promise` | Decodes token header/payload without verification, rejects `alg:none`, looks up partner by `iss`, checks partner status and expiry, fetches JWKS (cache-first), finds matching key by `kid`, converts JWK to PEM, verifies signature via `jsonwebtoken.verify` (RS256 or ES256), enforces `allowed_organizations` filter. Returns `{ valid, issuer, subject, organization_id, claims }`. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`). Uses `jsonwebtoken` for JWT decoding/verification and Node.js `crypto.createPublicKey` for JWK-to-PEM conversion. + +**Configuration**: +- `FEDERATION_JWKS_CACHE_TTL_SECONDS` — TTL for cached partner JWKS in Redis (default: `3600`) + +**Redis keys**: +- `federation:jwks:` — JSON-serialised `IJWKSKey[]`, TTL from `FEDERATION_JWKS_CACHE_TTL_SECONDS`. Written on partner registration and on cache miss during token verification; deleted when a partner is updated (JWKS URI changed) or deleted. + +**DB tables**: +- `federation_partners`: `id` (UUID PK), `name` (text), `issuer` (text — the IdP's issuer URL), `jwks_uri` (text), `allowed_organizations` (text[] — empty means all orgs allowed), `status` (`active|suspended|expired`), `created_at`, `updated_at`, `expires_at` (nullable timestamptz). 
+ +**Error types**: +- `FederationPartnerError` (400) — JWKS endpoint unreachable or returns invalid JWKS +- `FederationPartnerNotFoundError` (404) — partner UUID not found +- `FederationVerificationError` (401) — token malformed, alg:none, unknown issuer, partner suspended/expired, signature invalid, org not in allow list + +--- + +### DIDService + +**Purpose**: Manages W3C DID Core 1.0 document generation, EC P-256 key pair creation, and AGNTCY agent card export. Generates per-agent `did:web` identifiers, stores private keys in HashiCorp Vault (or records a `dev:no-vault` marker in dev mode), and caches DID documents in Redis. Builds both an instance-level DID document (for AgentIdP itself) and per-agent DID documents with AGNTCY extension properties. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `generateDIDForAgent` | `agentId: string, organizationId: string` | `Promise<{ did: string; publicKeyJwk: IPublicKeyJwk }>` | Generates an EC P-256 key pair. Stores the private key PEM in Vault KV v2 at `{mount}/data/agentidp/agents/{agentId}/did-key`. Encrypts the vault path via `EncryptionService` (when configured). Inserts a row into `agent_did_keys`. Updates `agents.did` and `agents.did_created_at`. Returns the `did:web` identifier and public key JWK. | +| `buildInstanceDIDDocument` | _(none)_ | `Promise` | Builds the root instance DID document for AgentIdP (format: `did:web:{DID_WEB_DOMAIN}`). Cached in Redis under `did:doc:instance`. | +| `buildAgentDIDDocument` | `agentId: string` | `Promise` | Builds a per-agent DID document (format: `did:web:{DID_WEB_DOMAIN}:agents:{agentId}`). Decommissioned agents get a deactivated document with an `AgentStatus: decommissioned` service entry. Cached in Redis under `did:doc:{agentId}` for active agents only. Throws `AgentNotFoundError` if the agent does not exist. 
| +| `buildResolutionResult` | `agentId: string` | `Promise` | Wraps `buildAgentDIDDocument` with W3C DID Resolution metadata (`didDocumentMetadata`, `didResolutionMetadata`). | +| `buildAgentCard` | `agentId: string` | `Promise` | Returns an AGNTCY-format agent card with `did`, `name` (agent email), `agentType`, `capabilities`, `owner`, `version`, `deploymentEnv`, `identityProvider`, and `issuedAt`. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`), optional `VaultClient`, optional `EncryptionService`. Uses `node-vault` directly for DID private key storage. + +**Configuration**: +- `DID_WEB_DOMAIN` — required; the domain for `did:web` DID construction (e.g. `idp.sentryagent.ai`) +- `DID_DOCUMENT_CACHE_TTL_SECONDS` — Redis cache TTL for DID documents (default: `300`) +- `VAULT_ADDR`, `VAULT_TOKEN`, `VAULT_MOUNT` — when set, private keys are stored in Vault; otherwise `dev:no-vault` marker is used + +**Redis keys**: +- `did:doc:instance` — JSON-serialised instance `IDIDDocument`, TTL from `DID_DOCUMENT_CACHE_TTL_SECONDS` +- `did:doc:{agentId}` — JSON-serialised per-agent `IDIDDocument`, same TTL. Not cached for decommissioned agents. + +**DB tables**: +- `agents`: `did` (text — `did:web:...`), `did_created_at` (timestamptz). Written by `generateDIDForAgent`; read in all document-building methods. +- `agent_did_keys`: `key_id` (UUID PK), `agent_id` (UUID FK), `organization_id` (UUID FK), `public_key_jwk` (JSONB), `vault_key_path` (text — Vault KV v2 path or `dev:no-vault`), `key_type` (`'EC'`), `curve` (`'P-256'`), `created_at`. Written by `generateDIDForAgent`. + +**Error types**: +- `AgentNotFoundError` (404) — agent UUID not found in `buildAgentDIDDocument`, `buildResolutionResult`, `buildAgentCard` + +--- + +### WebhookService + +**Purpose**: Manages webhook subscriptions and their delivery history for a tenant organisation. HMAC signing secrets are stored in HashiCorp Vault KV v2 (when configured) or bcrypt-hashed in PostgreSQL in local mode.
The raw secret is only returned once at subscription creation time. `vault_secret_path` is encrypted at rest via `EncryptionService` (AES-256-CBC) before being written to PostgreSQL (SOC 2 CC6.1 compliance). + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `createSubscription` | `orgId: string, req: ICreateWebhookRequest` | `Promise` | Generates a 32-byte random hex HMAC secret. Stores in Vault at `secret/data/agentidp/webhooks/{orgId}/{id}/secret` (Vault mode) or bcrypt-hashes and stores in `secret_hash` (local mode). Encrypts `vault_secret_path` via `EncryptionService`. Returns the subscription including the one-time `secret`. Validates URL must use `https://` and events array must be non-empty. | +| `listSubscriptions` | `orgId: string` | `Promise` | Returns all subscriptions for the org, ordered by `created_at DESC`. No secret fields are included. | +| `getSubscription` | `id: string, orgId: string` | `Promise` | Returns a single subscription. Verifies org ownership. | +| `updateSubscription` | `id: string, orgId: string, req: IUpdateWebhookRequest` | `Promise` | Partially updates `name`, `url`, `events`, or `active` fields. Validates `https://` if URL is changing. | +| `deleteSubscription` | `id: string, orgId: string` | `Promise` | Permanently deletes the subscription and all deliveries (via PostgreSQL CASCADE). | +| `getSubscriptionSecret` | `subscriptionId: string, orgId: string` | `Promise` | Retrieves the raw HMAC secret from Vault (Vault mode only). Throws `WebhookValidationError` in local mode since the secret cannot be recovered after creation. | +| `listDeliveries` | `subscriptionId: string, orgId: string, limit: number, offset: number` | `Promise` | Returns paginated delivery records for a subscription. Verifies org ownership before querying. 
| + +**Dependencies**: PostgreSQL (`Pool`), optional `VaultClient`, Redis (`RedisClientType` — reserved for future caching), optional `EncryptionService`. + +**Configuration**: Inherits Vault configuration from `VaultClient` (`VAULT_ADDR`, `VAULT_TOKEN`, `VAULT_MOUNT`). `EncryptionService` requires `ENCRYPTION_KEY` env var (see `EncryptionService` docs). + +**DB tables**: +- `webhook_subscriptions`: `id` (UUID PK), `organization_id` (UUID FK), `name` (text), `url` (text — https only), `events` (JSONB — `WebhookEventType[]`), `secret_hash` (text — bcrypt hash in local mode, `'vault'` in Vault mode), `vault_secret_path` (text — encrypted Vault path or `'local'`), `active` (boolean), `failure_count` (integer), `created_at`, `updated_at`. +- `webhook_deliveries`: `id` (UUID PK), `subscription_id` (UUID FK), `event_type` (text), `payload` (JSONB), `status` (`pending|delivered|failed|dead_letter`), `http_status_code` (integer nullable), `attempt_count` (integer), `next_retry_at` (timestamptz nullable), `delivered_at` (timestamptz nullable), `created_at`, `updated_at`. Cascades on subscription delete. + +**Error types**: +- `WebhookNotFoundError` (404) — subscription not found or belongs to another org +- `WebhookValidationError` (400) — invalid URL scheme, empty events array, or secret not recoverable in local mode + +--- + +### BillingService + +**Purpose**: Manages Stripe billing integration — creates Checkout Sessions for tenant subscriptions, processes incoming Stripe webhook events (subscription lifecycle and checkout completion), and retrieves current subscription status. When a `checkout.session.completed` event carries `{ orgId, targetTier }` in its metadata, delegates to `TierService.applyUpgrade` to update the organisation's tier. 
+ +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `createCheckoutSession` | `tenantId: string, successUrl: string, cancelUrl: string` | `Promise` | Creates a Stripe Checkout Session with `mode: 'subscription'`, `client_reference_id: tenantId`, and the price from `STRIPE_PRICE_ID`. Returns the checkout URL. Throws if Stripe does not return a URL. | +| `handleWebhookEvent` | `rawBody: Buffer, sig: string, webhookSecret: string` | `Promise` | Verifies the Stripe webhook signature via `stripe.webhooks.constructEvent`. Handles `customer.subscription.created/updated/deleted` (upserts `tenant_subscriptions`) and `checkout.session.completed` (applies tier upgrade via `TierService` when metadata contains `orgId` and `targetTier`). | +| `getSubscriptionStatus` | `tenantId: string` | `Promise` | Queries `tenant_subscriptions` for the given tenant. Returns `{ tenantId, status: 'free', currentPeriodEnd: null, stripeSubscriptionId: null }` when no row exists. | + +**Dependencies**: PostgreSQL (`Pool`), Stripe client (`Stripe`), optional `TierService`. + +**Configuration**: +- `STRIPE_PRICE_ID` — Stripe price ID for subscription checkout sessions +- `STRIPE_WEBHOOK_SECRET` — Stripe webhook endpoint secret (`whsec_...`); passed by the webhook controller, not read directly by the service + +**DB tables**: +- `tenant_subscriptions`: `tenant_id` (UUID PK or unique), `status` (text — `'free'|'active'|'past_due'|'canceled'`), `stripe_customer_id` (text), `stripe_subscription_id` (text), `current_period_end` (timestamptz nullable), `updated_at`. Upserted on subscription lifecycle events. + +**Error types**: None defined in the service. Stripe signature failures raise `Error` from `stripe.webhooks.constructEvent`; these propagate to the error handler as 400 responses. 
+ +--- + +### OIDCService (A2A / OIDC Provider) + +**Note**: `src/services/OIDCService.ts` does not exist as a standalone file — OIDC provider functionality is handled by the `oidc-provider` npm package, configured in `src/app.ts` and related route files. The service boundary for OIDC-related business logic is the `DelegationService`. The OIDC integration is therefore documented below in terms of the provider configuration and `DelegationService`. + +**Purpose**: The OIDC/A2A subsystem provides agent-to-agent (A2A) delegation using the `oidc-provider` library (v9.7.x). The provider is mounted as a sub-application at `/oidc` and issues short-lived delegation tokens scoped to a specific `delegatee_id`. The `DelegationService` (`src/services/DelegationService.ts`) manages the `delegation_chains` table for auditing. + +**Key endpoints exposed by the OIDC provider**: +- `POST /oidc/token` — issues delegation tokens via `client_credentials` or custom grant +- `GET /oidc/.well-known/openid-configuration` — OIDC discovery document +- `GET /oidc/jwks` — public JWK Set for verifying delegation tokens + +**DelegationService public methods** (from `src/services/DelegationService.ts`): + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `createDelegation` | `delegatorId: string, delegateeId: string, scope: string, expiresAt?: Date` | `Promise` | Inserts a delegation chain record into `delegation_chains`. Validates both agents exist and are active. | +| `verifyDelegation` | `token: string, delegateeId: string` | `Promise` | Verifies the delegation token signature and checks the chain record is active and not expired. | +| `revokeDelegation` | `chainId: string, delegatorId: string` | `Promise` | Sets `delegation_chains.status = 'revoked'` and `revoked_at = NOW()`. Validates the delegator owns the chain.
| + +**DB tables**: +- `delegation_chains`: `chain_id` (UUID PK), `delegator_id` (UUID), `delegatee_id` (UUID), `scope` (text), `status` (`active|revoked|expired`), `created_at`, `expires_at` (nullable), `revoked_at` (nullable), `token` (text — the delegation JWT). + +**Configuration**: +- `A2A_ENABLED` — when set to `'false'`, A2A/delegation endpoints return 404 +- `OIDC_ISSUER` — issuer URL for the OIDC provider diff --git a/docs/engineering/06-walkthroughs.md b/docs/engineering/06-walkthroughs.md index b4ddfc9..fe044fb 100644 --- a/docs/engineering/06-walkthroughs.md +++ b/docs/engineering/06-walkthroughs.md @@ -715,3 +715,260 @@ must store it securely. "revokedAt": null } ``` + +--- + +## Walkthrough 4 — A2A Delegation End-to-End + +**Request:** `POST /api/v1/oauth2/token/delegate` — one AI agent delegating a scoped capability to another + +This walkthrough traces how agent A (an orchestrator) issues a delegation token that grants agent B (a sub-agent) the right to act on its behalf with a restricted scope. + +--- + +### Step 1 — Route dispatch + +**File:** `src/routes/delegation.ts` + +```typescript +router.post( + '/token/delegate', + asyncHandler(authMiddleware), + opaMiddleware, + asyncHandler(delegationController.createDelegation.bind(delegationController)) +); +``` + +Both `authMiddleware` and `opaMiddleware` run. The OPA policy requires scope `agents:write` for delegation creation. + +--- + +### Step 2 — Controller: extract delegator and validate + +**File:** `src/controllers/DelegationController.ts` + +```typescript +const delegatorId = req.user.sub; // From the Bearer token's sub claim +const { delegatee_id, scope, expires_at } = req.body; +``` + +The controller validates that `delegatee_id` is a non-empty UUID, `scope` is a non-empty string, and `expires_at` (if provided) is a valid ISO 8601 datetime in the future. It passes these to `DelegationService.createDelegation()`. 
+ +--- + +### Step 3 — Service: verify both agents exist + +**File:** `src/services/DelegationService.ts` + +```typescript +const delegator = await this.agentRepository.findById(delegatorId); +if (!delegator || delegator.status !== 'active') { throw new AgentNotFoundError(delegatorId) } + +const delegatee = await this.agentRepository.findById(delegateeId); +if (!delegatee || delegatee.status !== 'active') { throw new AgentNotFoundError(delegateeId) } +``` + +Both agents must exist and be in `active` status. A suspended or decommissioned agent cannot participate in delegation. + +--- + +### Step 4 — Service: insert delegation chain record + +**File:** `src/services/DelegationService.ts` + +```typescript +await this.pool.query( + `INSERT INTO delegation_chains (chain_id, delegator_id, delegatee_id, scope, status, expires_at) + VALUES ($1, $2, $3, $4, 'active', $5)`, + [chainId, delegatorId, delegateeId, scope, expiresAt] +); +``` + +The `chain_id` is a UUID generated by the service. The `delegation_chains` table provides the authoritative source of truth for which delegations are active, independent of any token. + +--- + +### Step 5 — Response + +```json +{ + "chain_id": "f1e2d3c4-...", + "token": "eyJhbGciOiJSUzI1NiJ9...", + "delegator_id": "a1b2c3d4-...", + "delegatee_id": "b2c3d4e5-...", + "scope": "agents:read", + "status": "active", + "expires_at": "2026-04-05T00:00:00Z" +} +``` + +The `token` field is the signed delegation JWT. The delegatee presents this token to `POST /api/v1/oauth2/token/verify-delegation` to prove it has authority to act on the delegator's behalf. + +**Why store both the DB record and the JWT?** The DB record allows revocation — when the delegator calls `DELETE /api/v1/delegation-chains/:chainId`, the record is soft-deleted and all subsequent `verify-delegation` calls will fail even if the JWT itself has not yet expired. 
+ +--- + +## Walkthrough 5 — Tier Enforcement Request Lifecycle + +**Request:** Any authenticated API request when the organisation's daily call limit is reached + +This walkthrough traces how `tierMiddleware` intercepts a request before it reaches the OPA middleware, preventing quota-exceeded traffic from consuming service resources. + +--- + +### Step 1 — Auth middleware passes + +Same as Walkthrough 2, Step 3. The Bearer JWT is verified and `req.user` is populated with `sub` (agentId) and `organization_id`. + +--- + +### Step 2 — Tier middleware: fetch org tier + +**File:** `src/middleware/tier.ts` + +```typescript +const orgId = req.user.organization_id; +const tier = await tierService.fetchTier(orgId); +const config = TIER_CONFIG[tier]; +``` + +`fetchTier()` issues `SELECT tier FROM organizations WHERE organization_id = $1`. Returns `'free'` if no row is found (safe default). + +--- + +### Step 3 — Tier middleware: read daily counter + +**File:** `src/middleware/tier.ts` + +```typescript +const callsKey = `rate:tier:calls:${orgId}`; +const callsToday = await redis.get(callsKey); +const count = callsToday !== null ? parseInt(callsToday, 10) : 0; + +if (count >= config.maxCallsPerDay) { + throw new TierLimitError('calls', config.maxCallsPerDay, { orgId, tier, current: count }); +} +``` + +The Redis key `rate:tier:calls:{orgId}` is read. If null (first call of the day), count is 0. When count equals or exceeds the tier limit, `TierLimitError` (HTTP 429) is thrown immediately — no further middleware runs. + +--- + +### Step 4 — Tier middleware: increment counter (fire-and-forget) + +**File:** `src/middleware/tier.ts` + +```typescript +// Set TTL to next UTC midnight if key is new +void redis.multi() + .incr(callsKey) + .expireAt(callsKey, nextUtcMidnightUnix()) + .exec(); +next(); +``` + +The counter is incremented atomically using a Redis MULTI block.
The `EXPIREAT` command sets the key to auto-delete at the next UTC midnight, resetting the daily counter without any scheduled job. The increment is fire-and-forget — the request proceeds immediately to `opaMiddleware`. + +**Why expire at UTC midnight rather than a rolling 24-hour window?** Tier limits are documented as "per day", which users interpret as resetting at midnight. A rolling window (for example, a 24-hour TTL that starts at an organisation's first call) would make the reset time depend on when that first call happened, so each organisation's quota would come back at a different, hard-to-predict moment, which is counterintuitive. UTC midnight is predictable and easy to reason about. Note that a fixed window does permit a short burst of up to twice the daily limit across the midnight boundary (one day's quota just before midnight, the next day's just after). + +--- + +### Step 5 — Error handler serialises TierLimitError + +**File:** `src/middleware/errorHandler.ts` + +```json +HTTP 429 +{ + "code": "TIER_LIMIT_EXCEEDED", + "message": "Daily API call limit reached for your tier.", + "details": { + "tier": "free", + "limit": 1000, + "current": 1000 + } +} +``` + +The `Retry-After` header is set to the number of seconds until next UTC midnight so clients can implement automatic backoff. + +--- + +## Walkthrough 6 — Analytics Event Capture Flow + +**Trigger:** Any successful token issuance (`POST /api/v1/token`) + +This walkthrough traces how an analytics event is captured without affecting the latency of the primary token issuance response. + +--- + +### Step 1 — Token issuance completes + +**File:** `src/services/OAuth2Service.ts` + +```typescript +const accessToken = signToken(payload, this.privateKey); +// Primary response is ready — analytics is now fire-and-forget +void this.analyticsService.recordEvent(tenantId, 'token_issued'); +tokensIssuedTotal.inc({ scope }); +``` + +The `signToken()` call completes synchronously (RSA signing is CPU-bound, not I/O). The controller can now send the response. `analyticsService.recordEvent()` is called with `void` — the `await` is deliberately omitted. + +**Why `void` instead of `await`?** Token issuance latency must remain below 100ms (per the QA performance gate). A PostgreSQL write adds 5–15ms.
Since analytics data is aggregated (not transactional), losing an occasional event due to an error is acceptable. The response is never delayed for analytics. + +--- + +### Step 2 — AnalyticsService: UPSERT daily counter + +**File:** `src/services/AnalyticsService.ts` + +```typescript +async recordEvent(tenantId: string, metricType: string): Promise<void> { + try { + await this.pool.query( + `INSERT INTO analytics_events (organization_id, date, metric_type, count) + VALUES ($1, CURRENT_DATE, $2, 1) + ON CONFLICT (organization_id, date, metric_type) + DO UPDATE SET count = analytics_events.count + 1`, + [tenantId, metricType], + ); + } catch (err) { + console.error('[AnalyticsService] recordEvent failed — primary path unaffected', err); + } +} +``` + +The `ON CONFLICT DO UPDATE` upsert is atomic. Whether this is the first or the ten-thousandth `token_issued` event for this tenant today, the row is updated correctly. All errors are caught and logged, never rethrown — the token has already been returned to the caller. + +**Why one row per day per metric, not one row per event?** Storing a row per event would create millions of rows. The daily aggregate model keeps the table compact while still providing daily trend data (the granularity that analytics dashboards need). Sub-day granularity is available from the Prometheus `agentidp_tokens_issued_total` counter if needed.
+ +--- + +### Step 3 — Dashboard query (deferred) + +When a developer visits the analytics page in the developer portal, the portal calls: + +``` +GET /api/v1/analytics/token-trend?days=30 +``` + +**File:** `src/services/AnalyticsService.ts` — `getTokenTrend(tenantId, 30)` + +```sql +SELECT + gs.date::DATE::TEXT AS date, + COALESCE(ae.count, 0)::INTEGER AS count +FROM generate_series( + CURRENT_DATE - 29 * INTERVAL '1 day', + CURRENT_DATE, + INTERVAL '1 day' +) AS gs(date) +LEFT JOIN analytics_events ae + ON ae.date = gs.date::DATE + AND ae.organization_id = $2 + AND ae.metric_type = 'token_issued' +ORDER BY gs.date ASC +``` + +The `generate_series` + `LEFT JOIN` pattern ensures all 30 days appear in the result, with `count: 0` for days with no events. This avoids the need for the client to fill in gaps. diff --git a/docs/engineering/09-testing.md b/docs/engineering/09-testing.md index 7a941d3..bcbf46a 100644 --- a/docs/engineering/09-testing.md +++ b/docs/engineering/09-testing.md @@ -422,3 +422,165 @@ here is what AgentIdP does to mitigate the risk and how to test it. Test that the server rejects tokens with `alg: none` or `alg: HS256`. The `verifyToken()` function specifies `algorithms: ['RS256']`, which causes jsonwebtoken to reject any token with a different algorithm header. + +--- + +## 10.8 AGNTCY Conformance Test Suite + +**Location:** `tests/agntcy-conformance/conformance.test.ts` + +**Purpose:** Verifies that the AgentIdP platform conforms to the AGNTCY agent identity specification. These tests exercise live HTTP requests through the Express application against real PostgreSQL and Redis instances, exactly like integration tests — but they validate AGNTCY-specific protocol guarantees rather than individual endpoint correctness. 
+ +**How to run:** + +```bash +# Run the conformance suite (separate Jest config) +npm run test:agntcy-conformance + +# Equivalent long form +npx jest --config tests/agntcy-conformance/jest.config.cjs + +# Run with TEST_DATABASE_URL and TEST_REDIS_URL overrides +TEST_DATABASE_URL=postgresql://sentryagent:sentryagent@localhost:5432/sentryagent_idp_test \ +TEST_REDIS_URL=redis://localhost:6379/1 \ +npm run test:agntcy-conformance + +# Enable A2A delegation conformance tests (gated by env var) +A2A_ENABLED=true npm run test:agntcy-conformance +``` + +The conformance suite uses its own `jest.config.cjs` (located in `tests/agntcy-conformance/`) so it does not run with `npm test` by default. This is intentional — the suite requires `COMPLIANCE_ENABLED=true` and optionally `A2A_ENABLED=true`, which should not be required for the standard unit/integration test run. + +**What each test validates:** + +| Conformance Test | What it validates | AGNTCY Domain | +|-----------------|-------------------|---------------| +| **Conformance 1 — Agent registration creates DID:WEB identifier** | `POST /api/v1/agents` returns a `did` field matching `did:web:*` pattern when `DID_WEB_DOMAIN` is set. The `did` field is optional in the response (test is conditional on presence) — but when present, it must conform to the `did:web:` scheme. | Non-Human Identity | +| **Conformance 2 — Token issuance via `client_credentials` grant** | Registers an agent, generates credentials via API, then exercises the full OAuth 2.0 Client Credentials flow. Validates that `POST /api/v1/token` returns a 200 response with `access_token` (string), `token_type: 'Bearer'`, and a JWT with 3 dot-separated parts. | Authentication | +| **Conformance 3 — A2A delegation chain create + verify** | _(Gated by `A2A_ENABLED=true`.)_ Creates a delegation chain between two agents via `POST /api/v1/oauth2/token/delegate`. If a token is returned, verifies it via `POST /api/v1/oauth2/token/verify-delegation`. 
Accepts 200 or 201 on creation and 200 or 204 on verification. | Agent-to-Agent Trust | +| **Conformance 4 — Compliance report returns valid AGNTCY structure** | Calls `GET /api/v1/compliance/report` and validates all required AGNTCY fields: `generated_at` (valid ISO 8601), `tenant_id` (string), `agntcy_schema_version: '1.0'`, `sections` (array with `name`, `status`, `details` per entry), `overall_status` (one of `pass/fail/warn`). Also verifies the `agent-identity` and `audit-trail` section names are present. A second request verifies the Redis cache (`X-Cache: HIT` header and `from_cache: true` body field). | Audit, Compliance | + +**Schema tables created by conformance suite:** The suite creates its own tables using `CREATE TABLE IF NOT EXISTS` before tests run. The tables match the production schema and include: `organizations`, `agents`, `credentials`, `audit_events`, `token_revocations`, `agent_did_keys`, `delegation_chains`. These are cleaned up via `DELETE` in `afterEach` (child-to-parent order respecting FK constraints) and dropped implicitly when the test database is reset. + +**Environment variables used:** + +| Variable | Required | Purpose | +|---|---|---| +| `TEST_DATABASE_URL` | Yes (or default) | PostgreSQL connection string for the test database | +| `TEST_REDIS_URL` | Yes (or default) | Redis connection string (index 1 recommended) | +| `COMPLIANCE_ENABLED` | Yes (`'true'`) | Enables the compliance report endpoint | +| `A2A_ENABLED` | No (default `'true'`) | Set to `'false'` to skip Conformance 3 (A2A delegation) | +| `DID_WEB_DOMAIN` | No | When set, Conformance 1 validates the `did:web:` format | + +--- + +## 10.9 Tier Enforcement Tests + +**Location:** `tests/unit/services/TierService.test.ts` and `tests/integration/` + +**The TierService has the following test cases that must all pass:** + +### Unit tests (`tests/unit/services/TierService.test.ts`) + +The unit tests mock PostgreSQL (`Pool`) and Redis (`RedisClientType`) and Stripe. 
Key scenarios: + +| Test | Description | +|------|-------------| +| `getStatus() — returns correct tier and limits` | Mocks `SELECT tier FROM organizations` returning `'pro'`; mocks Redis GET calls for `rate:tier:calls` and `rate:tier:tokens`; verifies `ITierStatus.limits` matches `TIER_CONFIG['pro']`. | +| `getStatus() — falls back to 0 when Redis unavailable` | Redis GET throws; verifies `usage.callsToday = 0` and `usage.tokensToday = 0` with no error thrown. | +| `getStatus() — returns 'free' when org not found` | `SELECT` returns 0 rows; verifies `tier === 'free'`. | +| `initiateUpgrade() — throws ValidationError on downgrade attempt` | `targetTier = 'free'` when current is `'pro'`; verifies `ValidationError` is thrown with `TIER_RANK` comparison failure message. | +| `initiateUpgrade() — calls Stripe with correct metadata` | Verifies `stripe.checkout.sessions.create` is called with `metadata: { orgId, targetTier }` and `mode: 'subscription'`. | +| `applyUpgrade() — executes UPDATE organizations SET tier` | Verifies parameterized SQL is called with `[targetTier, orgId]`. | +| `enforceAgentLimit() — throws TierLimitError when limit reached` | Mock agent count equals `TIER_CONFIG[tier].maxAgents`; verifies `TierLimitError` with `limit` and `current` details. | +| `enforceAgentLimit() — no-op for Enterprise tier` | `TIER_CONFIG['enterprise'].maxAgents = Infinity`; verifies no SQL query for agent count and no error. | +| `fetchTier() — returns 'free' for unknown tier string in DB` | DB returns unrecognised string; verifies `isTierName` guard returns `'free'`. 
| + +### Integration (middleware) tests + +When writing integration tests for the tier enforcement middleware (`src/middleware/tier.ts`), the following scenarios must be covered: + +| Scenario | Expected behaviour | +|----------|-------------------| +| Request with org on `free` tier, under daily call limit | Request proceeds normally (2xx from downstream handler) | +| Request that would exceed `maxCallsPerDay` for the org's tier | `429 TierLimitError` — body contains `code: 'TIER_LIMIT_EXCEEDED'` | +| Request to `/health` or `/metrics` (unprotected routes) | Tier middleware not applied — always 200 | +| Org not found in `organizations` table | Defaults to `free` tier limits | + +--- + +## 10.10 Analytics Service Tests + +**Location:** `tests/unit/services/AnalyticsService.test.ts` + +The AnalyticsService unit tests mock the PostgreSQL `Pool`. Key scenarios that must be covered: + +| Test | Description | +|------|-------------| +| `recordEvent() — executes UPSERT without throwing` | Verifies `pool.query` is called with the `INSERT ... ON CONFLICT DO UPDATE` SQL pattern and the correct `[tenantId, metricType]` parameters. | +| `recordEvent() — catches and swallows pool errors` | Pool `query` throws; verifies `recordEvent` resolves (not rejects) and the error does not propagate. This is the fire-and-forget contract. | +| `getTokenTrend() — clamps days to 90` | Calls with `days = 200`; verifies `pool.query` receives `clampedDays = 90` as the first parameter. | +| `getTokenTrend() — maps rows to ITokenTrendEntry[]` | Mock returns rows with `date: '2026-03-01', count: '42'`; verifies the result is `[{ date: '2026-03-01', count: 42 }]` (count coerced to number). | +| `getAgentActivity() — maps rows to IAgentActivityEntry[]` | Mock returns rows with string-typed `dow`, `hour`, `count`; verifies all are coerced to numbers in the result. 
| +| `getAgentUsageSummary() — maps rows to IAgentUsageSummaryEntry[]` | Mock returns rows with `token_count: '150'`; verifies `token_count: 150` (number) in the result. | +| `getAgentUsageSummary() — joins with agents table on organization_id` | Verifies the SQL query joins `agents` with `LEFT JOIN analytics_events` and filters `a.organization_id = $1`. | + +**Coverage gate:** `AnalyticsService` must maintain >80% statement, branch, function, and line coverage. Run: + +```bash +npm run test:unit -- --coverage --testPathPattern=AnalyticsService +``` + +--- + +## 10.11 Running the Complete Phase 6 Test Matrix + +All of the following must pass before any Phase 6 feature is considered complete: + +```bash +# 1. Unit tests (all services including Phase 3–6) +npm run test:unit -- --coverage +# Must exit 0 with all 4 coverage metrics ≥ 80% + +# 2. Integration tests (requires PostgreSQL + Redis running) +npm run test:integration + +# 3. AGNTCY conformance suite +COMPLIANCE_ENABLED=true \ +A2A_ENABLED=true \ +npm run test:agntcy-conformance + +# 4. Dependency security audit +npm audit --audit-level=high +# Must exit 0 — no high or critical vulnerabilities + +# 5. 
TypeScript compilation +npx tsc --noEmit +# Must exit 0 — zero type errors +``` + +**Current test file inventory** (as of Phase 6 completion): + +Unit test files in `tests/unit/services/`: + +| File | Service tested | +|------|---------------| +| `AgentService.test.ts` | `AgentService` | +| `AnalyticsService.test.ts` | `AnalyticsService` | +| `AuditService.test.ts` | `AuditService` | +| `AuditVerificationService.test.ts` | `AuditVerificationService` | +| `BillingService.test.ts` | `BillingService` | +| `ComplianceService.test.ts` | `ComplianceService` | +| `CredentialService.test.ts` | `CredentialService` | +| `DIDService.test.ts` | `DIDService` | +| `DelegationService.test.ts` | `DelegationService` | +| `EncryptionService.test.ts` | `EncryptionService` | +| `FederationService.test.ts` | `FederationService` | +| `IDTokenService.test.ts` | `IDTokenService` | +| `OAuth2Service.test.ts` | `OAuth2Service` | +| `OIDCKeyService.test.ts` | `OIDCKeyService` | +| `OrgService.test.ts` | `OrgService` | +| `ScaffoldService.test.ts` | `ScaffoldService` | +| `ScaffoldService.errors.test.ts` | `ScaffoldService` error cases | +| `TierService.test.ts` | `TierService` | +| `WebhookService.test.ts` | `WebhookService` | diff --git a/docs/engineering/11-sdk-guide.md b/docs/engineering/11-sdk-guide.md index d08e30d..5b3c65e 100644 --- a/docs/engineering/11-sdk-guide.md +++ b/docs/engineering/11-sdk-guide.md @@ -360,7 +360,286 @@ The `TokenManager` is thread-safe. `AgentIdPClient` is safe for concurrent use f --- -## 6. SDK Contribution Guide — Adding a New Endpoint +## 6. Rust SDK + +The Rust SDK (`sdk-rust/`) is a production-grade, async-first client for the SentryAgent.ai AgentIdP API. It provides full coverage of the 14 API endpoints across agent identity, OAuth 2.0 token management, credential rotation, audit logs, the public marketplace, and agent-to-agent (A2A) delegation. + +**Requirements:** Rust 1.75+ (stable), `tokio` runtime. 
+ +--- + +### Installation + +Add the crate to your `Cargo.toml`: + +```toml +[dependencies] +sentryagent-idp = "1.0" +tokio = { version = "1.35", features = ["full"] } +``` + +The crate uses `reqwest` with `rustls-tls` (no OpenSSL dependency) and `serde` for JSON serialisation. + +--- + +### Authentication + +The Rust SDK uses the OAuth 2.0 Client Credentials grant, managed transparently by `TokenManager`. You never call `TokenManager` directly — it is embedded in `AgentIdPClient` and invoked automatically before every request. + +**Token refresh behaviour:** +- The first API call triggers a `POST /oauth2/token` request with `grant_type=client_credentials`. +- The returned token is cached behind an async `tokio::sync::Mutex`. +- Subsequent calls within the token lifetime return the cached token without a network round trip. +- The cache expires 60 seconds before the server-reported `expires_in`, ensuring tokens never expire mid-flight. +- The `Mutex` guarantees only one refresh happens even when many `tokio` tasks call `get_token()` concurrently. + +**Environment variable construction:** + +```rust +use sentryagent_idp::AgentIdPClient; + +// from_env() reads AGENTIDP_API_URL, AGENTIDP_CLIENT_ID, AGENTIDP_CLIENT_SECRET +let client = AgentIdPClient::from_env()?; +``` + +**Explicit construction:** + +```rust +use sentryagent_idp::AgentIdPClient; + +let client = AgentIdPClient::new( + "https://api.sentryagent.ai", + "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "sk_live_...", +); +``` + +| Environment Variable | Required | Purpose | +|---|---|---| +| `AGENTIDP_API_URL` | Yes | Base URL of the AgentIdP API | +| `AGENTIDP_CLIENT_ID` | Yes | OAuth 2.0 client identifier | +| `AGENTIDP_CLIENT_SECRET` | Yes | OAuth 2.0 client secret | + +--- + +### Complete Working Example + +The following example covers the full agent identity lifecycle: register → generate credentials → issue token → retrieve agent → list audit logs → delete agent. 
+ +```rust +use sentryagent_idp::{ + AgentIdPClient, AgentIdPError, + AuditLogFilters, MarketplaceFilters, RegisterAgentRequest, +}; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + // Build client from environment variables. + // Requires: AGENTIDP_API_URL, AGENTIDP_CLIENT_ID, AGENTIDP_CLIENT_SECRET + let client = AgentIdPClient::from_env()?; + + // ── Register a new agent ────────────────────────────────────────────────── + let agent = client.register_agent(RegisterAgentRequest { + name: "my-screener-agent".to_owned(), + description: Some("Screens resumes using ML".to_owned()), + agent_type: "screener".to_owned(), + capabilities: vec!["resume:read".to_owned(), "classify".to_owned()], + metadata: None, + }).await?; + + println!("Registered: {} (DID: {})", agent.id, agent.did); + + // ── Generate credentials for the agent ──────────────────────────────────── + let creds = client.generate_credentials(&agent.id).await?; + println!("Client ID: {}", creds.client_id); + println!("Client Secret: {} (store this — shown once)", creds.client_secret); + + // ── Issue a scoped token (TokenManager handles this automatically) ──────── + let token_resp = client.issue_token(&agent.id, &["agents:read", "agents:write"]).await?; + println!("Token type: {}, expires in {}s", token_resp.token_type, token_resp.expires_in); + + // ── Retrieve the agent ──────────────────────────────────────────────────── + let fetched = client.get_agent(&agent.id).await?; + println!("Fetched: {} (public: {})", fetched.name, fetched.is_public); + + // ── List agents ─────────────────────────────────────────────────────────── + let list = client.list_agents(Some(1), Some(10)).await?; + println!("Total agents: {}", list.total); + + // ── Audit logs ──────────────────────────────────────────────────────────── + let logs = client.list_audit_logs(AuditLogFilters { + agent_id: Some(agent.id.clone()), + event_type: None, + from: None, + to: None, + page: 1, + per_page: 10, + }).await?; + println!("Audit
events: {}", logs.total); + + // ── Rotate credentials ──────────────────────────────────────────────────── + let new_creds = client.rotate_credentials(&agent.id).await?; + println!("New secret: {}", new_creds.client_secret); + + // ── Delete agent ────────────────────────────────────────────────────────── + client.delete_agent(&agent.id).await?; + println!("Agent deleted."); + + Ok(()) +} +``` + +Run the bundled quickstart example directly: + +```bash +AGENTIDP_API_URL=http://localhost:3000 \ +AGENTIDP_CLIENT_ID=your-client-id \ +AGENTIDP_CLIENT_SECRET=your-client-secret \ +cargo run --example quickstart +``` + +--- + +### Client Methods Reference + +All methods are `async` and return `Result`. The client is cheap to clone — the inner `reqwest::Client` and token cache are shared via `Arc`. + +**Agent Registry** (`sdk-rust/src/agents.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `register_agent` | `(req: RegisterAgentRequest) -> Result` | `POST /agents` — 201 | +| `get_agent` | `(agent_id: &str) -> Result` | `GET /agents/{id}` — 200 | +| `list_agents` | `(page: Option, per_page: Option) -> Result` | `GET /agents` — 200 | +| `update_agent` | `(agent_id: &str, req: UpdateAgentRequest) -> Result` | `PATCH /agents/{id}` — 200 | +| `delete_agent` | `(agent_id: &str) -> Result<()>` | `DELETE /agents/{id}` — 204 | + +**Credential Management** (`sdk-rust/src/credentials.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `generate_credentials` | `(agent_id: &str) -> Result` | `POST /agents/{id}/credentials` — 201. `client_secret` shown once. | +| `rotate_credentials` | `(agent_id: &str) -> Result` | `POST /agents/{id}/credentials/rotate` — 200. New secret shown once. 
| +| `revoke_credentials` | `(agent_id: &str, cred_id: &str) -> Result<()>` | `DELETE /agents/{id}/credentials/{cred_id}` — 204 | + +**Token Operations** (`sdk-rust/src/oauth2.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `issue_token` | `(agent_id: &str, scopes: &[&str]) -> Result` | Issues a scoped Bearer JWT. Token is cached by `TokenManager` automatically. | + +**Audit Log** (`sdk-rust/src/audit.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `list_audit_logs` | `(filters: AuditLogFilters) -> Result` | Paginated audit log query with optional agent_id, event_type, from, to filters. | + +**Marketplace** (`sdk-rust/src/marketplace.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `list_public_agents` | `(filters: MarketplaceFilters) -> Result` | Lists publicly discoverable agents with optional `q`, `capability`, `publisher` filters. | + +**A2A Delegation** (`sdk-rust/src/delegation.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `delegate` | `(req: DelegateRequest) -> Result` | Creates a delegation chain and returns the delegation JWT. | +| `verify_delegation` | `(token: &str) -> Result` | Verifies a delegation token and returns the verified claims. | + +--- + +### Error Types + +All SDK operations return `Result`. 
Match on the enum variants for structured error handling: + +```rust +use sentryagent_idp::AgentIdPError; + +match client.get_agent("unknown-id").await { + Ok(agent) => println!("Found: {}", agent.name), + Err(AgentIdPError::NotFound(msg)) => { + eprintln!("Agent not found: {}", msg); + } + Err(AgentIdPError::AuthError(msg)) => { + eprintln!("Auth failed: {}", msg); + // Token may have been revoked — check credentials + } + Err(AgentIdPError::RateLimited { retry_after_secs }) => { + eprintln!("Rate limited — retry after {}s", retry_after_secs); + tokio::time::sleep(std::time::Duration::from_secs(retry_after_secs)).await; + } + Err(AgentIdPError::ApiError { status, message, code }) => { + eprintln!("API error {}: {} (code: {:?})", status, message, code); + } + Err(AgentIdPError::ConfigError(msg)) => { + // Missing environment variable — fix before running + eprintln!("Config error: {}", msg); + } + Err(AgentIdPError::HttpError(e)) => { + // reqwest transport error — network issue + eprintln!("HTTP transport error: {}", e); + } + Err(AgentIdPError::SerdeError(e)) => { + // JSON parse failure — API response shape mismatch + eprintln!("Serialization error: {}", e); + } + Err(AgentIdPError::DelegationError(msg)) => { + eprintln!("Delegation chain invalid: {}", msg); + } +} +``` + +| Variant | Trigger | HTTP status | +|---------|---------|-------------| +| `HttpError(reqwest::Error)` | Network-level failure (connection refused, timeout) | N/A | +| `ApiError { status, message, code }` | Non-2xx response not matching a specific variant | Any non-2xx | +| `AuthError(String)` | 401 or 403 from the API | 401, 403 | +| `NotFound(String)` | 404 from the API | 404 | +| `RateLimited { retry_after_secs }` | 429 — parses `Retry-After` header (defaults to 60s) | 429 | +| `ConfigError(String)` | Missing env var in `from_env()` | N/A | +| `SerdeError(serde_json::Error)` | JSON deserialisation failure | N/A | +| `DelegationError(String)` | Invalid delegation chain | N/A | + +--- + +### 
Adding a New Endpoint to the Rust SDK + +When the AgentIdP server adds a new API endpoint, add it to the Rust SDK using this checklist: + +**File structure** (`sdk-rust/src/`): + +``` +sdk-rust/src/ +├── lib.rs # Crate root — re-exports and module declarations +├── client.rs # AgentIdPClient struct and new()/from_env() constructors +├── token_manager.rs # TokenManager — async token cache +├── models.rs # All request/response structs (serde Serialize/Deserialize) +├── error.rs # AgentIdPError enum +├── agents.rs # Agent registry methods (impl AgentIdPClient) +├── credentials.rs # Credential management methods +├── oauth2.rs # Token issuance methods +├── audit.rs # Audit log methods +├── marketplace.rs # Marketplace methods +└── delegation.rs # A2A delegation methods +``` + +**Checklist:** + +- [ ] Add request/response structs to `models.rs` with `#[derive(Debug, serde::Serialize, serde::Deserialize)]` +- [ ] Add the method to the appropriate `impl AgentIdPClient` block in the relevant `.rs` file. If the endpoint belongs to a new domain, create a new file and declare it as `pub mod <name>;` in `lib.rs` +- [ ] Use `self.get_auth_header().await?` for the `Authorization: Bearer` header +- [ ] Use the shared `parse_response::<T>(resp).await` helper (defined in `agents.rs`) to map HTTP status codes to `AgentIdPError` variants +- [ ] Add a doc comment (`///`) to the method with: the HTTP method + path, the success response type, and `# Errors` listing which `AgentIdPError` variants it can return +- [ ] Re-export new public types from `lib.rs` with `pub use models::{NewRequestType, NewResponseType};` +- [ ] Add a unit test using `mockito::Server` (see `token_manager.rs` tests for the pattern) +- [ ] Run `cargo test` and verify all tests pass +- [ ] Run `cargo doc --no-deps --open` and verify the new method appears with correct documentation +- [ ] Verify `cargo clippy -- -D warnings` exits 0 + +--- + +## 7.
SDK Contribution Guide — Adding a New Endpoint When the server adds a new API endpoint, update all four SDKs. The checklist below covers each SDK. diff --git a/docs/engineering/README.md b/docs/engineering/README.md index 8f53c5d..6f4ee0c 100644 --- a/docs/engineering/README.md +++ b/docs/engineering/README.md @@ -12,15 +12,15 @@ | 2 | [System Architecture](02-architecture.md) | Component diagram, HTTP request lifecycle, OAuth 2.0 data flow, multi-region topology | 20 min | | 3 | [Technology Stack and ADRs](03-tech-stack.md) | Why each technology was chosen — rationale and alternatives considered | 20 min | | 4 | [Codebase Structure](04-codebase-structure.md) | Directory map, where to add new code, DRY enforcement rules | 15 min | -| 5 | [Service Deep Dives](05-services.md) | All 8 services/components — purpose, interface, schema, error types | 30 min | +| 5 | [Service Deep Dives](05-services.md) | All 17 services/components (incl. Phase 3–6: AnalyticsService, TierService, ComplianceService, FederationService, DIDService, WebhookService, BillingService, DelegationService, OIDCService) — purpose, interface, schema, error types | 45 min | | 6 | [Annotated Code Walkthroughs](06-walkthroughs.md) | Step-by-step traces of token issuance, agent registration, credential rotation | 30 min | | 7 | [Development Environment Setup](07-dev-setup.md) | Clone to running local stack — under 30 minutes | 30 min | | 8 | [Engineering Workflow](08-workflow.md) | OpenSpec spec-first workflow, branching, PR checklist, commit conventions | 20 min | | 9 | [Testing Strategy](09-testing.md) | Unit vs integration, coverage gates, how to write tests, OWASP reference | 20 min | | 10 | [Deployment and Operations](10-deployment.md) | Docker, Terraform, Prometheus/Grafana, operational runbook | 20 min | -| 11 | [SDK Integration Guide](11-sdk-guide.md) | All 4 SDKs — installation, examples, contribution guide | 20 min | +| 11 | [SDK Integration Guide](11-sdk-guide.md) | All 5 SDKs (Node.js, Python, 
Go, Java, Rust) — installation, examples, contribution guide | 25 min | -**Total estimated reading time for new engineers: ~3.5 hours** +**Total estimated reading time for new engineers: ~4 hours** --- @@ -34,8 +34,13 @@ | Add a new API endpoint | [08-workflow.md](08-workflow.md) + [04-codebase-structure.md](04-codebase-structure.md) | | Write tests | [09-testing.md](09-testing.md) | | Deploy to production | [10-deployment.md](10-deployment.md) | -| Integrate with the SDK | [11-sdk-guide.md](11-sdk-guide.md) | +| Integrate with the SDK (Node.js, Python, Go, Java, Rust) | [11-sdk-guide.md](11-sdk-guide.md) | | Understand why a technology was chosen | [03-tech-stack.md](03-tech-stack.md) | +| Understand tier limits and billing | [01-overview.md](01-overview.md) (Section 6) + [03-tech-stack.md](03-tech-stack.md) (ADR-11) | +| Understand AGNTCY compliance reports | [05-services.md](05-services.md) (ComplianceService) | +| Understand the A2A delegation flow | [06-walkthroughs.md](06-walkthroughs.md) (Walkthrough 4) | +| Run the AGNTCY conformance suite | [09-testing.md](09-testing.md) (Section 10.8) | +| Add a new Rust SDK endpoint | [11-sdk-guide.md](11-sdk-guide.md) (Section 6 contribution guide) | --- diff --git a/openspec/changes/archive/developer-docs-phase6-update/.openspec.yaml b/openspec/changes/archive/developer-docs-phase6-update/.openspec.yaml new file mode 100644 index 0000000..e36bc79 --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/.openspec.yaml @@ -0,0 +1,23 @@ +id: developer-docs-phase6-update +title: "Developer Documentation — Phase 6 Complete Update" +status: complete +proposed: 2026-04-04 +approved: 2026-04-04 +approved-by: CEO +completed: 2026-04-04 +workstreams: + - id: WS1 + title: "Update api-reference.md — all current endpoints" + status: complete + - id: WS2 + title: "Update concepts.md — Phase 3–6 concepts" + status: complete + - id: WS3 + title: "Update quick-start.md — org creation prerequisite" + status: complete + 
- id: WS4 + title: "Update guides/ — revise existing + add Phase 3–6 guides" + status: complete + - id: WS5 + title: "Update README.md — fix typo, update index" + status: complete diff --git a/openspec/changes/archive/developer-docs-phase6-update/proposal.md b/openspec/changes/archive/developer-docs-phase6-update/proposal.md new file mode 100644 index 0000000..2039edc --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/proposal.md @@ -0,0 +1,34 @@ +# OpenSpec Proposal — developer-docs-phase6-update + +**Status:** Approved +**Proposed:** 2026-04-04 +**Approved by:** CEO + +## Problem Statement + +The developer documentation in `docs/developers/` was written during Phase 2. Phases 3–6 shipped +analytics, API tiers, AGNTCY compliance, A2A delegation, DID identity, OIDC, webhooks, federation, +and the marketplace — none of which appear in the developer docs. The API reference covers only +14 endpoints; the current codebase has ~25+ endpoints. The README contains a "bedroom developers" +typo. External developers cannot use Phase 3–6 features without documentation. + +## Workstreams + +### WS1 — api-reference.md +Update to document all current endpoints with full request/response schemas, auth requirements, +and error codes. Replace the 14-endpoint Phase 1 reference with the complete Phase 6 surface. + +### WS2 — concepts.md +Add Phase 3–6 concepts: multi-tenancy/organizations, DID identity (did:web), OIDC provider, +A2A delegation, API tier plans (Free/Pro/Enterprise), and AGNTCY compliance. + +### WS3 — quick-start.md +Update to add org creation as Step 1 (now required before agent registration), and verify all +endpoint paths match current routes. + +### WS4 — guides/ +Revise the 4 existing guides for current endpoint paths and org-scoped requests. +Add new guides: analytics dashboard, tier management, A2A delegation, webhooks, AGNTCY compliance. + +### WS5 — README.md +Fix "bedroom developers" typo → "developers". 
Update document index to list all new guides. diff --git a/openspec/changes/archive/developer-docs-phase6-update/specs/ws1-api-reference/spec.md b/openspec/changes/archive/developer-docs-phase6-update/specs/ws1-api-reference/spec.md new file mode 100644 index 0000000..bcb2d3b --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/specs/ws1-api-reference/spec.md @@ -0,0 +1,1559 @@ +# WS1 Spec — api-reference.md + +## Target file +`docs/developers/api-reference.md` + +## Objective +Replace the current 14-endpoint Phase 1 API reference with a complete reference covering all current +endpoints. The Developer must rewrite the file in full. Every endpoint must include: HTTP method + +path, one-line description, auth requirement, required scopes, request fields, response schema, +error responses, and a working curl example. + +--- + +## Document structure + +The updated file must open with the same Base URL and Authentication preamble (no changes). The +table of contents must list all sections below. The Errors section must be expanded with all new +error codes. Endpoint groups must appear in this order: + +1. Agent Registry +2. Credentials +3. OAuth 2.0 / Tokens +4. Audit Log +5. Organizations +6. Analytics +7. API Tiers +8. Compliance +9. Webhooks +10. Federation +11. DID / OIDC +12. A2A Delegation +13. Marketplace + +--- + +## Updated preamble (verbatim) + +``` +# API Reference + +Complete reference for all SentryAgent.ai AgentIdP endpoints. + +## Base URL + + http://localhost:3000/api/v1 + +The port is configured via the PORT environment variable (default: 3000). + +## Authentication + +All endpoints require a JWT Bearer token in the Authorization header unless noted otherwise: + + Authorization: Bearer <token> + +Obtain a token via POST /api/v1/token using your agent's client_id and client_secret. + +Endpoints marked **No auth** do not require a Bearer token. Endpoints marked **Unauthenticated** are +intentionally public.
+``` + +--- + +## Errors section — updated error codes table + +Preserve all existing error codes and add the following new ones: + +| Code | HTTP Status | Description | +|------|-------------|-------------| +| `ORG_NOT_FOUND` | 404 | Organization with the given `orgId` does not exist | +| `ORG_ALREADY_EXISTS` | 409 | An organization with this slug already exists | +| `ORG_SUSPENDED` | 403 | Organization is suspended — operations are blocked | +| `MEMBER_ALREADY_EXISTS` | 409 | Agent is already a member of this organization | +| `DELEGATION_NOT_FOUND` | 404 | Delegation chain with the given `chainId` does not exist | +| `DELEGATION_EXPIRED` | 403 | Delegation token has expired | +| `DELEGATION_REVOKED` | 403 | Delegation token has been revoked | +| `DELEGATION_SCOPE_EXCEEDED` | 403 | Requested scopes exceed the delegator's own scopes | +| `TIER_UPGRADE_NOT_REQUIRED` | 400 | Target tier is not higher than the current tier | +| `WEBHOOK_NOT_FOUND` | 404 | Webhook subscription with the given `id` does not exist | +| `PARTNER_NOT_FOUND` | 404 | Federation partner with the given `id` does not exist | +| `COMPLIANCE_DISABLED` | 404 | AGNTCY compliance endpoints are disabled on this instance | + +--- + +## Section 1 — Agent Registry + +### POST /agents — Register a new agent + +**Description**: Creates a new AI agent identity. The `agentId` is system-assigned. +**Auth**: Bearer token with `agents:write` scope. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `email` | string | Yes | Unique email-format identifier for the agent | +| `agentType` | enum | Yes | `screener` \| `classifier` \| `orchestrator` \| `extractor` \| `summarizer` \| `router` \| `monitor` \| `custom` | +| `version` | string | Yes | Semantic version string, e.g. 
`1.0.0` | +| `capabilities` | string[] | Yes | One or more `resource:action` strings (min 1) | +| `owner` | string | Yes | Owning team or organisation, 1–128 characters | +| `deploymentEnv` | enum | Yes | `development` \| `staging` \| `production` | +| `organization_id` | string | No | UUID of the org to scope the agent to. Required on multi-tenant instances. | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `agentId` | UUID | System-assigned immutable identifier | +| `email` | string | Unique email identifier | +| `agentType` | string | Agent type | +| `version` | string | Semantic version | +| `capabilities` | string[] | Capability list | +| `owner` | string | Owning team | +| `deploymentEnv` | string | Deployment environment | +| `status` | string | Always `active` on creation | +| `createdAt` | ISO 8601 | Registration timestamp | +| `updatedAt` | ISO 8601 | Last update timestamp | + +**Error responses**: + +| Code | HTTP | Error code | +|------|------|-----------| +| Validation failure | 400 | `VALIDATION_ERROR` | +| Invalid token | 401 | `UNAUTHORIZED` | +| Missing scope | 403 | `INSUFFICIENT_SCOPE` | +| Free tier limit | 403 | `FREE_TIER_LIMIT_EXCEEDED` | +| Email taken | 409 | `AGENT_ALREADY_EXISTS` | +| Rate limit | 429 | `RATE_LIMIT_EXCEEDED` | + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "email": "screener-001@talent.ai", + "agentType": "screener", + "version": "1.0.0", + "capabilities": ["resume:read", "email:send"], + "owner": "talent-team", + "deploymentEnv": "production" + }' | jq . +``` + +--- + +### GET /agents — List agents + +**Description**: Returns a paginated list of registered agents. +**Auth**: Bearer token with `agents:read` scope. 
+ +**Query parameters**: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `page` | integer | 1 | Page number (1-based) | +| `limit` | integer | 20 | Results per page (max 100) | +| `owner` | string | — | Filter by owner (exact match) | +| `agentType` | enum | — | Filter by agent type | +| `status` | enum | — | Filter by status (`active` \| `suspended` \| `decommissioned`) | + +**Response** `200 OK`: `{ data: Agent[], total: number, page: number, limit: number }` + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents?page=1&limit=20&status=active" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /agents/{agentId} — Get agent by ID + +**Description**: Returns the full identity record for a single agent. +**Auth**: Bearer token with `agents:read` scope. + +**Path parameters**: `agentId` (UUID) + +**Response** `200 OK`: Full agent object (same fields as POST response). + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /agents/{agentId} — Update agent metadata + +**Description**: Partially updates agent metadata. Immutable fields (`agentId`, `email`, `createdAt`) cannot be changed. +**Auth**: Bearer token with `agents:write` scope. 
+ +**Request body** (`application/json`) — all fields optional: + +| Field | Type | Description | +|-------|------|-------------| +| `agentType` | enum | Updated agent type | +| `version` | string | Updated semantic version | +| `capabilities` | string[] | Updated capabilities (replaces full list) | +| `owner` | string | Updated owner | +| `deploymentEnv` | enum | Updated deployment environment | +| `status` | enum | `active` \| `suspended` \| `decommissioned`. Setting `decommissioned` is irreversible. | + +**Response** `200 OK`: Full updated agent object. + +**Error responses**: 400 `VALIDATION_ERROR` / `IMMUTABLE_FIELD`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE` / `AGENT_DECOMMISSIONED`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/agents/$AGENT_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "version": "1.5.0", "status": "suspended" }' | jq . +``` + +--- + +### DELETE /agents/{agentId} — Decommission an agent + +**Description**: Permanently decommissions an agent (soft delete). All active credentials are immediately revoked. Irreversible. +**Auth**: Bearer token with `agents:write` scope. + +**Response** `204 No Content` (empty body). + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND`, 409 `AGENT_ALREADY_DECOMMISSIONED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/agents/$AGENT_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 2 — Credentials + +### POST /agents/{agentId}/credentials — Generate credentials + +**Description**: Creates a new `client_id` + `client_secret` pair. The `clientSecret` is returned once only. +**Auth**: Bearer token with `agents:write` scope. 
+ +**Request body** (`application/json`) — optional: + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `expiresAt` | ISO 8601 | No | Optional expiry date. Must be a future date. Omit for non-expiring credential. | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `credentialId` | UUID | Unique credential identifier | +| `clientId` | UUID | Same as `agentId` | +| `clientSecret` | string | Plaintext secret (shown once only — store immediately) | +| `status` | string | `active` | +| `createdAt` | ISO 8601 | Creation timestamp | +| `expiresAt` | ISO 8601 \| null | Expiry date or null | +| `revokedAt` | ISO 8601 \| null | Always null on creation | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE` / `AGENT_NOT_ACTIVE`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "expiresAt": "2027-01-01T00:00:00.000Z" }' | jq . +``` + +--- + +### GET /agents/{agentId}/credentials — List credentials + +**Description**: Returns all credentials for an agent (active and revoked). The `clientSecret` is never returned. +**Auth**: Bearer token with `agents:read` scope. + +**Query parameters**: `page` (default 1), `limit` (default 20, max 100), `status` (`active` \| `revoked`) + +**Response** `200 OK`: `{ data: Credential[], total: number, page: number, limit: number }` + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials?status=active" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +### POST /agents/{agentId}/credentials/{credentialId}/rotate — Rotate a credential + +**Description**: Generates a new `clientSecret` for the same `credentialId`. The old secret is immediately invalidated. +**Auth**: Bearer token with `agents:write` scope. + +**Request body** (`application/json`) — optional: `{ "expiresAt": "ISO 8601" }` + +**Response** `200 OK`: Full credential object with new `clientSecret` (shown once only). + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND` / `CREDENTIAL_NOT_FOUND`, 409 `CREDENTIAL_ALREADY_REVOKED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST \ + "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials/$CREDENTIAL_ID/rotate" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{}' | jq . +``` + +--- + +### DELETE /agents/{agentId}/credentials/{credentialId} — Revoke a credential + +**Description**: Permanently revokes a credential. Irreversible. +**Auth**: Bearer token with `agents:write` scope. + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AGENT_NOT_FOUND` / `CREDENTIAL_NOT_FOUND`, 409 `CREDENTIAL_ALREADY_REVOKED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE \ + "http://localhost:3000/api/v1/agents/$AGENT_ID/credentials/$CREDENTIAL_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 3 — OAuth 2.0 / Tokens + +### POST /token — Issue an access token + +**Description**: Issues a signed RS256 JWT via the OAuth 2.0 Client Credentials grant. +**Auth**: No Bearer token — credentials are in the request body. 
+**Content-Type**: `application/x-www-form-urlencoded` + +**Request fields** (form-encoded): + +| Field | Required | Description | +|-------|----------|-------------| +| `grant_type` | Yes | Must be `client_credentials` | +| `client_id` | Yes | Agent's `agentId` (UUID) | +| `client_secret` | Yes | Credential secret | +| `scope` | No | Space-separated scopes. If omitted, all scopes are granted. | + +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `access_token` | string | Signed RS256 JWT | +| `token_type` | string | Always `Bearer` | +| `expires_in` | integer | Lifetime in seconds (3600) | +| `scope` | string | Granted scopes (space-separated) | + +**Error responses**: + +| Code | HTTP | Error | +|------|------|-------| +| Bad request / bad grant | 400 | `{ "error": "unsupported_grant_type" }` | +| Bad credentials | 401 | `{ "error": "invalid_client" }` | +| Agent suspended or monthly limit | 403 | `{ "error": "unauthorized_client" }` | +| Rate limit | 429 | `RATE_LIMIT_EXCEEDED` | + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials" \ + -d "client_id=$CLIENT_ID" \ + -d "client_secret=$CLIENT_SECRET" \ + -d "scope=agents:read agents:write" | jq . +``` + +--- + +### POST /token/introspect — Introspect a token + +**Description**: Checks whether a token is active. Always returns `200 OK` — check the `active` field. +**Auth**: Bearer token with `tokens:read` scope. 
+**Content-Type**: `application/x-www-form-urlencoded` + +**Request fields**: `token` (required), `token_type_hint` (optional, `access_token`) + +**Response** `200 OK` (active): `{ "active": true, "sub": "...", "client_id": "...", "scope": "...", "token_type": "Bearer", "iat": 0, "exp": 0 }` +**Response** `200 OK` (inactive): `{ "active": false }` + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/token/introspect \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "token=$TOKEN_TO_CHECK" | jq . +``` + +--- + +### POST /token/revoke — Revoke a token + +**Description**: Immediately invalidates a token. Idempotent. +**Auth**: Bearer token. +**Content-Type**: `application/x-www-form-urlencoded` + +**Request fields**: `token` (required), `token_type_hint` (optional) + +**Response** `200 OK`: `{}` (empty object) + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `FORBIDDEN`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/token/revoke \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "token=$TOKEN_TO_REVOKE" | jq . +``` + +--- + +## Section 4 — Audit Log + +### GET /audit — Query audit log + +**Description**: Returns a paginated, filtered list of audit events (most recent first). +**Auth**: Bearer token with `audit:read` scope. + +**Query parameters**: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `page` | integer | 1 | Page number | +| `limit` | integer | 50 | Results per page (max 200) | +| `agentId` | UUID | — | Filter by agent | +| `action` | string | — | Filter by action type (e.g. 
`token.issued`, `agent.created`) | +| `outcome` | enum | — | `success` or `failure` | +| `fromDate` | ISO 8601 | — | Events at or after this timestamp (max 90 days ago) | +| `toDate` | ISO 8601 | — | Events at or before this timestamp | + +**Response** `200 OK`: `{ data: AuditEvent[], total: number, page: number, limit: number }` + +**AuditEvent fields**: `eventId` (UUID), `agentId` (UUID), `action` (string), `outcome` (string), `ipAddress` (string), `userAgent` (string), `metadata` (object), `timestamp` (ISO 8601) + +**Error responses**: 400 `VALIDATION_ERROR` / `RETENTION_WINDOW_EXCEEDED`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/audit?agentId=$AGENT_ID&action=token.issued&limit=50" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /audit/{eventId} — Get audit event by ID + +**Description**: Returns a single audit event by its immutable `eventId`. +**Auth**: Bearer token with `audit:read` scope. + +**Path parameters**: `eventId` (UUID) + +**Response** `200 OK`: Single AuditEvent object. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `AUDIT_EVENT_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/audit/$EVENT_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /audit/verify — Verify audit chain integrity + +**Description**: Verifies the cryptographic hash chain of all audit events. Returns `verified: true` if the chain is intact. Rate limited to 30 req/min (computationally intensive). +**Auth**: Bearer token with `audit:read` scope. 
+ +**Query parameters**: `fromDate` (ISO 8601, optional), `toDate` (ISO 8601, optional) + +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `verified` | boolean | `true` if chain is intact, `false` if tampering detected | +| `checkedCount` | integer | Number of events checked | +| `fromDate` | ISO 8601 \| null | Verification window start | +| `toDate` | ISO 8601 \| null | Verification window end | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/audit/verify" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Section 5 — Organizations + +### POST /organizations — Create an organization + +**Description**: Creates a new tenant organization. Agents can be scoped to an organization via `organization_id`. +**Auth**: Bearer token (OPA scope enforcement — `admin:orgs` or equivalent policy). + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Display name, 1–255 characters | +| `slug` | string | Yes | URL-safe identifier — lowercase letters, digits, hyphens only | +| `planTier` | enum | No | `free` (default) \| `pro` \| `enterprise` | +| `maxAgents` | integer | No | Override the plan default agent limit | +| `maxTokensPerMonth` | integer | No | Override the plan default monthly token limit | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `organizationId` | UUID | System-assigned organization identifier | +| `name` | string | Display name | +| `slug` | string | URL-safe slug | +| `planTier` | string | Current plan tier | +| `maxAgents` | integer | Agent limit | +| `maxTokensPerMonth` | integer | Monthly token limit | +| `status` | string | `active` | +| `createdAt` | ISO 8601 | Creation timestamp | +| `updatedAt` | ISO 8601 
| Last update timestamp | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 409 `ORG_ALREADY_EXISTS`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "Acme AI", + "slug": "acme-ai", + "planTier": "pro" + }' | jq . +``` + +--- + +### GET /organizations — List organizations + +**Description**: Returns a paginated list of organizations. +**Auth**: Bearer token (OPA scope enforcement). + +**Query parameters**: `page` (default 1), `limit` (default 20, max 100), `status` (`active` \| `suspended` \| `deleted`) + +**Response** `200 OK`: `{ data: Organization[], total: number, page: number, limit: number }` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/organizations?status=active" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /organizations/{orgId} — Get organization by ID + +**Description**: Returns the full record for a single organization. +**Auth**: Bearer token (OPA scope enforcement). + +**Path parameters**: `orgId` (UUID) + +**Response** `200 OK`: Full organization object. + +**Error responses**: 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/organizations/$ORG_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /organizations/{orgId} — Update organization + +**Description**: Partially updates an organization. The `slug` is immutable after creation. +**Auth**: Bearer token (OPA scope enforcement). 
+ +**Request body** (`application/json`) — all fields optional: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | New display name | +| `planTier` | enum | `free` \| `pro` \| `enterprise` | +| `maxAgents` | integer | New agent limit | +| `maxTokensPerMonth` | integer | New token limit | +| `status` | enum | `active` \| `suspended` (use DELETE to set `deleted`) | + +**Response** `200 OK`: Full updated organization object. + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/organizations/$ORG_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "planTier": "enterprise" }' | jq . +``` + +--- + +### DELETE /organizations/{orgId} — Delete organization + +**Description**: Soft-deletes an organization. Sets status to `deleted`. +**Auth**: Bearer token (OPA scope enforcement). + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/organizations/$ORG_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +### POST /organizations/{orgId}/members — Add a member + +**Description**: Adds an existing agent to an organization with a specified role. +**Auth**: Bearer token (OPA scope enforcement). 
+ +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `agentId` | UUID | Yes | The agent to add | +| `role` | enum | Yes | `member` \| `admin` | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `memberId` | UUID | Membership record identifier | +| `organizationId` | UUID | Organization | +| `agentId` | UUID | Agent | +| `role` | string | `member` or `admin` | +| `joinedAt` | ISO 8601 | Membership creation timestamp | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 404 `ORG_NOT_FOUND` / `AGENT_NOT_FOUND`, 409 `MEMBER_ALREADY_EXISTS`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST "http://localhost:3000/api/v1/organizations/$ORG_ID/members" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "agentId": "'$AGENT_ID'", + "role": "member" + }' | jq . +``` + +--- + +## Section 6 — Analytics + +All analytics endpoints are scoped to the authenticated agent's `organization_id`. + +### GET /analytics/tokens — Token issuance trend + +**Description**: Returns daily token issuance counts for the past N days, scoped to the current organization. +**Auth**: Bearer token. + +**Query parameters**: + +| Parameter | Type | Default | Max | Description | +|-----------|------|---------|-----|-------------| +| `days` | integer | 30 | 90 | Number of days to return | + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "days": 30, + "data": [ + { "date": "2026-03-01", "count": 142 }, + { "date": "2026-03-02", "count": 198 } + ] +} +``` + +**Error responses**: 400 `VALIDATION_ERROR` (days > 90), 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=30" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +### GET /analytics/agents/activity — Agent activity heatmap + +**Description**: Returns agent request counts grouped by day-of-week and hour (UTC), for the current organization. +**Auth**: Bearer token. + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "data": [ + { "dow": 1, "hour": 9, "count": 54 }, + { "dow": 1, "hour": 10, "count": 87 } + ] +} +``` + +`dow` is 0 (Sunday) through 6 (Saturday). `hour` is 0–23 UTC. + +**Error responses**: 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents/activity" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /analytics/agents — Per-agent usage summary + +**Description**: Returns token issuance counts per agent for the current calendar month, for the current organization. +**Auth**: Bearer token. + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "month": "2026-03", + "data": [ + { "agentId": "uuid", "tokenCount": 312 }, + { "agentId": "uuid2", "tokenCount": 87 } + ] +} +``` + +**Error responses**: 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Section 7 — API Tiers + +### GET /tiers/status — Get current tier status + +**Description**: Returns the organization's current plan tier, configured limits, and live usage counters. +**Auth**: Bearer token with a valid `organization_id` claim. 
+ +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `tier` | string | `free` \| `pro` \| `enterprise` | +| `limits.maxAgents` | integer | Maximum agents allowed | +| `limits.maxCallsPerDay` | integer | Maximum API calls per day | +| `limits.maxTokensPerDay` | integer | Maximum token issuances per day | +| `usage.agentCount` | integer | Current active agent count | +| `usage.callsToday` | integer | API calls made today | +| `usage.tokensToday` | integer | Tokens issued today | + +**Error responses**: 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/tiers/status" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### POST /tiers/upgrade — Initiate tier upgrade + +**Description**: Creates a Stripe Checkout Session to upgrade the organization to a higher plan tier. Returns a one-time checkout URL to redirect the user to. +**Auth**: Bearer token with a valid `organization_id` claim. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `target_tier` | enum | Yes | `pro` \| `enterprise` — must be higher than current tier | + +**Response** `200 OK`: + +```json +{ "checkoutUrl": "https://checkout.stripe.com/pay/cs_live_..." } +``` + +**Error responses**: 400 `VALIDATION_ERROR` / `TIER_UPGRADE_NOT_REQUIRED`, 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "pro" }' | jq . +``` + +--- + +## Section 8 — Compliance + +### GET /compliance/controls — SOC 2 control status (public) + +**Description**: Returns the live status of all SOC 2 Trust Services Criteria controls. No authentication required. +**Auth**: None. 
+ +**Response** `200 OK` (`Cache-Control: public, max-age=60`): + +```json +{ + "controls": [ + { "id": "CC6.1", "name": "Logical Access Controls", "status": "pass", "lastChecked": "2026-04-04T00:00:00.000Z" }, + { "id": "CC7.2", "name": "System Monitoring", "status": "pass", "lastChecked": "2026-04-04T00:00:00.000Z" } + ] +} +``` + +Each control: `id` (string), `name` (string), `status` (`pass` \| `fail` \| `unknown`), `lastChecked` (ISO 8601) + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/controls" | jq . +``` + +--- + +### GET /compliance/report — AGNTCY compliance report + +**Description**: Generates an AGNTCY compliance report for the authenticated tenant. Cached in Redis for 5 minutes. Sets `X-Cache: HIT` when served from cache. +**Auth**: Bearer token. + +**Response** `200 OK`: + +```json +{ + "tenantId": "org-uuid", + "generatedAt": "2026-04-04T00:00:00.000Z", + "agntcyConformance": true, + "agentCount": 12, + "verifiedAgentCount": 12, + "auditChainIntegrity": true, + "from_cache": false +} +``` + +**Error responses**: 401 `UNAUTHORIZED`, 404 `COMPLIANCE_DISABLED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/report" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /compliance/agent-cards — Export AGNTCY agent cards + +**Description**: Exports all active agents for the authenticated tenant as AGNTCY-standard agent card JSON objects. +**Auth**: Bearer token. + +**Response** `200 OK`: Array of agent card objects. 
+ +Each card: `did` (string), `name` (string), `agentType` (string), `capabilities` (string[]), `owner` (string), `version` (string), `deploymentEnv` (string), `identityProvider` (string), `issuedAt` (ISO 8601) + +**Error responses**: 401 `UNAUTHORIZED`, 404 `COMPLIANCE_DISABLED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/agent-cards" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Section 9 — Webhooks + +All webhook endpoints require Bearer token authentication and are scoped to the authenticated agent's `organization_id`. Required scopes are enforced via OPA policy. + +### POST /webhooks — Create a subscription + +**Description**: Creates a new webhook subscription for the organization. The `signingSecret` is returned once only. +**Auth**: Bearer token. OPA enforces `webhooks:write`. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Human-readable subscription name | +| `url` | string | Yes | HTTPS endpoint that will receive events | +| `events` | string[] | Yes | One or more event types to subscribe to (see event type list below) | + +**Available event types**: `agent.created`, `agent.updated`, `agent.suspended`, `agent.reactivated`, `agent.decommissioned`, `credential.generated`, `credential.rotated`, `credential.revoked`, `token.issued`, `token.revoked` + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `id` | UUID | Subscription identifier | +| `organization_id` | UUID | Owning organization | +| `name` | string | Subscription name | +| `url` | string | Target endpoint URL | +| `events` | string[] | Subscribed event types | +| `active` | boolean | `true` | +| `signingSecret` | string | HMAC-SHA256 signing secret (shown once) | +| `failure_count` | integer | `0` | +| `created_at` | ISO 8601 | Creation timestamp | +| `updated_at` 
| ISO 8601 | Last update timestamp | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/webhooks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "prod-events", + "url": "https://my-app.example.com/hooks/sentryagent", + "events": ["agent.created", "token.issued"] + }' | jq . +``` + +--- + +### GET /webhooks — List subscriptions + +**Description**: Returns all webhook subscriptions for the organization. Signing secrets are never returned. +**Auth**: Bearer token. OPA enforces `webhooks:read`. + +**Response** `200 OK`: Array of subscription objects (without `signingSecret`). + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/webhooks" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /webhooks/{id} — Get subscription by ID + +**Description**: Returns a single subscription by its UUID. +**Auth**: Bearer token. OPA enforces `webhooks:read`. + +**Path parameters**: `id` (UUID) + +**Response** `200 OK`: Single subscription object (without `signingSecret`). + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /webhooks/{id} — Update subscription + +**Description**: Partially updates a webhook subscription. +**Auth**: Bearer token. OPA enforces `webhooks:write`. + +**Request body** (`application/json`) — all fields optional: `name` (string), `url` (string), `events` (string[]), `active` (boolean) + +**Response** `200 OK`: Updated subscription object. 
+ +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "active": false }' | jq . +``` + +--- + +### DELETE /webhooks/{id} — Delete subscription + +**Description**: Permanently deletes a webhook subscription and all its delivery records. +**Auth**: Bearer token. OPA enforces `webhooks:write`. + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +### GET /webhooks/{id}/deliveries — List delivery history + +**Description**: Returns a paginated list of delivery attempts for a subscription. +**Auth**: Bearer token. OPA enforces `webhooks:read`. + +**Query parameters**: `limit` (default 20), `offset` (default 0) + +**Response** `200 OK`: + +```json +{ + "deliveries": [...], + "total": 47, + "limit": 20, + "offset": 0 +} +``` + +Each delivery: `id` (UUID), `subscription_id` (UUID), `event_type` (string), `payload` (object), `status` (`pending` \| `delivered` \| `failed` \| `dead_letter`), `http_status_code` (integer \| null), `attempt_count` (integer), `next_retry_at` (ISO 8601 \| null), `delivered_at` (ISO 8601 \| null), `created_at` (ISO 8601), `updated_at` (ISO 8601) + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `WEBHOOK_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID/deliveries?limit=20&offset=0" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +--- + +## Section 10 — Federation + +All partner management endpoints require the `admin:orgs` scope (enforced via OPA). The verify +endpoint requires any authenticated agent. + +### POST /federation/trust — Register a trusted partner + +**Description**: Registers a new trusted federation partner (a remote IdP whose tokens this instance will accept). +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**Request body** (`application/json`): Implementation-defined fields for partner registration including `name` (string), `issuer` (string — partner's token issuer URL), `jwksUri` (string — partner's JWKS endpoint). + +**Response** `201 Created`: Partner record with `id`, `name`, `issuer`, `jwksUri`, `createdAt`. + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/federation/trust \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "PartnerCorp IdP", + "issuer": "https://idp.partnercorp.com", + "jwksUri": "https://idp.partnercorp.com/.well-known/jwks.json" + }' | jq . +``` + +--- + +### GET /federation/partners — List partners + +**Description**: Returns all registered federation partners. +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**Response** `200 OK`: Array of partner records. + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/federation/partners" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /federation/partners/{id} — Get partner by ID + +**Description**: Returns a single federation partner record. +**Auth**: Bearer token. OPA enforces `admin:orgs`. 
+ +**Path parameters**: `id` (UUID) + +**Error responses**: 401 `UNAUTHORIZED`, 403 `INSUFFICIENT_SCOPE`, 404 `PARTNER_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/federation/partners/$PARTNER_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### PATCH /federation/partners/{id} — Update partner + +**Description**: Partially updates a federation partner record. +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**curl example**: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/federation/partners/$PARTNER_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "name": "Updated Partner Name" }' | jq . +``` + +--- + +### DELETE /federation/partners/{id} — Delete partner + +**Description**: Removes a federation partner. This instance will no longer accept tokens from the partner's issuer. +**Auth**: Bearer token. OPA enforces `admin:orgs`. + +**Response** `204 No Content`. + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/federation/partners/$PARTNER_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +### POST /federation/verify — Verify a federated token + +**Description**: Verifies a token issued by a trusted federation partner. Returns the decoded claims if the token is valid and the issuer is trusted. +**Auth**: Bearer token (any authenticated agent — no `admin:orgs` required). + +**Request body** (`application/json`): `{ "token": "<partner-issued-jwt>" }` + +**Response** `200 OK`: `{ "valid": true, "claims": { ... } }` or `{ "valid": false, "reason": "..." }` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/federation/verify \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "token": "'$PARTNER_TOKEN'" }' | jq . 
+``` + +--- + +## Section 11 — DID / OIDC + +### GET /agents/{agentId}/did — Get agent DID document + +**Description**: Returns the W3C DID Core 1.0 document for an agent. Unauthenticated — publicly accessible. +**Auth**: None. + +**Response** `200 OK`: W3C DID Document. + +```json +{ + "@context": ["https://www.w3.org/ns/did/v1"], + "id": "did:web:localhost%3A3000:agents:a1b2c3d4", + "controller": "did:web:localhost%3A3000:agents:a1b2c3d4", + "verificationMethod": [{ + "id": "did:web:localhost%3A3000:agents:a1b2c3d4#key-1", + "type": "JsonWebKey2020", + "controller": "did:web:localhost%3A3000:agents:a1b2c3d4", + "publicKeyJwk": { "kty": "RSA", "n": "...", "e": "AQAB" } + }], + "authentication": ["did:web:localhost%3A3000:agents:a1b2c3d4#key-1"], + "agntcy": { + "agentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "agentType": "screener", + "capabilities": ["resume:read"], + "deploymentEnv": "production", + "owner": "talent-team", + "version": "1.0.0" + } +} +``` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/did" | jq . +``` + +--- + +### GET /agents/{agentId}/did/resolve — Resolve agent DID + +**Description**: Returns the full W3C DID Resolution Result format including metadata. +**Auth**: Bearer token + OPA policy. + +**Response** `200 OK`: + +```json +{ + "didDocument": { ... }, + "didDocumentMetadata": { + "created": "2026-03-28T09:00:00.000Z", + "updated": "2026-03-28T09:00:00.000Z", + "deactivated": false + }, + "didResolutionMetadata": { + "contentType": "application/did+ld+json", + "retrieved": "2026-04-04T00:00:00.000Z" + } +} +``` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/did/resolve" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### GET /agents/{agentId}/did/card — Get AGNTCY agent card + +**Description**: Returns the AGNTCY-format agent card for an agent. Unauthenticated. +**Auth**: None. 
+ +**Response** `200 OK`: + +```json +{ + "did": "did:web:localhost%3A3000:agents:a1b2c3d4", + "name": "screener-001@talent.ai", + "agentType": "screener", + "capabilities": ["resume:read"], + "owner": "talent-team", + "version": "1.0.0", + "deploymentEnv": "production", + "identityProvider": "https://sentryagent.ai", + "issuedAt": "2026-04-04T00:00:00.000Z" +} +``` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/agents/$AGENT_ID/did/card" | jq . +``` + +--- + +### GET /.well-known/openid-configuration — OIDC discovery document + +**Description**: Returns the OIDC Provider discovery document. Unauthenticated. Mounted at the server root (not under `/api/v1`). +**Auth**: None. + +**curl example**: + +```bash +curl -s "http://localhost:3000/.well-known/openid-configuration" | jq . +``` + +--- + +### GET /.well-known/jwks.json — JWKS endpoint + +**Description**: Returns the JSON Web Key Set (public keys used to verify ID tokens). Unauthenticated. +**Auth**: None. + +**curl example**: + +```bash +curl -s "http://localhost:3000/.well-known/jwks.json" | jq . +``` + +--- + +### GET /agent-info — Agent identity claims + +**Description**: Returns identity claims for the authenticated agent (equivalent to UserInfo in OIDC). Mounted at the server root. +**Auth**: Bearer token. + +**curl example**: + +```bash +curl -s "http://localhost:3000/agent-info" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### POST /api/v1/oidc/token — OIDC token exchange (GitHub Actions) + +**Description**: Exchanges a GitHub OIDC JWT for a SentryAgent.ai access token. Unauthenticated — the GitHub OIDC token is the credential. Trust-policy enforcement happens inside the controller. +**Auth**: None (GitHub OIDC JWT in body). 
+ +**Request body** (`application/json`): `{ "github_token": "<github-oidc-jwt>", "agentId": "<agent-uuid>" }` + +**Response** `200 OK`: `{ "access_token": "...", "token_type": "Bearer", "expires_in": 3600 }` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oidc/token \ + -H "Content-Type: application/json" \ + -d '{ + "github_token": "'$GITHUB_OIDC_TOKEN'", + "agentId": "'$AGENT_ID'" + }' | jq . +``` + +--- + +### POST /api/v1/oidc/trust-policies — Create trust policy + +**Description**: Registers a trust policy that allows GitHub Actions workflows matching specific claims to exchange tokens. +**Auth**: Bearer token with `agents:write` scope. + +**Request body** (`application/json`): Repository, branch, and claim constraints (implementation-defined fields). + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oidc/trust-policies \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "agentId": "'$AGENT_ID'", + "repository": "my-org/my-repo", + "branch": "main" + }' | jq . +``` + +--- + +### GET /api/v1/oidc/trust-policies — List trust policies + +**Description**: Returns all trust policies for an agent. +**Auth**: Bearer token with `agents:write` scope. + +**Query parameters**: `agentId` (UUID, required) + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/oidc/trust-policies?agentId=$AGENT_ID" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +### DELETE /api/v1/oidc/trust-policies/{id} — Delete trust policy + +**Description**: Deletes a trust policy by its UUID. +**Auth**: Bearer token with `agents:write` scope. + +**Response** `204 No Content`. 
+ +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/oidc/trust-policies/$POLICY_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 12 — A2A Delegation + +### POST /oauth2/token/delegate — Create a delegation chain + +**Description**: Creates a delegation chain that grants a delegatee agent a subset of the delegator's scopes for a limited time. +**Auth**: Bearer token. + +**Request body** (`application/json`): + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `delegateeAgentId` | UUID | Yes | The agent that receives delegated authority | +| `scopes` | string[] | Yes | Scopes to delegate — must be a strict subset of the caller's own scopes | +| `ttlSeconds` | integer | Yes | Delegation lifetime in seconds. Min: 60, Max: 86400 | + +**Response** `201 Created`: + +| Field | Type | Description | +|-------|------|-------------| +| `delegationToken` | string | Signed delegation token (HMAC-SHA256) | +| `chainId` | UUID | Delegation chain identifier | +| `delegatorAgentId` | UUID | Agent granting the delegation | +| `delegateeAgentId` | UUID | Agent receiving the delegation | +| `scopes` | string[] | Delegated scopes | +| `expiresAt` | ISO 8601 | Expiry timestamp | + +**Error responses**: 400 `VALIDATION_ERROR` / `DELEGATION_SCOPE_EXCEEDED`, 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/delegate \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "delegateeAgentId": "'$DELEGATEE_AGENT_ID'", + "scopes": ["agents:read"], + "ttlSeconds": 3600 + }' | jq . +``` + +--- + +### POST /oauth2/token/verify-delegation — Verify a delegation token + +**Description**: Verifies a delegation token and returns the chain details. Returns `valid: false` (not an error) for expired or revoked tokens. +**Auth**: Bearer token. 
+ +**Request body** (`application/json`): `{ "delegationToken": "<delegation-token>" }` + +**Response** `200 OK`: + +| Field | Type | Description | +|-------|------|-------------| +| `valid` | boolean | Whether the delegation is currently valid | +| `chainId` | UUID | Chain identifier | +| `delegatorAgentId` | UUID | Delegating agent | +| `delegateeAgentId` | UUID | Receiving agent | +| `scopes` | string[] | Delegated scopes | +| `issuedAt` | ISO 8601 | Issue timestamp | +| `expiresAt` | ISO 8601 | Expiry timestamp | +| `revokedAt` | ISO 8601 \| null | Revocation timestamp, or null | + +**Error responses**: 400 `VALIDATION_ERROR`, 401 `UNAUTHORIZED`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/verify-delegation \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "delegationToken": "'$DELEGATION_TOKEN'" }' | jq . +``` + +--- + +### DELETE /oauth2/token/delegate/{chainId} — Revoke a delegation chain + +**Description**: Immediately revokes a delegation chain. The delegatee can no longer use the delegation token. Only the delegator can revoke their own chains. +**Auth**: Bearer token. + +**Path parameters**: `chainId` (UUID) + +**Response** `204 No Content`. + +**Error responses**: 401 `UNAUTHORIZED`, 403 `FORBIDDEN`, 404 `DELEGATION_NOT_FOUND`, 429 `RATE_LIMIT_EXCEEDED` + +**curl example**: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/oauth2/token/delegate/$CHAIN_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +--- + +## Section 13 — Marketplace + +The Marketplace is feature-flagged via `MARKETPLACE_ENABLED` env var. When disabled, all endpoints +return `404 NOT_FOUND`. All marketplace endpoints are **unauthenticated** — no Bearer token required. + +### GET /marketplace/agents — List public agents + +**Description**: Returns a paginated list of publicly-listed agents. +**Auth**: None. 
+ +**Query parameters**: `page` (default 1), `limit` (default 20, max 100), `q` (text search), `capability` (filter by capability string), `publisher` (filter by owner) + +**Response** `200 OK`: `{ data: PublicAgent[], total: number, page: number, limit: number }` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/marketplace/agents?q=screener&limit=20" | jq . +``` + +--- + +### GET /marketplace/agents/{agentId} — Get public agent + +**Description**: Returns a single public agent with its DID document included. Returns 404 if the agent is private or inactive. +**Auth**: None. + +**Path parameters**: `agentId` (UUID) + +**Response** `200 OK`: Public agent object including `didDocument` field. + +**Error responses**: 404 `AGENT_NOT_FOUND` + +**curl example**: + +```bash +curl -s "http://localhost:3000/api/v1/marketplace/agents/$AGENT_ID" | jq . +``` diff --git a/openspec/changes/archive/developer-docs-phase6-update/specs/ws2-concepts/spec.md b/openspec/changes/archive/developer-docs-phase6-update/specs/ws2-concepts/spec.md new file mode 100644 index 0000000..db5c54f --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/specs/ws2-concepts/spec.md @@ -0,0 +1,283 @@ +# WS2 Spec — concepts.md + +## Target file +`docs/developers/concepts.md` + +## Objective +Add six new concept sections to the existing file. Do NOT modify any of the existing sections +(What is AgentIdP?, What is an AI Agent Identity?, AGNTCY Alignment, Agent Lifecycle, +OAuth 2.0 Client Credentials, Free Tier Limits). Append the new sections at the end of the file, +each separated by a `---` horizontal rule and preceded by a level-2 heading. + +--- + +## New section order (append after existing content) + +1. Organizations and Multi-tenancy +2. DID Identity +3. OIDC Provider +4. A2A Delegation +5. API Tier Plans +6. 
AGNTCY Compliance + +--- + +## Section 1 — Organizations and Multi-tenancy + +**Heading**: `## Organizations and Multi-tenancy` + +**Content to write** (3 paragraphs + table): + +Paragraph 1: +An **organization** is the top-level grouping unit in AgentIdP. Every registered agent can be +scoped to an organization by including an `organization_id` in the agent registration request. +Organizations have a unique `slug` (URL-safe identifier), a display `name`, and a `planTier` +that controls per-org resource limits. All API operations that involve analytics, webhooks, tiers, +and delegation are tenant-scoped: they only see data belonging to their organization. + +Paragraph 2: +**Tenant isolation** is enforced at the service layer. Every query involving multi-tenant data +filters by `organization_id`. A token issued to an agent in org A cannot read data from org B. +The `organization_id` is embedded in the JWT at token issuance time and validated on every +request. This means you do not need to pass an org ID as a query parameter — it is derived +automatically from the authenticated token. + +Paragraph 3: +When you create an organization, you define its `slug`. Slugs are immutable — once set, they +cannot be changed. Choose a slug that matches your domain or product namespace, as it is used +in DID identifiers for agents in that organization. Membership is managed through the +`POST /api/v1/organizations/{orgId}/members` endpoint, which lets you add an existing agent +to an organization with a `member` or `admin` role. 
+ +Table — org field reference: + +| Field | Type | Description | +|-------|------|-------------| +| `organizationId` | UUID | System-assigned immutable identifier | +| `name` | string | Human-readable display name | +| `slug` | string | URL-safe unique identifier (immutable after creation) | +| `planTier` | enum | `free` \| `pro` \| `enterprise` | +| `maxAgents` | integer | Maximum active agents in this org | +| `maxTokensPerMonth` | integer | Maximum token issuances per month | +| `status` | enum | `active` \| `suspended` \| `deleted` | + +--- + +## Section 2 — DID Identity + +**Heading**: `## DID Identity` + +**Content to write** (3 paragraphs + DID structure example): + +Paragraph 1: +Every agent registered in AgentIdP automatically receives a **Decentralized Identifier (DID)** +using the `did:web` method. A DID is a globally unique, self-describing identifier that does not +rely on a central registry. The DID for an agent takes the form +`did:web:<host>:agents:<agentId>` — for example, +`did:web:localhost%3A3000:agents:a1b2c3d4-e5f6-7890-abcd-ef1234567890`. The `did:web` method +means the DID document is resolvable via HTTPS: a resolver fetches +`https://<host>/api/v1/agents/<agentId>/did`. + +Paragraph 2: +The **DID Document** is a JSON-LD object that describes the agent's cryptographic keys and +service endpoints. It contains: the agent's DID as its `id`, a `verificationMethod` array with +the agent's public key in JWK format, an `authentication` array referencing that key, and an +`agntcy` extension object carrying agent metadata (type, capabilities, version, owner, +deploymentEnv). This document is publicly accessible — no authentication required — so any +external system can verify this agent's identity without contacting AgentIdP directly. + +Paragraph 3: +The `did:web` scheme was chosen because it is widely supported by DID resolvers, requires no +blockchain, and leverages standard HTTPS infrastructure. 
When an external system receives a +token from your agent, it can resolve your agent's DID, retrieve the public key from the DID +Document, and independently verify the token's signature. This is the foundation of +cross-system agent identity verification. + +DID document structure diagram (Markdown code block): + +``` +DID Document structure for a registered agent +─────────────────────────────────────────────── +{ + "@context": ["https://www.w3.org/ns/did/v1"], + "id": "did:web:<host>:agents:<agentId>", + "controller": "did:web:<host>:agents:<agentId>", + "verificationMethod": [ + { + "id": "<did>#key-1", + "type": "JsonWebKey2020", + "controller": "<did>", + "publicKeyJwk": { "kty": "RSA", ... } + } + ], + "authentication": ["<did>#key-1"], + "agntcy": { + "agentId": "<agentId>", + "agentType": "screener", + "capabilities": ["resume:read"], + "deploymentEnv": "production", + "owner": "talent-team", + "version": "1.0.0" + } +} +``` + +--- + +## Section 3 — OIDC Provider + +**Heading**: `## OIDC Provider` + +**Content to write** (3 paragraphs): + +Paragraph 1: +AgentIdP implements a subset of the **OpenID Connect (OIDC)** protocol, acting as an OIDC +Provider for the agents it manages. This means AgentIdP publishes a standard discovery +document at `GET /.well-known/openid-configuration`, which any OIDC-aware client can use to +discover supported grant types, token endpoint, JWKS URI, and other metadata. It also exposes +a JWKS endpoint at `GET /.well-known/jwks.json` for external systems to retrieve the public +keys used to verify tokens. + +Paragraph 2: +The **`/agent-info` endpoint** is the equivalent of OIDC's UserInfo endpoint — it returns +identity claims for the authenticated agent. External systems that receive a token issued by +AgentIdP can call this endpoint (with that token) to retrieve the agent's verified identity +attributes: its `agentId`, `email`, `agentType`, `capabilities`, and `organization_id`. 
This +is particularly useful when a downstream service needs to verify the identity of an agent +presenting a token, without duplicating identity data in its own store. + +Paragraph 3: +AgentIdP also supports **OIDC token exchange for GitHub Actions**. If you run your agent +deployment workflows in GitHub Actions, you can configure a trust policy +(`POST /api/v1/oidc/trust-policies`) that maps a GitHub repository and branch to an AgentIdP +agent. The workflow can then exchange its GitHub OIDC JWT for an AgentIdP access token via +`POST /api/v1/oidc/token` — no stored secrets required. This enables keyless, short-lived +token issuance in CI/CD pipelines. + +--- + +## Section 4 — A2A Delegation + +**Heading**: `## A2A Delegation` + +**Content to write** (3 paragraphs + flow diagram): + +Paragraph 1: +**Agent-to-Agent (A2A) delegation** allows one agent to grant another agent a subset of its own +OAuth 2.0 scopes for a limited time. This is the building block for multi-agent pipelines where +an orchestrator agent needs to delegate work to a specialist sub-agent without sharing its own +full credentials. A delegation chain consists of: a delegator (the agent granting authority), +a delegatee (the agent receiving authority), a set of scopes (must be a strict subset of the +delegator's own scopes), and a TTL (60 seconds to 86,400 seconds). + +Paragraph 2: +The **grant flow** is straightforward: the delegator calls `POST /api/v1/oauth2/token/delegate` +with the delegatee's agent ID, the scopes to grant, and the TTL. AgentIdP returns a signed +delegation token. The delegatee presents this token when calling +`POST /api/v1/oauth2/token/verify-delegation` to prove it has been granted authority. AgentIdP +verifies the chain integrity and returns the delegation details including whether it is still +valid. The delegator can revoke the chain at any time via +`DELETE /api/v1/oauth2/token/delegate/{chainId}`. 
+ +Paragraph 3: +Delegation is useful for: workflow handoffs between specialist agents, granting a monitoring +agent read-only access to resources owned by a processing agent, and time-limited cross-agent +authorization without credential sharing. Because delegation tokens are signed and verified +server-side, a delegatee cannot extend the TTL, expand the scope, or pass the delegation to a +third agent. The chain is always exactly one hop between two parties: delegator → delegatee. + +Delegation flow (Markdown code block): + +``` +A2A Delegation Flow +─────────────────── +1. Orchestrator (delegator) calls POST /api/v1/oauth2/token/delegate + → body: { delegateeAgentId, scopes: ["agents:read"], ttlSeconds: 3600 } + ← response: { delegationToken: "...", chainId: "...", expiresAt: "..." } + +2. Orchestrator passes delegationToken to the sub-agent out-of-band + +3. Sub-agent (delegatee) calls POST /api/v1/oauth2/token/verify-delegation + → body: { delegationToken: "..." } + ← response: { valid: true, scopes: ["agents:read"], expiresAt: "..." } + +4. Sub-agent uses its own Bearer token + confirmed scope to act on the orchestrator's behalf + +5. (Optional) Orchestrator calls DELETE /api/v1/oauth2/token/delegate/{chainId} + to revoke early +``` + +--- + +## Section 5 — API Tier Plans + +**Heading**: `## API Tier Plans` + +**Content to write** (3 paragraphs + table): + +Paragraph 1: +AgentIdP has three subscription tiers: **Free**, **Pro**, and **Enterprise**. Every organization +is on one tier at a time. The tier determines the resource limits enforced at runtime: maximum +number of active agents, maximum API calls per day, and maximum token issuances per day. When a +limit is reached, the relevant operation returns a `403 FREE_TIER_LIMIT_EXCEEDED` error until the +next calendar day resets the counter (for daily limits) or until you upgrade your tier. + +Paragraph 2: +You can check your current tier, configured limits, and live usage at any time by calling +`GET /api/v1/tiers/status`. 
The response shows your tier name, all three limit values, and the +live usage counters for the current day. If you need higher limits, call +`POST /api/v1/tiers/upgrade` with `{ "target_tier": "pro" }` or `"enterprise"`. This creates a +Stripe Checkout Session and returns a one-time `checkoutUrl`. After payment, the organization's +tier is updated automatically via Stripe webhook. + +Paragraph 3: +Enterprise tier limits are effectively unlimited (enforced as `Infinity` in the tier +configuration). Enterprise customers should contact SentryAgent.ai to arrange billing and +configure custom limits if needed. The `maxAgents` and `maxTokensPerMonth` fields on an +organization record can be overridden at org creation or update to set tighter or looser limits +than the tier defaults, regardless of tier. + +Tier comparison table: + +| Limit | Free | Pro | Enterprise | +|-------|------|-----|------------| +| Max agents | 10 | 100 | Unlimited | +| Max API calls / day | 1,000 | 50,000 | Unlimited | +| Max token issuances / day | 1,000 | 50,000 | Unlimited | +| Audit log retention | 90 days | 90 days | 90 days | +| Webhooks | Yes | Yes | Yes | +| Analytics | Yes | Yes | Yes | +| A2A Delegation | Yes | Yes | Yes | + +--- + +## Section 6 — AGNTCY Compliance + +**Heading**: `## AGNTCY Compliance` + +**Content to write** (3 paragraphs): + +Paragraph 1: +**AGNTCY** is an open standard from the Linux Foundation that defines how AI agents should be +identified, described, and governed across platforms. AgentIdP implements AGNTCY compliance +in two ways: every agent automatically gets a DID and an agent card (a structured JSON object +that describes the agent in the AGNTCY format), and AgentIdP can generate a **compliance +report** that summarizes the verified state of all agents in a tenant. An agent card is the +AGNTCY equivalent of a business card — it carries the agent's DID, type, capabilities, owner, +version, and identity provider. 
+ +Paragraph 2: +The **compliance report** (available at `GET /api/v1/compliance/report`) covers two dimensions: +agent-identity verification (are all active agents reachable via their DID?) and audit-trail +integrity (is the hash chain of audit events intact?). The report includes a boolean +`agntcyConformance` field that summarizes whether the tenant meets AGNTCY baseline requirements. +Reports are cached in Redis for 5 minutes; the `X-Cache: HIT` header signals a cached response. + +Paragraph 3: +For self-auditing and external audits, you can export all active agents as AGNTCY agent cards +in bulk via `GET /api/v1/compliance/agent-cards`. This is an array of card objects that +external compliance tools and AGNTCY-compatible registries can ingest directly. The +`GET /api/v1/compliance/controls` endpoint (no authentication required) provides a live +status snapshot of all SOC 2 Trust Services Criteria controls that AgentIdP monitors internally. +These endpoints are gated by the `COMPLIANCE_ENABLED` environment variable; if disabled, they +return `404`. diff --git a/openspec/changes/archive/developer-docs-phase6-update/specs/ws3-quick-start/spec.md b/openspec/changes/archive/developer-docs-phase6-update/specs/ws3-quick-start/spec.md new file mode 100644 index 0000000..2fd5b0b --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/specs/ws3-quick-start/spec.md @@ -0,0 +1,212 @@ +# WS3 Spec — quick-start.md + +## Target file +`docs/developers/quick-start.md` + +## Objective +The quick start must be updated so that creating an organization becomes a new Step 5, inserted immediately before agent registration, since agents +can now be scoped to an organization. The steps that follow it are renumbered accordingly. All endpoint paths +must be verified against current routes. The new step order is: + +1. Clone and configure (unchanged) +2. Start infrastructure (unchanged) +3. Start the AgentIdP server (unchanged) +4. Generate a bootstrap token (unchanged) +5. **[NEW]** Create an organization +6. 
Register an agent (update to include `organization_id`) +7. Generate a credential (renumbered, content unchanged) +8. Issue an access token (renumbered, content unchanged) + +The "What's next" section at the bottom must be updated to include new guides. + +--- + +## Surgical edits — exact changes to make + +### Edit 1 — Update the title paragraph + +**Find** (exact text): +``` +This guide gets you from zero to a working agent identity with a valid OAuth 2.0 access token. It takes under 5 minutes. +``` + +**Replace with**: +``` +This guide gets you from zero to a working agent identity inside an organization, with a valid OAuth 2.0 access token. It takes under 5 minutes. +``` + +--- + +### Edit 2 — Renumber Step 4 and insert new Step 5 + +After the existing Step 4 section (Generate a bootstrap token), which ends with: + +``` +> This bootstrap token is a one-time tool for registering your first agent. Once you have an agent with credentials, use `POST /token` for all subsequent authentication. +``` + +Insert a `---` separator followed by the new **Step 5 — Create an organization** section: + +```markdown +--- + +## Step 5 — Create an organization + +Agents are scoped to organizations. Create one now so your agent has an `organization_id` to belong to: + +```bash +curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Authorization: Bearer $BOOTSTRAP_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My AI Project", + "slug": "my-ai-project" + }' | jq . 
+``` + +Example response (`201 Created`): + +```json +{ + "organizationId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "name": "My AI Project", + "slug": "my-ai-project", + "planTier": "free", + "maxAgents": 10, + "maxTokensPerMonth": 10000, + "status": "active", + "createdAt": "2026-04-04T09:00:00.000Z", + "updatedAt": "2026-04-04T09:00:00.000Z" +} +``` + +Save the `organizationId`: + +```bash +export ORG_ID="org-0a1b2c3d-e4f5-6789-abcd-ef0123456789" +``` +``` + +--- + +### Edit 3 — Renumber the agent registration step + +**Find** (heading only): +``` +## Step 5 — Register an agent +``` + +**Replace with**: +``` +## Step 6 — Register an agent +``` + +--- + +### Edit 4 — Update the agent registration curl command to include organization_id + +**Find** (exact curl command): +```bash +curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Authorization: Bearer $BOOTSTRAP_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "email": "my-first-agent@myproject.ai", + "agentType": "custom", + "version": "1.0.0", + "capabilities": ["data:read"], + "owner": "my-team", + "deploymentEnv": "development" + }' | jq . +``` + +**Replace with**: +```bash +curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Authorization: Bearer $BOOTSTRAP_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "email": "my-first-agent@myproject.ai", + "agentType": "custom", + "version": "1.0.0", + "capabilities": ["data:read"], + "owner": "my-team", + "deploymentEnv": "development", + "organization_id": "'$ORG_ID'" + }' | jq . 
+``` + +--- + +### Edit 5 — Renumber Step 6 (credential generation) + +**Find** (heading only): +``` +## Step 6 — Generate a credential +``` + +**Replace with**: +``` +## Step 7 — Generate a credential +``` + +--- + +### Edit 6 — Renumber Step 7 (token issuance) + +**Find** (heading only): +``` +## Step 7 — Issue an access token +``` + +**Replace with**: +``` +## Step 8 — Issue an access token +``` + +--- + +### Edit 7 — Update the "What's next" section + +**Find** (entire What's next section): +```markdown +## What's next + +- [Core Concepts](concepts.md) — understand AgentIdP, AGNTCY, and the agent identity model +- [Guides](guides/README.md) — step-by-step walkthroughs for credentials, tokens, and audit logs +- [API Reference](api-reference.md) — every endpoint documented with curl examples +``` + +**Replace with**: +```markdown +## What's next + +- [Core Concepts](concepts.md) — understand AgentIdP, AGNTCY, orgs, DID, delegation, and tiers +- [Guides](guides/README.md) — step-by-step walkthroughs for all workflows +- [API Reference](api-reference.md) — every endpoint documented with curl examples + +**New guides for Phase 6 features:** + +- [Use the Analytics Dashboard](guides/use-analytics-dashboard.md) — query token trends and activity +- [Manage API Tiers](guides/manage-api-tiers.md) — check limits and upgrade your plan +- [A2A Delegation](guides/a2a-delegation.md) — delegate authority between agents +- [Configure Webhooks](guides/configure-webhooks.md) — subscribe to real-time events +- [AGNTCY Compliance](guides/agntcy-compliance.md) — export agent cards and generate compliance reports +``` + +--- + +## Verification checklist + +The Developer must verify all curl paths in the updated file match these current routes: + +| Action | Current endpoint | +|--------|-----------------| +| Create org | `POST /api/v1/organizations` | +| Register agent | `POST /api/v1/agents` | +| Generate credential | `POST /api/v1/agents/{agentId}/credentials` | +| Issue token | 
`POST /api/v1/token` | + +The bootstrap token scopes must include `agents:write` and `organizations:write` (or the equivalent +OPA-enforced policy). The bootstrap token generation node script in Step 4 currently uses +`agents:read agents:write tokens:read audit:read`. This is unchanged — no edit needed there. diff --git a/openspec/changes/archive/developer-docs-phase6-update/specs/ws4-guides/spec.md b/openspec/changes/archive/developer-docs-phase6-update/specs/ws4-guides/spec.md new file mode 100644 index 0000000..4639af7 --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/specs/ws4-guides/spec.md @@ -0,0 +1,1079 @@ +# WS4 Spec — guides/ + +## Target files +- `docs/developers/guides/README.md` — update index table +- `docs/developers/guides/register-an-agent.md` — surgical update +- `docs/developers/guides/manage-credentials.md` — surgical update +- `docs/developers/guides/issue-and-revoke-tokens.md` — surgical update +- `docs/developers/guides/query-audit-logs.md` — surgical update +- `docs/developers/guides/use-analytics-dashboard.md` — create new +- `docs/developers/guides/manage-api-tiers.md` — create new +- `docs/developers/guides/a2a-delegation.md` — create new +- `docs/developers/guides/configure-webhooks.md` — create new +- `docs/developers/guides/agntcy-compliance.md` — create new + +--- + +## Part A — Updates to existing guides + +### A1. 
guides/README.md + +**Find** (entire table): +```markdown +| Guide | What it covers | +|-------|----------------| +| [Register an Agent](register-an-agent.md) | All registration fields, validation rules, common errors and fixes | +| [Manage Credentials](manage-credentials.md) | Generate, list, rotate, and revoke credentials | +| [Issue and Revoke Tokens](issue-and-revoke-tokens.md) | OAuth 2.0 Client Credentials flow, JWT structure, introspect, revoke | +| [Query Audit Logs](query-audit-logs.md) | Filters, pagination, event structure, 90-day retention | +``` + +**Replace with** (adding 5 new rows): +```markdown +| Guide | What it covers | +|-------|----------------| +| [Register an Agent](register-an-agent.md) | All registration fields, organization scoping, validation rules, common errors | +| [Manage Credentials](manage-credentials.md) | Generate, list, rotate, and revoke credentials | +| [Issue and Revoke Tokens](issue-and-revoke-tokens.md) | OAuth 2.0 Client Credentials flow, JWT structure, introspect, revoke | +| [Query Audit Logs](query-audit-logs.md) | Filters, pagination, event structure, 90-day retention | +| [Use the Analytics Dashboard](use-analytics-dashboard.md) | Query token trends, agent activity heatmap, and per-agent usage | +| [Manage API Tiers](manage-api-tiers.md) | Check current tier, understand limits, trigger a Stripe upgrade | +| [A2A Delegation](a2a-delegation.md) | Create and verify agent-to-agent delegation chains | +| [Configure Webhooks](configure-webhooks.md) | Subscribe to events, understand delivery guarantees, inspect history | +| [AGNTCY Compliance](agntcy-compliance.md) | Export agent cards, generate compliance reports, verify audit chain | +``` + +--- + +### A2. guides/register-an-agent.md + +#### Edit A2a — Add `organization_id` to request fields table + +**Find** (last row of the request fields table): +```markdown +| `deploymentEnv` | string (enum) | Yes | Target deployment environment. See values below. 
| +``` + +**Replace with**: +```markdown +| `deploymentEnv` | string (enum) | Yes | Target deployment environment. See values below. | +| `organization_id` | string (UUID) | No | UUID of the organization to scope this agent to. Recommended on all multi-tenant instances. | +``` + +#### Edit A2b — Update the example curl command to include organization_id + +**Find** (the curl command in the example section): +```bash +curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "email": "screener-001@talent.ai", + "agentType": "screener", + "version": "1.0.0", + "capabilities": ["resume:read", "email:send", "candidate:score"], + "owner": "talent-acquisition-team", + "deploymentEnv": "production" + }' | jq . +``` + +**Replace with**: +```bash +curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "email": "screener-001@talent.ai", + "agentType": "screener", + "version": "1.0.0", + "capabilities": ["resume:read", "email:send", "candidate:score"], + "owner": "talent-acquisition-team", + "deploymentEnv": "production", + "organization_id": "'$ORG_ID'" + }' | jq . +``` + +#### Edit A2c — Add note about organization_id after the curl example + +After the "The `agentId` is assigned by the system — it is immutable and never changes." line, +insert: + +```markdown +> **Organization scoping**: If you include `organization_id` in the request, the agent is +> associated with that organization. Analytics, webhook events, and tier enforcement are all +> scoped by organization. To create an organization first, see the +> [Quick Start](../quick-start.md) guide. +``` + +--- + +### A3. guides/manage-credentials.md + +No endpoint path changes needed in this guide. 
All endpoint paths are current: +- `POST /api/v1/agents/{agentId}/credentials` — correct +- `GET /api/v1/agents/{agentId}/credentials` — correct +- `POST /api/v1/agents/{agentId}/credentials/{credentialId}/rotate` — correct +- `DELETE /api/v1/agents/{agentId}/credentials/{credentialId}` — correct + +#### Edit A3a — Add a note about org-scoped token issuance at the top + +After the first paragraph ("A credential is a `client_id` + `client_secret` pair..."), insert: + +```markdown +> **Multi-tenant note**: Credentials issued for an agent that belongs to an organization will +> produce tokens carrying an `organization_id` claim. This claim is required by analytics, +> webhooks, tier enforcement, and A2A delegation. Ensure your agent is registered with +> `organization_id` before issuing credentials for production use. +``` + +--- + +### A4. guides/issue-and-revoke-tokens.md + +No endpoint path changes needed. Paths are current: +- `POST /api/v1/token` — correct +- `POST /api/v1/token/introspect` — correct +- `POST /api/v1/token/revoke` — correct + +#### Edit A4a — Expand available scopes table + +**Find** (the scopes table): +```markdown +| Scope | What it allows | +|-------|----------------| +| `agents:read` | Read agent records | +| `agents:write` | Create, update, decommission agents | +| `tokens:read` | Introspect tokens | +| `audit:read` | Query audit logs | +``` + +**Replace with** (add new scopes for Phase 6 features): +```markdown +| Scope | What it allows | +|-------|----------------| +| `agents:read` | Read agent identity records | +| `agents:write` | Create, update, and decommission agents | +| `tokens:read` | Introspect tokens | +| `audit:read` | Query audit logs and verify audit chain integrity | +| `webhooks:read` | List webhook subscriptions and delivery history | +| `webhooks:write` | Create, update, and delete webhook subscriptions | +| `admin:orgs` | Manage organizations and federation partners | +``` + +--- + +### A5. 
guides/query-audit-logs.md + +No endpoint path changes needed. Paths are current: +- `GET /api/v1/audit` — correct +- `GET /api/v1/audit/{eventId}` — correct + +#### Edit A5a — Add audit/verify to the action table + +After the existing action table (the "What gets logged" section), add a note: + +```markdown +> **Audit chain verification**: In addition to querying events, you can verify the cryptographic +> integrity of the entire audit hash chain via `GET /api/v1/audit/verify`. This endpoint requires +> `audit:read` scope and is rate-limited to 30 requests/min. See the +> [API Reference](../api-reference.md#get-auditverify---verify-audit-chain-integrity) for details. +``` + +--- + +## Part B — New guide files + +### B1. guides/use-analytics-dashboard.md + +Write a complete new file with this content: + +```markdown +# Use the Analytics Dashboard + +This guide explains how to query the three analytics endpoints to understand your organization's +token usage and agent activity patterns. + +All analytics endpoints require Bearer token authentication and are scoped to the organization +embedded in your token. + +--- + +## Prerequisites + +- A running AgentIdP instance +- A valid Bearer token with `organization_id` in its claims +- At least one agent registered and some token issuance activity + +--- + +## Token issuance trend + +`GET /api/v1/analytics/tokens` + +Returns daily token issuance counts for the past N days (default 30, max 90). Use this to +track usage growth, identify traffic spikes, and plan capacity. + +```bash +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=30" \ + -H "Authorization: Bearer $TOKEN" | jq . 
+``` + +Response: + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "days": 30, + "data": [ + { "date": "2026-03-06", "count": 142 }, + { "date": "2026-03-07", "count": 198 }, + { "date": "2026-03-08", "count": 0 } + ] +} +``` + +**Interpreting the data**: Each item in `data` is one calendar day (UTC) with the number of +tokens issued on that day. Days with zero issuance are included with `count: 0`. The array +is ordered chronologically, oldest first. + +**Using it**: Compare day-over-day counts to identify growth or anomalies. A sudden spike in +`count` may indicate an agent retry loop or a credential leak. Zero-count days during expected +operation may indicate a deployment issue. + +**Query parameter**: `days` — positive integer, max 90. Returns `400 VALIDATION_ERROR` if +exceeded. + +```bash +# Last 7 days +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=7" \ + -H "Authorization: Bearer $TOKEN" | jq . + +# Last 90 days (maximum) +curl -s "http://localhost:3000/api/v1/analytics/tokens?days=90" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Agent activity heatmap + +`GET /api/v1/analytics/agents/activity` + +Returns request counts grouped by day-of-week (0 = Sunday, 6 = Saturday) and hour (0–23, UTC). +Use this to identify peak usage windows for capacity planning and rate limit tuning. + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents/activity" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "data": [ + { "dow": 1, "hour": 9, "count": 54 }, + { "dow": 1, "hour": 10, "count": 87 }, + { "dow": 3, "hour": 14, "count": 201 } + ] +} +``` + +**Interpreting the data**: `dow` is 0 (Sunday) through 6 (Saturday). `hour` is 0–23 UTC. +Only non-zero cells are returned — missing combinations had zero activity. Sort by `count` +descending to find your peak windows. 
+ +**Using it**: If most activity is on weekday mornings UTC, ensure your rate limit headroom +covers that window. If weekend activity is unexpectedly high, investigate which agents are +active. + +--- + +## Per-agent usage summary + +`GET /api/v1/analytics/agents` + +Returns token issuance counts per agent for the current calendar month (UTC). Use this to +identify your most active agents and check if any single agent is consuming a +disproportionate share of your monthly token budget. + +```bash +curl -s "http://localhost:3000/api/v1/analytics/agents" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "month": "2026-04", + "data": [ + { "agentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "tokenCount": 312 }, + { "agentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", "tokenCount": 87 } + ] +} +``` + +**Interpreting the data**: Each item shows an agent UUID and the number of tokens issued +to it this month. The response covers the full current calendar month from day 1 to now. +It resets on the first day of each month. + +**Using it**: Cross-reference `agentId` values against `GET /api/v1/agents` to identify the +agents by name. If one agent accounts for >80% of usage, investigate whether it is caching +tokens correctly or requesting tokens unnecessarily. +``` + +--- + +### B2. guides/manage-api-tiers.md + +Write a complete new file with this content: + +```markdown +# Manage API Tiers + +This guide explains how to check your organization's current plan tier, understand the enforced +limits, and initiate an upgrade via Stripe. + +--- + +## Prerequisites + +- A running AgentIdP instance +- A valid Bearer token with `organization_id` in its claims + +--- + +## Check current tier status + +`GET /api/v1/tiers/status` + +Returns your organization's tier, the configured limits, and live usage counters for today. 
+ +```bash +curl -s "http://localhost:3000/api/v1/tiers/status" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "tier": "free", + "limits": { + "maxAgents": 10, + "maxCallsPerDay": 1000, + "maxTokensPerDay": 1000 + }, + "usage": { + "agentCount": 3, + "callsToday": 142, + "tokensToday": 87 + } +} +``` + +**Understanding the fields**: + +| Field | Description | +|-------|-------------| +| `tier` | Current plan: `free`, `pro`, or `enterprise` | +| `limits.maxAgents` | Maximum active (non-decommissioned) agents allowed | +| `limits.maxCallsPerDay` | Maximum total API calls per calendar day (UTC) | +| `limits.maxTokensPerDay` | Maximum token issuances per calendar day (UTC) | +| `usage.agentCount` | Current number of active agents | +| `usage.callsToday` | API calls made so far today | +| `usage.tokensToday` | Tokens issued so far today | + +**When limits are reached**: The relevant endpoint returns `403 FREE_TIER_LIMIT_EXCEEDED`. +Daily counters reset at midnight UTC. The agent count limit is a current count, not a daily +counter — decommissioning an agent immediately frees capacity. + +--- + +## Tier comparison + +| Limit | Free | Pro | Enterprise | +|-------|------|-----|------------| +| Max agents | 10 | 100 | Unlimited | +| Max API calls / day | 1,000 | 50,000 | Unlimited | +| Max token issuances / day | 1,000 | 50,000 | Unlimited | + +--- + +## Upgrade your tier + +`POST /api/v1/tiers/upgrade` + +Creates a Stripe Checkout Session and returns a one-time URL. Complete the payment in the +browser to upgrade your organization's tier. + +```bash +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "pro" }' | jq . +``` + +Response: + +```json +{ + "checkoutUrl": "https://checkout.stripe.com/pay/cs_live_a1b2c3d4e5f6..." +} +``` + +Open `checkoutUrl` in a browser to complete payment. 
After successful payment, Stripe sends a +webhook to AgentIdP which automatically upgrades your organization's tier. + +**Constraints**: +- `target_tier` must be `pro` or `enterprise` +- `target_tier` must be higher than your current tier (you cannot downgrade via this endpoint) +- Attempting to upgrade to the current or a lower tier returns `400 VALIDATION_ERROR` + +```bash +# Upgrade from free to pro +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "pro" }' | jq . + +# Upgrade from pro to enterprise +curl -s -X POST http://localhost:3000/api/v1/tiers/upgrade \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "target_tier": "enterprise" }' | jq . +``` + +--- + +## Common errors + +### `400 VALIDATION_ERROR` — target_tier missing or invalid + +```json +{ + "code": "VALIDATION_ERROR", + "message": "target_tier must be one of: free, pro, enterprise.", + "details": { "received": "premium" } +} +``` + +**Fix**: Use `"pro"` or `"enterprise"`. + +### `400 TIER_UPGRADE_NOT_REQUIRED` — not an upgrade + +**Fix**: You are already on this tier or a higher tier. Check `GET /api/v1/tiers/status` first. + +### `401 UNAUTHORIZED` — token lacks organization_id + +The tier endpoints require a token with an `organization_id` claim. Use a token issued by an +agent that was registered with `organization_id`. Tokens issued via the bootstrap method +(without an org) do not carry `organization_id` and will fail. +``` + +--- + +### B3. guides/a2a-delegation.md + +Write a complete new file with this content: + +```markdown +# A2A Delegation + +Agent-to-Agent (A2A) delegation lets one agent grant another agent a subset of its OAuth 2.0 +scopes for a defined period. This is the foundation for building secure multi-agent pipelines +where an orchestrator agent coordinates specialist sub-agents. 
+ +--- + +## Prerequisites + +- A running AgentIdP instance +- Two registered agents: the delegator (has a Bearer token) and the delegatee (knows its + `agentId`) +- The delegator's scopes must be a superset of the scopes it wants to delegate + +--- + +## How delegation works + +``` +Delegator agent Delegatee agent + | | + |-- POST /oauth2/token/delegate ----------->| (creates chain server-side) + |<-- { delegationToken, chainId, scopes } --| + | | + |-- passes delegationToken out-of-band ---->| + | | + | POST /oauth2/token/verify-delegation + | <-- { valid: true, scopes, expiresAt } + | | + | (optional) DELETE /oauth2/token/delegate/{chainId} +``` + +--- + +## Step 1 — Create a delegation chain + +The delegator agent creates the chain by specifying the delegatee's `agentId`, the scopes to +delegate (must be a strict subset of the delegator's own scopes), and the TTL in seconds. + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/delegate \ + -H "Authorization: Bearer $DELEGATOR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "delegateeAgentId": "'$DELEGATEE_AGENT_ID'", + "scopes": ["agents:read"], + "ttlSeconds": 3600 + }' | jq . +``` + +Response (`201 Created`): + +```json +{ + "delegationToken": "sa_del_a1b2c3d4e5f6...", + "chainId": "d4e5f6a7-b8c9-0123-def0-123456789abc", + "delegatorAgentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "delegateeAgentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", + "scopes": ["agents:read"], + "expiresAt": "2026-04-04T10:00:00.000Z" +} +``` + +Save the `delegationToken` and `chainId`: + +```bash +export DELEGATION_TOKEN="sa_del_a1b2c3d4e5f6..." +export CHAIN_ID="d4e5f6a7-b8c9-0123-def0-123456789abc" +``` + +**TTL constraints**: minimum 60 seconds, maximum 86400 seconds (24 hours). Choose the minimum +TTL that covers the delegatee's task. + +--- + +## Step 2 — Pass the delegation token to the delegatee + +Pass `DELEGATION_TOKEN` to the delegatee agent out-of-band. 
This can be via a shared queue, +a direct API call to the sub-agent, or any other channel. The token is a signed opaque string — +do not parse it; treat it as an opaque credential. + +--- + +## Step 3 — Verify the delegation token + +The delegatee (or any agent checking the delegation) calls the verify endpoint. This confirms +the chain is valid and not expired or revoked. + +```bash +curl -s -X POST http://localhost:3000/api/v1/oauth2/token/verify-delegation \ + -H "Authorization: Bearer $DELEGATEE_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "delegationToken": "'$DELEGATION_TOKEN'" }' | jq . +``` + +Response (`200 OK` — valid delegation): + +```json +{ + "valid": true, + "chainId": "d4e5f6a7-b8c9-0123-def0-123456789abc", + "delegatorAgentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "delegateeAgentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", + "scopes": ["agents:read"], + "issuedAt": "2026-04-04T09:00:00.000Z", + "expiresAt": "2026-04-04T10:00:00.000Z", + "revokedAt": null +} +``` + +Response (`200 OK` — expired delegation): + +```json +{ + "valid": false, + "chainId": "d4e5f6a7-b8c9-0123-def0-123456789abc", + "delegatorAgentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "delegateeAgentId": "b2c3d4e5-f6a7-8901-bcde-f12345678901", + "scopes": ["agents:read"], + "issuedAt": "2026-04-03T09:00:00.000Z", + "expiresAt": "2026-04-03T10:00:00.000Z", + "revokedAt": null +} +``` + +> The verify endpoint always returns `200 OK`. Check the `valid` field — it is never an error +> response for an expired or revoked token. + +--- + +## Step 4 — (Optional) Revoke the delegation early + +If the delegatee has completed its task and you want to revoke the delegation before it expires, +the delegator calls: + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/oauth2/token/delegate/$CHAIN_ID" \ + -H "Authorization: Bearer $DELEGATOR_TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +Expected response: `204` (no body). 
+ +After revocation, verify requests for this chain return `{ "valid": false, "revokedAt": "<ISO 8601 timestamp>" }`. + +--- + +## Scope rules + +- Delegated scopes must be a strict subset of the delegator's own token scopes +- You cannot delegate scopes you do not have +- You cannot delegate to yourself (delegateeAgentId must differ from delegatorAgentId) +- Delegation is not transitive — a delegatee cannot re-delegate to a third agent + +--- + +## Common errors + +### `400 VALIDATION_ERROR` — scope not a subset + +The delegator attempted to delegate a scope it does not hold. Check `GET /api/v1/token/introspect` +to confirm which scopes your token carries. + +### `400 VALIDATION_ERROR` — ttlSeconds out of range + +Min: 60, Max: 86400. Values outside this range return a validation error. +``` + +--- + +### B4. guides/configure-webhooks.md + +Write a complete new file with this content: + +```markdown +# Configure Webhooks + +Webhooks let AgentIdP push real-time events to your application when agents, credentials, or +tokens change state. This guide covers creating subscriptions, the available event types, +delivery guarantees, and how to inspect delivery history. 
+ +--- + +## Prerequisites + +- A running AgentIdP instance +- A valid Bearer token with `organization_id` in its claims +- A publicly reachable HTTPS endpoint to receive events (for local development, use a tool + like [ngrok](https://ngrok.com)) + +--- + +## Available event types + +| Event type | Triggered when | +|-----------|----------------| +| `agent.created` | A new agent is registered | +| `agent.updated` | An agent's metadata is updated | +| `agent.suspended` | An agent's status changes to `suspended` | +| `agent.reactivated` | An agent's status changes from `suspended` to `active` | +| `agent.decommissioned` | An agent is decommissioned | +| `credential.generated` | New credentials are created for an agent | +| `credential.rotated` | A credential's secret is rotated | +| `credential.revoked` | A credential is revoked | +| `token.issued` | An access token is issued | +| `token.revoked` | An access token is revoked | + +--- + +## Create a subscription + +`POST /api/v1/webhooks` + +```bash +curl -s -X POST http://localhost:3000/api/v1/webhooks \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "prod-agent-events", + "url": "https://my-app.example.com/hooks/sentryagent", + "events": ["agent.created", "agent.decommissioned", "token.issued"] + }' | jq . +``` + +Response (`201 Created`): + +```json +{ + "id": "wh-1a2b3c4d-e5f6-7890-abcd-ef1234567890", + "organization_id": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "name": "prod-agent-events", + "url": "https://my-app.example.com/hooks/sentryagent", + "events": ["agent.created", "agent.decommissioned", "token.issued"], + "active": true, + "signingSecret": "whsec_a1b2c3d4e5f6789...", + "failure_count": 0, + "created_at": "2026-04-04T09:00:00.000Z", + "updated_at": "2026-04-04T09:00:00.000Z" +} +``` + +> **Save the `signingSecret` now.** It is shown once. Use it to verify the HMAC-SHA256 +> signature on incoming webhook requests. 
See "Verifying delivery signatures" below. + +```bash +export WEBHOOK_ID="wh-1a2b3c4d-e5f6-7890-abcd-ef1234567890" +export SIGNING_SECRET="whsec_a1b2c3d4e5f6789..." +``` + +--- + +## Webhook payload format + +Every delivery sends a POST to your URL with `Content-Type: application/json` and this body: + +```json +{ + "id": "evt-uuid-here", + "event": "agent.created", + "timestamp": "2026-04-04T09:00:00.000Z", + "organization_id": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "data": { + "agentId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "email": "screener-001@talent.ai", + "agentType": "screener" + } +} +``` + +The `data` object contains event-specific fields. For `agent.*` events it includes agent +metadata. For `credential.*` events it includes `credentialId` and `agentId`. For `token.*` +events it includes `agentId` and `scope`. + +--- + +## Verifying delivery signatures + +AgentIdP signs every delivery with HMAC-SHA256 using your `signingSecret`. The signature is +in the `X-SentryAgent-Signature` header as `sha256=<hex-digest>`. + +Verify it in Node.js (`crypto.timingSafeEqual` throws if the two buffers differ in length, so treat a thrown error or a length mismatch as an invalid signature): + +```javascript +const crypto = require('crypto'); + +function verifySignature(rawBody, signingSecret, signatureHeader) { + const expected = 'sha256=' + crypto + .createHmac('sha256', signingSecret) + .update(rawBody) + .digest('hex'); + return crypto.timingSafeEqual( + Buffer.from(expected), + Buffer.from(signatureHeader) + ); +} +``` + +Always verify the signature before processing the event. Reject requests with invalid signatures +with `401 Unauthorized`. 
+ +--- + +## Delivery guarantees and retry policy + +- AgentIdP delivers each event **at least once** — your endpoint may receive duplicates +- Use the `id` field to deduplicate events +- Delivery is attempted immediately; on failure, retries use exponential backoff +- After repeated failures, the delivery moves to `dead_letter` status +- Subscriptions with high `failure_count` may be automatically disabled + +Delivery statuses: `pending` → `delivered` (success) or `failed` (attempt failed) → `dead_letter` +(all retries exhausted) + +--- + +## List subscriptions + +```bash +curl -s "http://localhost:3000/api/v1/webhooks" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +--- + +## Pause or resume a subscription + +To pause (disable) a subscription without deleting it: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "active": false }' | jq . +``` + +To resume: + +```bash +curl -s -X PATCH "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ "active": true }' | jq . +``` + +--- + +## Inspect delivery history + +`GET /api/v1/webhooks/{id}/deliveries` + +```bash +curl -s "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID/deliveries?limit=20&offset=0" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response: + +```json +{ + "deliveries": [ + { + "id": "del-uuid", + "subscription_id": "wh-uuid", + "event_type": "agent.created", + "payload": { ... }, + "status": "delivered", + "http_status_code": 200, + "attempt_count": 1, + "next_retry_at": null, + "delivered_at": "2026-04-04T09:00:01.000Z", + "created_at": "2026-04-04T09:00:00.000Z", + "updated_at": "2026-04-04T09:00:01.000Z" + } + ], + "total": 47, + "limit": 20, + "offset": 0 +} +``` + +Use `offset` to paginate through delivery history. 
Increase `limit` to retrieve more records +per page (the server default is 20). + +--- + +## Delete a subscription + +```bash +curl -s -X DELETE "http://localhost:3000/api/v1/webhooks/$WEBHOOK_ID" \ + -H "Authorization: Bearer $TOKEN" \ + -o /dev/null -w "%{http_code}\n" +``` + +Expected response: `204`. This permanently deletes the subscription and all its delivery records. +``` + +--- + +### B5. guides/agntcy-compliance.md + +Write a complete new file with this content: + +```markdown +# AGNTCY Compliance + +This guide explains how to use AgentIdP's AGNTCY compliance features: exporting agent cards, +generating compliance reports, verifying audit chain integrity, and checking SOC 2 control status. + +--- + +## Prerequisites + +- A running AgentIdP instance +- `COMPLIANCE_ENABLED` environment variable not set to `false` (enabled by default) +- A valid Bearer token (for authenticated endpoints) +- At least one registered agent + +--- + +## What is AGNTCY? + +AGNTCY is an open standard from the Linux Foundation for AI agent identity and governance. +AgentIdP implements AGNTCY by giving every agent a DID and an agent card. The compliance +endpoints let you export and report on that data in structured, auditable formats. + +--- + +## Export agent cards + +`GET /api/v1/compliance/agent-cards` + +Exports all active agents in your organization as AGNTCY-standard agent card JSON objects. +Suitable for ingestion by external compliance tools or AGNTCY-compatible registries. + +```bash +curl -s "http://localhost:3000/api/v1/compliance/agent-cards" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response (`200 OK`): Array of agent card objects. 
+ +```json +[ + { + "did": "did:web:localhost%3A3000:agents:a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "name": "screener-001@talent.ai", + "agentType": "screener", + "capabilities": ["resume:read", "email:send"], + "owner": "talent-team", + "version": "1.0.0", + "deploymentEnv": "production", + "identityProvider": "https://sentryagent.ai", + "issuedAt": "2026-04-04T09:00:00.000Z" + } +] +``` + +**Use cases**: +- Share with external auditors to demonstrate your agent fleet +- Import into AGNTCY-compatible discovery registries +- Baseline snapshot before and after deployments + +Save the output to a file: + +```bash +curl -s "http://localhost:3000/api/v1/compliance/agent-cards" \ + -H "Authorization: Bearer $TOKEN" > agent-cards-$(date +%Y%m%d).json +``` + +--- + +## Generate a compliance report + +`GET /api/v1/compliance/report` + +Generates an AGNTCY compliance report for your tenant. The report is cached for 5 minutes +(check the `X-Cache` header to see if the response is fresh or cached). + +```bash +curl -s "http://localhost:3000/api/v1/compliance/report" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response (`200 OK`): + +```json +{ + "tenantId": "org-0a1b2c3d-e4f5-6789-abcd-ef0123456789", + "generatedAt": "2026-04-04T09:00:00.000Z", + "agntcyConformance": true, + "agentCount": 12, + "verifiedAgentCount": 12, + "auditChainIntegrity": true, + "from_cache": false +} +``` + +**Interpreting the fields**: + +| Field | Description | +|-------|-------------| +| `agntcyConformance` | `true` if all agents have valid DIDs and the audit chain is intact | +| `agentCount` | Total active agents in the organization | +| `verifiedAgentCount` | Agents with a resolvable DID document | +| `auditChainIntegrity` | `true` if the audit event hash chain has not been tampered with | +| `from_cache` | `true` if served from Redis cache (up to 5 minutes old) | + +**Force a fresh report**: Wait 5 minutes for the cache to expire. 
The `from_cache: false` +response is always freshly generated. + +--- + +## Verify audit chain integrity + +`GET /api/v1/audit/verify` + +Verifies that the cryptographic hash chain of audit events is intact. Returns `verified: true` +if no tampering is detected. Rate limited to 30 requests/minute (computationally intensive). + +Requires: Bearer token with `audit:read` scope. + +```bash +curl -s "http://localhost:3000/api/v1/audit/verify" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +Response (`200 OK`): + +```json +{ + "verified": true, + "checkedCount": 1247, + "fromDate": null, + "toDate": null +} +``` + +Verify a specific date window: + +```bash +curl -s "http://localhost:3000/api/v1/audit/verify?fromDate=2026-03-01T00:00:00.000Z&toDate=2026-03-31T23:59:59.999Z" \ + -H "Authorization: Bearer $TOKEN" | jq . +``` + +**Interpreting the result**: +- `verified: true` — no tampering detected in the checked window +- `verified: false` — the hash chain has a broken link; contact SentryAgent.ai support +- `checkedCount` — number of audit events verified + +--- + +## Check SOC 2 control status (public) + +`GET /api/v1/compliance/controls` + +Returns the live status of all SOC 2 Trust Services Criteria controls. No authentication +required. Responses are cached by CDN/proxies for 60 seconds (`Cache-Control: public, max-age=60`). + +```bash +curl -s "http://localhost:3000/api/v1/compliance/controls" | jq . +``` + +Response (`200 OK`): + +```json +{ + "controls": [ + { + "id": "CC6.1", + "name": "Logical Access Controls", + "status": "pass", + "lastChecked": "2026-04-04T08:00:00.000Z" + }, + { + "id": "CC7.2", + "name": "System Monitoring", + "status": "pass", + "lastChecked": "2026-04-04T08:00:00.000Z" + } + ] +} +``` + +Each control has a `status` of `pass`, `fail`, or `unknown`. Status is updated by background +jobs that run periodically. This endpoint is suitable for embedding in external status pages +or compliance dashboards without sharing API credentials. 
+ +--- + +## When compliance endpoints are disabled + +If `COMPLIANCE_ENABLED=false` is set in the server environment, the AGNTCY compliance endpoints +(`/compliance/report` and `/compliance/agent-cards`) return `404 COMPLIANCE_DISABLED`. The SOC 2 +endpoints (`/compliance/controls` and `/audit/verify`) are never gated and always active. +``` diff --git a/openspec/changes/archive/developer-docs-phase6-update/specs/ws5-readme/spec.md b/openspec/changes/archive/developer-docs-phase6-update/specs/ws5-readme/spec.md new file mode 100644 index 0000000..47bd2e9 --- /dev/null +++ b/openspec/changes/archive/developer-docs-phase6-update/specs/ws5-readme/spec.md @@ -0,0 +1,77 @@ +# WS5 Spec — README.md + +## Target file +`docs/developers/README.md` + +## Objective +Two surgical edits to the existing file: +1. Fix the "bedroom developers" typo in line 3. +2. Expand the Guides table to list all 9 guides (4 existing + 5 new). + +--- + +## Edit 1 — Fix typo in the opening paragraph + +**Location**: Line 3 of the file (the subtitle/description line). + +**Find** (exact text): +``` +The complete documentation for bedroom developers building with SentryAgent.ai AgentIdP. +``` + +**Replace with**: +``` +The complete documentation for developers building with SentryAgent.ai AgentIdP. +``` + +No other changes to this paragraph. 
+ +--- + +## Edit 2 — Expand the Guides table + +**Find** (the entire Guides section, from heading to end of table): +```markdown +## Guides + +| Guide | What it covers | +|-------|----------------| +| [Register an Agent](guides/register-an-agent.md) | All fields, validation rules, common errors | +| [Manage Credentials](guides/manage-credentials.md) | Generate, list, rotate, revoke credentials | +| [Issue and Revoke Tokens](guides/issue-and-revoke-tokens.md) | OAuth 2.0 client credentials flow, introspect, revoke | +| [Query Audit Logs](guides/query-audit-logs.md) | Filters, pagination, event structure, retention | +``` + +**Replace with**: +```markdown +## Guides + +| Guide | What it covers | +|-------|----------------| +| [Register an Agent](guides/register-an-agent.md) | All fields, org scoping, validation rules, common errors | +| [Manage Credentials](guides/manage-credentials.md) | Generate, list, rotate, revoke credentials | +| [Issue and Revoke Tokens](guides/issue-and-revoke-tokens.md) | OAuth 2.0 client credentials flow, introspect, revoke | +| [Query Audit Logs](guides/query-audit-logs.md) | Filters, pagination, event structure, retention | +| [Use the Analytics Dashboard](guides/use-analytics-dashboard.md) | Query token trends, activity heatmap, per-agent usage | +| [Manage API Tiers](guides/manage-api-tiers.md) | Check current tier, understand limits, trigger upgrade | +| [A2A Delegation](guides/a2a-delegation.md) | Create and verify agent-to-agent delegation chains | +| [Configure Webhooks](guides/configure-webhooks.md) | Subscribe to events, delivery guarantees, inspect history | +| [AGNTCY Compliance](guides/agntcy-compliance.md) | Export agent cards, generate compliance reports, verify audit chain | +``` + +--- + +## Verification checklist + +After both edits: +- Line 3 must read: `The complete documentation for developers building with SentryAgent.ai AgentIdP.` +- The Documents table (the first table in the file, listing Quick Start / Core 
Concepts / Guides / API Reference) is unchanged. +- The Guides table has exactly 9 rows. +- All guide filenames match exactly the files that WS4 will create: + - `guides/use-analytics-dashboard.md` + - `guides/manage-api-tiers.md` + - `guides/a2a-delegation.md` + - `guides/configure-webhooks.md` + - `guides/agntcy-compliance.md` +- The Free Tier Limits table at the bottom of the file is unchanged. +- The Base URL section is unchanged. diff --git a/openspec/changes/archive/engineering-docs-phase6-update/.openspec.yaml b/openspec/changes/archive/engineering-docs-phase6-update/.openspec.yaml new file mode 100644 index 0000000..94a1cca --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/.openspec.yaml @@ -0,0 +1,23 @@ +id: engineering-docs-phase6-update +title: "Engineering Documentation — Phase 6 Complete Update" +status: complete +proposed: 2026-04-04 +approved: 2026-04-04 +approved-by: CEO +completed: 2026-04-04 +workstreams: + - id: WS1 + title: "05-services.md — add Phase 3–6 service deep dives" + status: complete + - id: WS2 + title: "02-architecture.md — update component diagram and data flows" + status: complete + - id: WS3 + title: "11-sdk-guide.md — add Rust SDK section" + status: complete + - id: WS4 + title: "09-testing.md — add AGNTCY conformance, tier, analytics test coverage" + status: complete + - id: WS5 + title: "01-overview, 03-tech-stack, 04-codebase-structure, 06-walkthroughs, README — Phase 3–6 updates" + status: complete diff --git a/openspec/changes/archive/engineering-docs-phase6-update/proposal.md b/openspec/changes/archive/engineering-docs-phase6-update/proposal.md new file mode 100644 index 0000000..c2dc9a7 --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/proposal.md @@ -0,0 +1,37 @@ +# OpenSpec Proposal — engineering-docs-phase6-update + +**Status:** Approved +**Proposed:** 2026-04-04 +**Approved by:** CEO + +## Problem Statement + +`docs/engineering/` (12 files) was written during Phase 2. 
Nine new services, the Rust SDK, +14 new DB migrations, and significant architectural changes shipped across Phases 3–6 with +zero coverage in the engineering documentation. The engineering team cannot onboard or maintain +Phase 3–6 features without accurate internal docs. + +## Workstreams + +### WS1 — 05-services.md +Add deep dives for all 9 Phase 3–6 services: AnalyticsService, TierService, ComplianceService, +A2AService, FederationService, DIDService, OIDCService, WebhookService, BillingService. +Each deep dive must match the format of existing service entries (purpose, public methods, +dependencies, configuration, Redis/DB usage). + +### WS2 — 02-architecture.md +Update the component diagram to include the Next.js portal, analytics pipeline, tier enforcement +layer, and all Phase 3–6 services. Update data-flow descriptions. + +### WS3 — 11-sdk-guide.md +Add a complete Rust SDK section (sdk-rust/) covering installation, authentication, agent +registration, token issuance, error handling, and the contribution guide extension. + +### WS4 — 09-testing.md +Add coverage for: AGNTCY conformance test suite (4 tests), tier enforcement tests, +analytics service tests, and the Phase 6 test count update. + +### WS5 — Remaining files +Targeted updates to: 01-overview.md (Phase 3–6 capabilities), 03-tech-stack.md (new deps), +04-codebase-structure.md (new directories), 06-walkthroughs.md (A2A + tier + analytics flows), +README.md (index update). 
diff --git a/openspec/changes/archive/engineering-docs-phase6-update/specs/ws1-services/spec.md b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws1-services/spec.md new file mode 100644 index 0000000..c7aa92d --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws1-services/spec.md @@ -0,0 +1,259 @@ +# WS1 — Service Deep Dives: Phase 3–6 Additions + +**Target file:** `docs/engineering/05-services.md` + +**Operation:** Append the following 9 service entries after the existing `### Prometheus/Grafana Monitoring` section (which is the last entry in the current file). Each entry follows the exact format of existing entries. + +--- + +## Instructions to Developer + +Append the following Markdown verbatim to the end of `docs/engineering/05-services.md`, starting after the final line of the `### Prometheus/Grafana Monitoring` section. Do not modify any existing content. + +--- + +## Content to Append + +```markdown +--- + +### AnalyticsService + +**Purpose**: Records daily aggregated analytics events (token issuances, agent activity) and exposes query methods for token trends, agent activity heatmaps, and per-agent usage summaries. All query methods scope results strictly to the supplied `tenantId`. The `recordEvent` method is fire-and-forget — it catches all errors internally and never propagates them to the caller, so analytics writes never block primary request paths. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `recordEvent` | `tenantId: string, metricType: string` | `Promise` | Upserts a daily counter row in `analytics_events` via `INSERT ... ON CONFLICT DO UPDATE SET count = count + 1`. Catches and swallows all errors; safe to call with `void` on hot paths. | +| `getTokenTrend` | `tenantId: string, days: number` | `Promise` | Returns daily token issuance counts for the last N days (clamped to 90). 
Uses `generate_series` + `LEFT JOIN` so that days with no events appear as `count: 0`. Results sorted ascending by date. | +| `getAgentActivity` | `tenantId: string` | `Promise` | Returns agent activity bucketed by day-of-week (0=Sun…6=Sat) and hour-of-day for the last 30 days. Reads only rows whose `metric_type` matches the pattern `agent::`. | +| `getAgentUsageSummary` | `tenantId: string` | `Promise` | Returns per-agent token issuance totals for the current calendar month, joined with the agent name (`owner` field). Sorted descending by `token_count`. Excludes decommissioned agents. | + +**Dependencies**: PostgreSQL connection pool (`Pool` from `pg`). No Redis usage. + +**Configuration**: None. `MAX_TREND_DAYS = 90` is a module-level constant. + +**DB tables**: +- `analytics_events`: `organization_id` (UUID FK to `organizations`), `date` (DATE), `metric_type` (text — e.g. `'token_issued'`, `'agent::token_issued'`), `count` (integer). Unique constraint on `(organization_id, date, metric_type)`. +- `agents`: read in `getAgentUsageSummary` to join `owner` and filter by `organization_id`. + +--- + +### TierService + +**Purpose**: Single authority for all subscription tier business logic — fetches current tier and live usage, initiates Stripe Checkout sessions for upgrades, applies confirmed upgrades to the `organizations` table, and enforces per-tier agent count limits. Controllers and middleware delegate all tier decisions to this service; no tier logic lives elsewhere. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `getStatus` | `orgId: string` | `Promise` | Returns current `tier`, per-tier `limits` (from `TIER_CONFIG`), live `usage` (Redis counters + DB agent count), and `resetAt` (ISO 8601 next UTC midnight). Falls back to `0` for Redis counters when Redis is unavailable. 
| +| `initiateUpgrade` | `orgId: string, targetTier: TierName` | `Promise` | Validates that `targetTier` is strictly higher rank than current tier. Creates a Stripe Checkout Session with `mode: 'subscription'`, `metadata: { orgId, targetTier }`, and the price ID from `STRIPE_PRICE_ID_` env var. Returns `{ checkoutUrl }`. | +| `applyUpgrade` | `orgId: string, tier: TierName` | `Promise` | Sets `organizations.tier` and `organizations.tier_updated_at = NOW()`. Called by the Stripe webhook handler after `checkout.session.completed`. | +| `fetchTier` | `orgId: string` | `Promise` | Queries `organizations.tier` for the given org. Returns `'free'` as a safe default when no row is found or the stored value is not a valid `TierName`. | +| `enforceAgentLimit` | `orgId: string, tier: TierName` | `Promise` | Counts non-decommissioned agents for the org and throws `TierLimitError` when count is at or over `TIER_CONFIG[tier].maxAgents`. No-op for Enterprise (infinite limit). Called by `AgentService` before creating a new agent. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`), Stripe client (`Stripe`). Imports `TIER_CONFIG` and `TIER_RANK` from `src/config/tiers.ts`. + +**Configuration**: +- `STRIPE_PRICE_ID_PRO` — Stripe price ID for the Pro tier +- `STRIPE_PRICE_ID_ENTERPRISE` — Stripe price ID for the Enterprise tier +- `STRIPE_PRICE_ID` — Fallback Stripe price ID when tier-specific vars are not set +- `STRIPE_SUCCESS_URL` — Redirect URL on successful checkout (default: `APP_BASE_URL/dashboard?billing=success`) +- `STRIPE_CANCEL_URL` — Redirect URL when checkout is cancelled (default: `APP_BASE_URL/dashboard?billing=cancel`) +- `APP_BASE_URL` — Base URL for redirect URL construction (default: `http://localhost:3000`) + +**Redis keys**: +- `rate:tier:calls:` — integer, daily API call counter; TTL set at next UTC midnight. Read in `getStatus`. +- `rate:tier:tokens:` — integer, daily token issuance counter; same TTL. Read in `getStatus`. 
+ +**DB tables**: +- `organizations`: `organization_id` (UUID PK), `tier` (text — `'free'|'pro'|'enterprise'`), `tier_updated_at` (timestamptz). Read in `fetchTier`; written in `applyUpgrade`. +- `agents`: read in `enforceAgentLimit` and `getStatus` to count non-decommissioned agents per org. + +**Error types**: +- `ValidationError` (400) — target tier is not higher than current tier +- `TierLimitError` (429) — agent count limit reached for the current tier + +--- + +### ComplianceService + +**Purpose**: Generates AGNTCY-standard compliance reports and exports agent cards for a tenant. Reports cover two sections: `agent-identity` (DID presence and credential expiry checks) and `audit-trail` (cryptographic hash chain verification). Reports are cached in Redis for 5 minutes to avoid repeated expensive DB queries. Agent card export returns all active agents in AGNTCY-standard JSON format. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `generateReport` | `tenantId: string` | `Promise` | Attempts to read `compliance:report:` from Redis; if found, returns it with `from_cache: true`. Otherwise builds the report by running `buildAgentIdentitySection` and `buildAuditTrailSection` in parallel, rolls up the overall status (fail > warn > pass), caches the result for 300 seconds, and returns it. | +| `exportAgentCards` | `tenantId: string` | `Promise` | Queries all non-decommissioned agents for the tenant and maps each to an AGNTCY agent card with `id` (DID or agent UUID), `name`, `capabilities`, `endpoint`, `created_at`, and `agntcy_schema_version: '1.0'`. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`). Internally instantiates `AuditVerificationService` for hash chain verification. + +**Configuration**: None. `CACHE_TTL_SECONDS = 300` and `AGNTCY_SCHEMA_VERSION = '1.0'` are module-level constants. 
+ +**Redis keys**: +- `compliance:report:` — JSON-serialised `IComplianceReport`, TTL 300 seconds. Written by `generateReport`; read on every call within the cache window. + +**DB tables**: +- `agents`: queried in both `buildAgentIdentitySection` (checks DID presence) and `exportAgentCards`. +- `credentials`: queried in `buildAgentIdentitySection` to check active credential expiry per agent. +- `audit_events`: read via `AuditVerificationService` in `buildAuditTrailSection` to verify hash chain integrity. + +**Error types**: None thrown directly. Internal errors in section builders produce `status: 'fail'` sections rather than exceptions. + +**Report structure**: +- `agent-identity` section: `fail` when any active agent is missing a DID or has expired credentials; `warn` when any credential expires within 7 days; `pass` otherwise. +- `audit-trail` section: `fail` when `AuditVerificationService.verifyChain()` returns `verified: false`; `pass` otherwise. + +--- + +### FederationService + +**Purpose**: Manages trusted federation partners and cross-IdP JWT token verification. At partner registration, the partner's JWKS endpoint is validated and the keys are cached in Redis. At token verification, the service fetches (or reuses cached) partner JWKS, verifies the JWT signature and standard claims, enforces the partner's `allowed_organizations` filter, and rejects tokens from suspended or expired partners. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `registerPartner` | `req: ICreatePartnerRequest` | `Promise` | Validates the `jwks_uri` is reachable (5-second timeout) and returns valid JWKS. Inserts the partner row into `federation_partners`. Caches the JWKS in Redis under `federation:jwks:`. | +| `listPartners` | _(none)_ | `Promise` | Updates any partners past `expires_at` to `status = 'expired'` before returning all rows ordered by `created_at DESC`. 
| +| `getPartner` | `id: string` | `Promise` | Applies the same expiry update, then returns the partner row. Throws `FederationPartnerNotFoundError` (404) when not found. | +| `updatePartner` | `id: string, req: IUpdatePartnerRequest` | `Promise` | Applies a partial update. When `jwks_uri` changes, invalidates the old issuer's JWKS cache entry (`DEL federation:jwks:`). | +| `deletePartner` | `id: string` | `Promise` | Deletes the partner row and invalidates the JWKS cache. | +| `verifyFederatedToken` | `req: IFederationVerifyRequest` | `Promise` | Decodes token header/payload without verification, rejects `alg:none`, looks up partner by `iss`, checks partner status and expiry, fetches JWKS (cache-first), finds matching key by `kid`, converts JWK to PEM, verifies signature via `jsonwebtoken.verify` (RS256 or ES256), enforces `allowed_organizations` filter. Returns `{ valid, issuer, subject, organization_id, claims }`. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`). Uses `jsonwebtoken` for JWT decoding/verification and Node.js `crypto.createPublicKey` for JWK-to-PEM conversion. + +**Configuration**: +- `FEDERATION_JWKS_CACHE_TTL_SECONDS` — TTL for cached partner JWKS in Redis (default: `3600`) + +**Redis keys**: +- `federation:jwks:` — JSON-serialised `IJWKSKey[]`, TTL from `FEDERATION_JWKS_CACHE_TTL_SECONDS`. Written on partner registration and on cache miss during token verification; deleted when a partner is updated (JWKS URI changed) or deleted. + +**DB tables**: +- `federation_partners`: `id` (UUID PK), `name` (text), `issuer` (text — the IdP's issuer URL), `jwks_uri` (text), `allowed_organizations` (text[] — empty means all orgs allowed), `status` (`active|suspended|expired`), `created_at`, `updated_at`, `expires_at` (nullable timestamptz). 
+ +**Error types**: +- `FederationPartnerError` (400) — JWKS endpoint unreachable or returns invalid JWKS +- `FederationPartnerNotFoundError` (404) — partner UUID not found +- `FederationVerificationError` (401) — token malformed, alg:none, unknown issuer, partner suspended/expired, signature invalid, org not in allow list + +--- + +### DIDService + +**Purpose**: Manages W3C DID Core 1.0 document generation, EC P-256 key pair creation, and AGNTCY agent card export. Generates per-agent `did:web` identifiers, stores private keys in HashiCorp Vault (or records a `dev:no-vault` marker in dev mode), and caches DID documents in Redis. Builds both an instance-level DID document (for AgentIdP itself) and per-agent DID documents with AGNTCY extension properties. + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `generateDIDForAgent` | `agentId: string, organizationId: string` | `Promise<{ did: string; publicKeyJwk: IPublicKeyJwk }>` | Generates an EC P-256 key pair. Stores the private key PEM in Vault KV v2 at `{mount}/data/agentidp/agents/{agentId}/did-key`. Encrypts the vault path via `EncryptionService` (when configured). Inserts a row into `agent_did_keys`. Updates `agents.did` and `agents.did_created_at`. Returns the `did:web` identifier and public key JWK. | +| `buildInstanceDIDDocument` | _(none)_ | `Promise` | Builds the root instance DID document for AgentIdP (format: `did:web:{DID_WEB_DOMAIN}`). Cached in Redis under `did:doc:instance`. | +| `buildAgentDIDDocument` | `agentId: string` | `Promise` | Builds a per-agent DID document (format: `did:web:{DID_WEB_DOMAIN}:agents:{agentId}`). Decommissioned agents get a deactivated document with an `AgentStatus: decommissioned` service entry. Cached in Redis under `did:doc:{agentId}` for active agents only. Throws `AgentNotFoundError` if the agent does not exist. 
| +| `buildResolutionResult` | `agentId: string` | `Promise` | Wraps `buildAgentDIDDocument` with W3C DID Resolution metadata (`didDocumentMetadata`, `didResolutionMetadata`). | +| `buildAgentCard` | `agentId: string` | `Promise` | Returns an AGNTCY-format agent card with `did`, `name` (agent email), `agentType`, `capabilities`, `owner`, `version`, `deploymentEnv`, `identityProvider`, and `issuedAt`. | + +**Dependencies**: PostgreSQL (`Pool`), Redis (`RedisClientType`), optional `VaultClient`, optional `EncryptionService`. Uses `node-vault` directly for DID private key storage. + +**Configuration**: +- `DID_WEB_DOMAIN` — required; the domain for `did:web` DID construction (e.g. `idp.sentryagent.ai`) +- `DID_DOCUMENT_CACHE_TTL_SECONDS` — Redis cache TTL for DID documents (default: `300`) +- `VAULT_ADDR`, `VAULT_TOKEN`, `VAULT_MOUNT` — when set, private keys are stored in Vault; otherwise a `dev:no-vault` marker is used + +**Redis keys**: +- `did:doc:instance` — JSON-serialised instance `IDIDDocument`, TTL from `DID_DOCUMENT_CACHE_TTL_SECONDS` +- `did:doc:{agentId}` — JSON-serialised per-agent `IDIDDocument`, same TTL. Not cached for decommissioned agents. + +**DB tables**: +- `agents`: `did` (text — `did:web:...`), `did_created_at` (timestamptz). Written by `generateDIDForAgent`; read in all document-building methods. +- `agent_did_keys`: `key_id` (UUID PK), `agent_id` (UUID FK), `organization_id` (UUID FK), `public_key_jwk` (JSONB), `vault_key_path` (text — Vault KV v2 path or `dev:no-vault`), `key_type` (`'EC'`), `curve` (`'P-256'`), `created_at`. Written by `generateDIDForAgent`. + +**Error types**: +- `AgentNotFoundError` (404) — agent UUID not found in `buildAgentDIDDocument`, `buildResolutionResult`, `buildAgentCard` + +--- + +### WebhookService + +**Purpose**: Manages webhook subscriptions and their delivery history for a tenant organisation. HMAC signing secrets are stored in HashiCorp Vault KV v2 (when configured) or bcrypt-hashed in PostgreSQL in local mode. 
The raw secret is only returned once at subscription creation time. `vault_secret_path` is encrypted at rest via `EncryptionService` (AES-256-CBC) before being written to PostgreSQL (SOC 2 CC6.1 compliance). + +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `createSubscription` | `orgId: string, req: ICreateWebhookRequest` | `Promise` | Generates a 32-byte random hex HMAC secret. Stores in Vault at `secret/data/agentidp/webhooks/{orgId}/{id}/secret` (Vault mode) or bcrypt-hashes and stores in `secret_hash` (local mode). Encrypts `vault_secret_path` via `EncryptionService`. Returns the subscription including the one-time `secret`. Validates URL must use `https://` and events array must be non-empty. | +| `listSubscriptions` | `orgId: string` | `Promise` | Returns all subscriptions for the org, ordered by `created_at DESC`. No secret fields are included. | +| `getSubscription` | `id: string, orgId: string` | `Promise` | Returns a single subscription. Verifies org ownership. | +| `updateSubscription` | `id: string, orgId: string, req: IUpdateWebhookRequest` | `Promise` | Partially updates `name`, `url`, `events`, or `active` fields. Validates `https://` if URL is changing. | +| `deleteSubscription` | `id: string, orgId: string` | `Promise` | Permanently deletes the subscription and all deliveries (via PostgreSQL CASCADE). | +| `getSubscriptionSecret` | `subscriptionId: string, orgId: string` | `Promise` | Retrieves the raw HMAC secret from Vault (Vault mode only). Throws `WebhookValidationError` in local mode since the secret cannot be recovered after creation. | +| `listDeliveries` | `subscriptionId: string, orgId: string, limit: number, offset: number` | `Promise` | Returns paginated delivery records for a subscription. Verifies org ownership before querying. 
| + +**Dependencies**: PostgreSQL (`Pool`), optional `VaultClient`, Redis (`RedisClientType` — reserved for future caching), optional `EncryptionService`. + +**Configuration**: Inherits Vault configuration from `VaultClient` (`VAULT_ADDR`, `VAULT_TOKEN`, `VAULT_MOUNT`). `EncryptionService` requires `ENCRYPTION_KEY` env var (see `EncryptionService` docs). + +**DB tables**: +- `webhook_subscriptions`: `id` (UUID PK), `organization_id` (UUID FK), `name` (text), `url` (text — https only), `events` (JSONB — `WebhookEventType[]`), `secret_hash` (text — bcrypt hash in local mode, `'vault'` in Vault mode), `vault_secret_path` (text — encrypted Vault path or `'local'`), `active` (boolean), `failure_count` (integer), `created_at`, `updated_at`. +- `webhook_deliveries`: `id` (UUID PK), `subscription_id` (UUID FK), `event_type` (text), `payload` (JSONB), `status` (`pending|delivered|failed|dead_letter`), `http_status_code` (integer nullable), `attempt_count` (integer), `next_retry_at` (timestamptz nullable), `delivered_at` (timestamptz nullable), `created_at`, `updated_at`. Cascades on subscription delete. + +**Error types**: +- `WebhookNotFoundError` (404) — subscription not found or belongs to another org +- `WebhookValidationError` (400) — invalid URL scheme, empty events array, or secret not recoverable in local mode + +--- + +### BillingService + +**Purpose**: Manages Stripe billing integration — creates Checkout Sessions for tenant subscriptions, processes incoming Stripe webhook events (subscription lifecycle and checkout completion), and retrieves current subscription status. When a `checkout.session.completed` event carries `{ orgId, targetTier }` in its metadata, delegates to `TierService.applyUpgrade` to update the organisation's tier. 
+ +**Public methods**: + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `createCheckoutSession` | `tenantId: string, successUrl: string, cancelUrl: string` | `Promise` | Creates a Stripe Checkout Session with `mode: 'subscription'`, `client_reference_id: tenantId`, and the price from `STRIPE_PRICE_ID`. Returns the checkout URL. Throws if Stripe does not return a URL. | +| `handleWebhookEvent` | `rawBody: Buffer, sig: string, webhookSecret: string` | `Promise` | Verifies the Stripe webhook signature via `stripe.webhooks.constructEvent`. Handles `customer.subscription.created/updated/deleted` (upserts `tenant_subscriptions`) and `checkout.session.completed` (applies tier upgrade via `TierService` when metadata contains `orgId` and `targetTier`). | +| `getSubscriptionStatus` | `tenantId: string` | `Promise` | Queries `tenant_subscriptions` for the given tenant. Returns `{ tenantId, status: 'free', currentPeriodEnd: null, stripeSubscriptionId: null }` when no row exists. | + +**Dependencies**: PostgreSQL (`Pool`), Stripe client (`Stripe`), optional `TierService`. + +**Configuration**: +- `STRIPE_PRICE_ID` — Stripe price ID for subscription checkout sessions +- `STRIPE_WEBHOOK_SECRET` — Stripe webhook endpoint secret (`whsec_...`); passed by the webhook controller, not read directly by the service + +**DB tables**: +- `tenant_subscriptions`: `tenant_id` (UUID PK or unique), `status` (text — `'free'|'active'|'past_due'|'canceled'`), `stripe_customer_id` (text), `stripe_subscription_id` (text), `current_period_end` (timestamptz nullable), `updated_at`. Upserted on subscription lifecycle events. + +**Error types**: None defined in the service. Stripe signature failures raise `Error` from `stripe.webhooks.constructEvent`; these propagate to the error handler as 400 responses. 
+ +--- + +### OIDCService (A2A / OIDC Provider) + +**Note**: `src/services/OIDCService.ts` does not exist as a standalone file — OIDC provider functionality is handled by the `oidc-provider` npm package, configured in `src/app.ts` and related route files. The service boundary for OIDC-related business logic is the `DelegationService`. Document the OIDC integration as follows. + +**Purpose**: The OIDC/A2A subsystem provides agent-to-agent (A2A) delegation using the `oidc-provider` library (v9.7.x). The provider is mounted as a sub-application at `/oidc` and issues short-lived delegation tokens scoped to a specific `delegatee_id`. The `DelegationService` (`src/services/DelegationService.ts`) manages the `delegation_chains` table for auditing. + +**Key endpoints exposed by the OIDC provider**: +- `POST /oidc/token` — issues delegation tokens via `client_credentials` or custom grant +- `GET /oidc/.well-known/openid-configuration` — OIDC discovery document +- `GET /oidc/jwks` — public JWK Set for verifying delegation tokens + +**DelegationService public methods** (from `src/services/DelegationService.ts`): + +| Method | Parameters | Returns | Description | +|--------|-----------|---------|-------------| +| `createDelegation` | `delegatorId: string, delegateeId: string, scope: string, expiresAt?: Date` | `Promise` | Inserts a delegation chain record into `delegation_chains`. Validates both agents exist and are active. | +| `verifyDelegation` | `token: string, delegateeId: string` | `Promise` | Verifies the delegation token signature and checks the chain record is active and not expired. | +| `revokeDelegation` | `chainId: string, delegatorId: string` | `Promise` | Sets `delegation_chains.status = 'revoked'` and `revoked_at = NOW()`. Validates the delegator owns the chain. 
| + +**DB tables**: +- `delegation_chains`: `chain_id` (UUID PK), `delegator_id` (UUID), `delegatee_id` (UUID), `scope` (text), `status` (`active|revoked|expired`), `created_at`, `expires_at` (nullable), `revoked_at` (nullable), `token` (text — the delegation JWT). + +**Configuration**: +- `A2A_ENABLED` — when set to `'false'`, A2A/delegation endpoints return 404 +- `OIDC_ISSUER` — issuer URL for the OIDC provider +``` diff --git a/openspec/changes/archive/engineering-docs-phase6-update/specs/ws2-architecture/spec.md b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws2-architecture/spec.md new file mode 100644 index 0000000..b64b353 --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws2-architecture/spec.md @@ -0,0 +1,235 @@ +# WS2 — Architecture Documentation Updates + +**Target file:** `docs/engineering/02-architecture.md` + +**Operation:** Surgical replacements and additions to the existing document. Apply in the order listed below. + +--- + +## Change 1 — Replace Component Diagram + +**Location:** Section `## 1. 
Component Diagram` + +**Old text (entire Mermaid block and surrounding content — replace from `\`\`\`mermaid` through the closing `\`\`\``):** + +``` +```mermaid +graph TD + Client["Client (AI Agent / Browser / CI)"] + + Client -->|HTTPS| ExpressApp["Express App (AgentIdP)"] + + subgraph ExpressApp["Express App — src/app.ts"] + Router["Router (src/routes/)"] + AuthMW["authMiddleware (src/middleware/auth.ts)"] + OpaMW["opaMiddleware (src/middleware/opa.ts)"] + Controller["Controller (src/controllers/)"] + Service["Service (src/services/)"] + Repository["Repository (src/repositories/)"] + Router --> AuthMW --> OpaMW --> Controller --> Service --> Repository + end + + Repository -->|parameterized SQL| PG["PostgreSQL 14\n(agents, credentials, audit_events, token_revocations)"] + Service -->|Redis commands| Redis["Redis 7\n(token revocation list, monthly counts, rate-limit counters)"] + Service -->|KV v2 read/write| Vault["HashiCorp Vault\n(opt-in — when VAULT_ADDR is set)"] + + ExpressApp -->|evaluate input| OPA["OPA Policy Engine\n(policies/authz.rego + data/scopes.json)"] + ExpressApp -->|expose| Metrics["/metrics (prom-client)"] + + Dashboard["Dashboard SPA (React 18 + Vite 5)\ndashboard/dist/ served from /dashboard"] + Client -->|browser| Dashboard + Dashboard -->|REST API calls| ExpressApp + + Grafana["Grafana (port 3001)"] -->|scrapes| Metrics +``` +``` + +**New text (replace with the expanded diagram):** + +``` +```mermaid +graph TD + Client["Client (AI Agent / Browser / CI)"] + + Client -->|HTTPS| ExpressApp["Express App (AgentIdP)"] + + subgraph ExpressApp["Express App — src/app.ts"] + Router["Router (src/routes/)"] + AuthMW["authMiddleware (src/middleware/auth.ts)"] + TierMW["tierMiddleware (src/middleware/tier.ts)"] + OpaMW["opaMiddleware (src/middleware/opa.ts)"] + Controller["Controller (src/controllers/)"] + Service["Service (src/services/)"] + Repository["Repository (src/repositories/)"] + Router --> AuthMW --> TierMW --> OpaMW --> Controller --> Service 
--> Repository + end + + Repository -->|parameterized SQL| PG["PostgreSQL 14\n(agents, credentials, audit_events,\nanalytics_events, organizations,\nfederation_partners, webhook_subscriptions,\nagent_did_keys, delegation_chains)"] + Service -->|Redis commands| Redis["Redis 7\n(token revocation list, daily tier counters,\nJWKS cache, compliance report cache,\nDID document cache)"] + Service -->|KV v2 read/write| Vault["HashiCorp Vault\n(opt-in — credentials, DID private keys,\nwebhook secrets — when VAULT_ADDR is set)"] + + ExpressApp -->|evaluate input| OPA["OPA Policy Engine\n(policies/authz.rego + data/scopes.json)"] + ExpressApp -->|expose| Metrics["/metrics (prom-client)"] + ExpressApp -->|checkout session / webhooks| Stripe["Stripe\n(billing — when STRIPE_SECRET_KEY is set)"] + + Dashboard["Dashboard SPA (React 18 + Vite 5)\ndashboard/dist/ served from /dashboard"] + Portal["Developer Portal (Next.js 14)\nportal/ — served separately on port 3002"] + Client -->|browser| Dashboard + Client -->|browser| Portal + Dashboard -->|REST API calls| ExpressApp + Portal -->|REST API calls| ExpressApp + + Grafana["Grafana (port 3001)"] -->|scrapes| Metrics + + OIDCProvider["OIDC Provider (oidc-provider v9)\nmounted at /oidc — A2A delegation tokens"] + ExpressApp --- OIDCProvider +``` +``` + +--- + +## Change 2 — Add New Services to Section 2 (HTTP Request Lifecycle) + +**Location:** Section `## 2. HTTP Request Lifecycle` + +**Find the paragraph that starts with:** +``` +7. The service (`src/services/*.ts`) executes all business logic — enforces free-tier limits, resolves domain rules, and calls repositories. +``` + +**Replace that single numbered item with:** +``` +7. The service (`src/services/*.ts`) executes all business logic — enforces tier limits, resolves domain rules, and calls repositories. 
Phases 3–6 introduce specialised services: `AnalyticsService` (fire-and-forget event recording), `TierService` (enforces per-tier agent and call limits), `ComplianceService` (AGNTCY compliance reports, cached 5 min in Redis), `FederationService` (cross-IdP JWT verification with cached JWKS), `DIDService` (W3C DID document generation and caching), `WebhookService` (subscription management with Vault-backed HMAC secrets), and `BillingService` (Stripe Checkout and webhook processing). The service has no knowledge of HTTP. +``` + +--- + +## Change 3 — Add Tier Enforcement Middleware Description + +**Location:** Section `## 2. HTTP Request Lifecycle` + +**Find item 5:** +``` +5. `opaMiddleware` (`src/middleware/opa.ts`) evaluates the OPA policy +``` + +**Insert a new item between item 4 (authMiddleware) and item 5 (opaMiddleware). Re-number subsequent items accordingly. The new item is:** + +``` +5. `tierMiddleware` (`src/middleware/tier.ts`) enforces per-tier daily API call limits. It reads the organisation's current tier from `TierService.fetchTier(orgId)`, checks the daily call counter from Redis key `rate:tier:calls:{orgId}` against `TIER_CONFIG[tier].maxCallsPerDay`, increments the counter on each passing request (fire-and-forget `INCR` with TTL set to next UTC midnight), and throws `TierLimitError` (429) when the limit is reached. This middleware is applied only to API routes, not to `/health`, `/metrics`, or `/dashboard`. +``` + +Re-number the former item 5 (opaMiddleware) through the end of the list as 6 through 11 (adding one to each subsequent number). + +--- + +## Change 4 — Add New Data Flows Section + +**Location:** After the closing of `## 3. OAuth 2.0 Client Credentials Flow` and before `## 4. Multi-Region Deployment Topology` + +**Insert the following new section:** + +```markdown +--- + +## 3b. 
Analytics Event Capture Flow + +Every successful token issuance writes a fire-and-forget analytics event: + +```mermaid +sequenceDiagram + participant Controller as TokenController + participant OAuth2Svc as OAuth2Service + participant AnalyticsSvc as AnalyticsService + participant PG as PostgreSQL + + Controller->>OAuth2Svc: issueToken(clientId, clientSecret, scope, ...) + OAuth2Svc->>OAuth2Svc: signToken() — RS256 JWT + OAuth2Svc-->>Controller: ITokenResponse + + Note over OAuth2Svc,AnalyticsSvc: fire-and-forget (void) + OAuth2Svc-)AnalyticsSvc: recordEvent(tenantId, 'token_issued') + AnalyticsSvc-)PG: INSERT INTO analytics_events ... ON CONFLICT DO UPDATE count + 1 +``` + +`recordEvent` uses PostgreSQL `UPSERT` — one row per `(organization_id, date, metric_type)`. If the INSERT conflicts (same date, same org, same metric), the `count` column is incremented atomically. This keeps the table compact (one row per day per metric type per org) and fast to query. + +--- + +## 3c. Tier Enforcement Middleware Chain + +```mermaid +sequenceDiagram + actor Agent + participant TierMW as tierMiddleware + participant TierSvc as TierService + participant Redis + participant PG as PostgreSQL + + Agent->>TierMW: API request (with valid Bearer token) + TierMW->>TierSvc: fetchTier(orgId) + TierSvc->>PG: SELECT tier FROM organizations WHERE organization_id = $1 + PG-->>TierSvc: 'pro' + TierSvc-->>TierMW: 'pro' + + TierMW->>Redis: GET rate:tier:calls: + Redis-->>TierMW: "4999" (current daily count) + + Note over TierMW: TIER_CONFIG['pro'].maxCallsPerDay = 50000 — limit not reached + + TierMW-)Redis: INCR rate:tier:calls: (fire-and-forget, TTL = next UTC midnight) + TierMW->>Agent: next() — request proceeds to opaMiddleware +``` + +When the counter equals or exceeds the tier limit, `tierMiddleware` throws `TierLimitError` (429) before `opaMiddleware` runs. The daily counter resets at UTC midnight via Redis TTL. + +--- + +## 3d. 
A2A Delegation End-to-End Flow + +```mermaid +sequenceDiagram + actor Delegator as Delegator Agent + actor Delegatee as Delegatee Agent + participant AgentIdP + participant DelegationSvc as DelegationService + participant OIDCProvider as OIDC Provider + participant PG as PostgreSQL + + Delegator->>AgentIdP: POST /api/v1/oauth2/token/delegate
{ delegatee_id, scope } + AgentIdP->>DelegationSvc: createDelegation(delegatorId, delegateeId, scope) + DelegationSvc->>PG: INSERT INTO delegation_chains ... + PG-->>DelegationSvc: chain_id + DelegationSvc->>OIDCProvider: issue delegation JWT (delegator claims + delegatee sub) + OIDCProvider-->>DelegationSvc: signed delegation token + DelegationSvc-->>AgentIdP: IDelegationChain (with token) + AgentIdP-->>Delegator: 201 { token, chain_id } + + Note over Delegatee,AgentIdP: Delegatee uses the delegation token + Delegatee->>AgentIdP: POST /api/v1/oauth2/token/verify-delegation
{ token } + AgentIdP->>DelegationSvc: verifyDelegation(token, delegateeId) + DelegationSvc->>PG: SELECT * FROM delegation_chains WHERE chain_id = $1 AND status = 'active' + PG-->>DelegationSvc: chain row (not expired, not revoked) + DelegationSvc->>OIDCProvider: verify token signature + OIDCProvider-->>DelegationSvc: verified claims + DelegationSvc-->>AgentIdP: IDelegationVerifyResult { valid: true, ... } + AgentIdP-->>Delegatee: 200 { valid: true, delegatorId, scope } +``` +``` +``` + +--- + +## Change 5 — Add New PostgreSQL Tables to Section 2 + +**Location:** Section `## 2. HTTP Request Lifecycle`, item 8 (Repository layer description). + +**Find the text:** +``` +8. The repository (`src/repositories/*.ts`) executes parameterized SQL against PostgreSQL via `node-postgres`, or issues Redis commands via the `redis` client. No business logic lives here. +``` + +**Replace with:** +``` +8. The repository (`src/repositories/*.ts`) executes parameterized SQL against PostgreSQL via `node-postgres`, or issues Redis commands via the `redis` client. No business logic lives here. Phase 3–6 added the following tables: `analytics_events` (daily metric counters), `organizations` (org tier and billing), `federation_partners` (cross-IdP trust registry), `webhook_subscriptions` and `webhook_deliveries` (outbound event delivery), `agent_did_keys` (public EC keys for DID documents), `delegation_chains` (A2A delegation records), `tenant_subscriptions` (Stripe subscription status). 
+``` diff --git a/openspec/changes/archive/engineering-docs-phase6-update/specs/ws3-sdk-rust/spec.md b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws3-sdk-rust/spec.md new file mode 100644 index 0000000..1a114a1 --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws3-sdk-rust/spec.md @@ -0,0 +1,300 @@ +# WS3 — Rust SDK Documentation + +**Target file:** `docs/engineering/11-sdk-guide.md` + +**Operation:** Insert the following complete section into `docs/engineering/11-sdk-guide.md`, after `## 5. Java SDK` and before the existing `## 6. SDK Contribution Guide — Adding a New Endpoint` (which is renumbered to `## 7`). + +--- + +## Instructions to Developer + +Insert the following Markdown verbatim into `docs/engineering/11-sdk-guide.md`. Apart from the heading renumbering described below, do not modify any existing content. The new section is `## 6. Rust SDK` (the current section 6 becomes section 7 — renumber it as part of this change). + +**Renaming instruction:** Change the existing heading `## 6. SDK Contribution Guide — Adding a New Endpoint` to `## 7. SDK Contribution Guide — Adding a New Endpoint` before inserting. The new Rust SDK section takes the `## 6` slot. + +--- + +## Content to Insert (before the existing Section 6, which becomes Section 7) + +Insert the following after the Java SDK section (`## 5. Java SDK`) and before the existing contribution guide (which becomes `## 7`): + +```markdown +--- + +## 6. Rust SDK + +The Rust SDK (`sdk-rust/`) is a production-grade, async-first client for the SentryAgent.ai AgentIdP API. It provides full coverage of the 14 API endpoints across agent identity, OAuth 2.0 token management, credential rotation, audit logs, the public marketplace, and agent-to-agent (A2A) delegation. + +**Requirements:** Rust 1.75+ (stable), `tokio` runtime. 
+ +--- + +### Installation + +Add the crate to your `Cargo.toml`: + +```toml +[dependencies] +sentryagent-idp = "1.0" +tokio = { version = "1.35", features = ["full"] } +``` + +The crate uses `reqwest` with `rustls-tls` (no OpenSSL dependency) and `serde` for JSON serialisation. + +--- + +### Authentication + +The Rust SDK uses the OAuth 2.0 Client Credentials grant, managed transparently by `TokenManager`. You never call `TokenManager` directly — it is embedded in `AgentIdPClient` and invoked automatically before every request. + +**Token refresh behaviour:** +- The first API call triggers a `POST /oauth2/token` request with `grant_type=client_credentials`. +- The returned token is cached behind an async `tokio::sync::Mutex`. +- Subsequent calls within the token lifetime return the cached token without a network round trip. +- The cache expires 60 seconds before the server-reported `expires_in`, ensuring tokens never expire mid-flight. +- The `Mutex` guarantees only one refresh happens even when many `tokio` tasks call `get_token()` concurrently. + +**Environment variable construction:** + +```rust +use sentryagent_idp::AgentIdPClient; + +// from_env() reads AGENTIDP_API_URL, AGENTIDP_CLIENT_ID, AGENTIDP_CLIENT_SECRET +let client = AgentIdPClient::from_env()?; +``` + +**Explicit construction:** + +```rust +use sentryagent_idp::AgentIdPClient; + +let client = AgentIdPClient::new( + "https://api.sentryagent.ai", + "a1b2c3d4-e5f6-7890-abcd-ef1234567890", + "sk_live_...", +); +``` + +| Environment Variable | Required | Purpose | +|---|---|---| +| `AGENTIDP_API_URL` | Yes | Base URL of the AgentIdP API | +| `AGENTIDP_CLIENT_ID` | Yes | OAuth 2.0 client identifier | +| `AGENTIDP_CLIENT_SECRET` | Yes | OAuth 2.0 client secret | + +--- + +### Complete Working Example + +The following example covers the full agent identity lifecycle: register → generate credentials → issue token → retrieve agent → list audit logs → delete agent. 
+ +```rust +use sentryagent_idp::{ + AgentIdPClient, AgentIdPError, + AuditLogFilters, MarketplaceFilters, RegisterAgentRequest, +}; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + // Build client from environment variables. + // Requires: AGENTIDP_API_URL, AGENTIDP_CLIENT_ID, AGENTIDP_CLIENT_SECRET + let client = AgentIdPClient::from_env()?; + + // ── Register a new agent ────────────────────────────────────────────────── + let agent = client.register_agent(RegisterAgentRequest { + name: "my-screener-agent".to_owned(), + description: Some("Screens resumes using ML".to_owned()), + agent_type: "screener".to_owned(), + capabilities: vec!["resume:read".to_owned(), "classify".to_owned()], + metadata: None, + }).await?; + + println!("Registered: {} (DID: {})", agent.id, agent.did); + + // ── Generate credentials for the agent ─────────────────────────────────── + let creds = client.generate_credentials(&agent.id).await?; + println!("Client ID: {}", creds.client_id); + println!("Client Secret: {} (store this — shown once)", creds.client_secret); + + // ── Issue a scoped token (TokenManager handles this automatically) ──────── + let token_resp = client.issue_token(&agent.id, &["agents:read", "agents:write"]).await?; + println!("Token type: {}, expires in {}s", token_resp.token_type, token_resp.expires_in); + + // ── Retrieve the agent ──────────────────────────────────────────────────── + let fetched = client.get_agent(&agent.id).await?; + println!("Fetched: {} (public: {})", fetched.name, fetched.is_public); + + // ── List agents ─────────────────────────────────────────────────────────── + let list = client.list_agents(Some(1), Some(10)).await?; + println!("Total agents: {}", list.total); + + // ── Audit logs ──────────────────────────────────────────────────────────── + let logs = client.list_audit_logs(AuditLogFilters { + agent_id: Some(agent.id.clone()), + event_type: None, + from: None, + to: None, + page: 1, + per_page: 10, + }).await?; + println!("Audit 
events: {}", logs.total); + + // ── Rotate credentials ──────────────────────────────────────────────────── + let new_creds = client.rotate_credentials(&agent.id).await?; + println!("New secret: {}", new_creds.client_secret); + + // ── Delete agent ────────────────────────────────────────────────────────── + client.delete_agent(&agent.id).await?; + println!("Agent deleted."); + + Ok(()) +} +``` + +Run the bundled quickstart example directly: + +```bash +AGENTIDP_API_URL=http://localhost:3000 \ +AGENTIDP_CLIENT_ID=your-client-id \ +AGENTIDP_CLIENT_SECRET=your-client-secret \ +cargo run --example quickstart +``` + +--- + +### Client Methods Reference + +All methods are `async` and return `Result`. The client is cheap to clone — the inner `reqwest::Client` and token cache are shared via `Arc`. + +**Agent Registry** (`sdk-rust/src/agents.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `register_agent` | `(req: RegisterAgentRequest) -> Result` | `POST /agents` — 201 | +| `get_agent` | `(agent_id: &str) -> Result` | `GET /agents/{id}` — 200 | +| `list_agents` | `(page: Option, per_page: Option) -> Result` | `GET /agents` — 200 | +| `update_agent` | `(agent_id: &str, req: UpdateAgentRequest) -> Result` | `PATCH /agents/{id}` — 200 | +| `delete_agent` | `(agent_id: &str) -> Result<()>` | `DELETE /agents/{id}` — 204 | + +**Credential Management** (`sdk-rust/src/credentials.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `generate_credentials` | `(agent_id: &str) -> Result` | `POST /agents/{id}/credentials` — 201. `client_secret` shown once. | +| `rotate_credentials` | `(agent_id: &str) -> Result` | `POST /agents/{id}/credentials/rotate` — 200. New secret shown once. 
| +| `revoke_credentials` | `(agent_id: &str, cred_id: &str) -> Result<()>` | `DELETE /agents/{id}/credentials/{cred_id}` — 204 | + +**Token Operations** (`sdk-rust/src/oauth2.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `issue_token` | `(agent_id: &str, scopes: &[&str]) -> Result` | Issues a scoped Bearer JWT. Token is cached by `TokenManager` automatically. | + +**Audit Log** (`sdk-rust/src/audit.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `list_audit_logs` | `(filters: AuditLogFilters) -> Result` | Paginated audit log query with optional agent_id, event_type, from, to filters. | + +**Marketplace** (`sdk-rust/src/marketplace.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `list_public_agents` | `(filters: MarketplaceFilters) -> Result` | Lists publicly discoverable agents with optional `q`, `capability`, `publisher` filters. | + +**A2A Delegation** (`sdk-rust/src/delegation.rs`): + +| Method | Signature | Description | +|--------|-----------|-------------| +| `delegate` | `(req: DelegateRequest) -> Result` | Creates a delegation chain and returns the delegation JWT. | +| `verify_delegation` | `(token: &str) -> Result` | Verifies a delegation token and returns the verified claims. | + +--- + +### Error Types + +All SDK operations return `Result`. 
Match on the enum variants for structured error handling: + +```rust +use sentryagent_idp::AgentIdPError; + +match client.get_agent("unknown-id").await { + Ok(agent) => println!("Found: {}", agent.name), + Err(AgentIdPError::NotFound(msg)) => { + eprintln!("Agent not found: {}", msg); + } + Err(AgentIdPError::AuthError(msg)) => { + eprintln!("Auth failed: {}", msg); + // Token may have been revoked — check credentials + } + Err(AgentIdPError::RateLimited { retry_after_secs }) => { + eprintln!("Rate limited — retry after {}s", retry_after_secs); + tokio::time::sleep(std::time::Duration::from_secs(retry_after_secs)).await; + } + Err(AgentIdPError::ApiError { status, message, code }) => { + eprintln!("API error {}: {} (code: {:?})", status, message, code); + } + Err(AgentIdPError::ConfigError(msg)) => { + // Missing environment variable — fix before running + eprintln!("Config error: {}", msg); + } + Err(AgentIdPError::HttpError(e)) => { + // reqwest transport error — network issue + eprintln!("HTTP transport error: {}", e); + } + Err(AgentIdPError::SerdeError(e)) => { + // JSON parse failure — API response shape mismatch + eprintln!("Serialization error: {}", e); + } + Err(AgentIdPError::DelegationError(msg)) => { + eprintln!("Delegation chain invalid: {}", msg); + } +} +``` + +| Variant | Trigger | HTTP status | +|---------|---------|-------------| +| `HttpError(reqwest::Error)` | Network-level failure (connection refused, timeout) | N/A | +| `ApiError { status, message, code }` | Non-2xx response not matching a specific variant | Any non-2xx | +| `AuthError(String)` | 401 or 403 from the API | 401, 403 | +| `NotFound(String)` | 404 from the API | 404 | +| `RateLimited { retry_after_secs }` | 429 — parses `Retry-After` header (defaults to 60s) | 429 | +| `ConfigError(String)` | Missing env var in `from_env()` | N/A | +| `SerdeError(serde_json::Error)` | JSON deserialisation failure | N/A | +| `DelegationError(String)` | Invalid delegation chain | N/A | + +--- + +### 
Adding a New Endpoint to the Rust SDK + +When the AgentIdP server adds a new API endpoint, add it to the Rust SDK using this checklist: + +**File structure** (`sdk-rust/src/`): + +``` +sdk-rust/src/ +├── lib.rs # Crate root — re-exports and module declarations +├── client.rs # AgentIdPClient struct and new()/from_env() constructors +├── token_manager.rs # TokenManager — async token cache +├── models.rs # All request/response structs (serde Serialize/Deserialize) +├── error.rs # AgentIdPError enum +├── agents.rs # Agent registry methods (impl AgentIdPClient) +├── credentials.rs # Credential management methods +├── oauth2.rs # Token issuance methods +├── audit.rs # Audit log methods +├── marketplace.rs # Marketplace methods +└── delegation.rs # A2A delegation methods +``` + +**Checklist:** + +- [ ] Add request/response structs to `models.rs` with `#[derive(Debug, serde::Serialize, serde::Deserialize)]` +- [ ] Add the method to the appropriate `impl AgentIdPClient` block in the relevant `.rs` file. 
If the endpoint belongs to a new domain, create a new file and declare it as `pub mod <name>;` in `lib.rs` +- [ ] Use `self.get_auth_header().await?` for the `Authorization: Bearer` header +- [ ] Use the shared `parse_response::<T>(resp).await` helper (defined in `agents.rs`) to map HTTP status codes to `AgentIdPError` variants +- [ ] Add a doc comment (`///`) to the method with: the HTTP method + path, the success response type, and `# Errors` listing which `AgentIdPError` variants it can return +- [ ] Re-export new public types from `lib.rs` with `pub use models::{NewRequestType, NewResponseType};` +- [ ] Add a unit test using `mockito::Server` (see `token_manager.rs` tests for the pattern) +- [ ] Run `cargo test` and verify all tests pass +- [ ] Run `cargo doc --no-deps --open` and verify the new method appears with correct documentation +- [ ] Verify `cargo clippy -- -D warnings` exits 0 +``` diff --git a/openspec/changes/archive/engineering-docs-phase6-update/specs/ws4-testing/spec.md b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws4-testing/spec.md new file mode 100644 index 0000000..47a8b04 --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws4-testing/spec.md @@ -0,0 +1,179 @@ +# WS4 — Testing Documentation Updates + +**Target file:** `docs/engineering/09-testing.md` + +**Operation:** Append four new subsections to the end of `docs/engineering/09-testing.md`. Do not modify any existing content. + +--- + +## Instructions to Developer + +Append the following Markdown verbatim to the end of `docs/engineering/09-testing.md`, after the final line of `## 10.7 OWASP Top 10 Security Testing Reference`. + +--- + +## Content to Append + +```markdown +--- + +## 10.8 AGNTCY Conformance Test Suite + +**Location:** `tests/agntcy-conformance/conformance.test.ts` + +**Purpose:** Verifies that the AgentIdP platform conforms to the AGNTCY agent identity specification. 
These tests exercise live HTTP requests through the Express application against real PostgreSQL and Redis instances, exactly like integration tests — but they validate AGNTCY-specific protocol guarantees rather than individual endpoint correctness. + +**How to run:** + +```bash +# Run the conformance suite (separate Jest config) +npm run test:agntcy-conformance + +# Equivalent long form +npx jest --config tests/agntcy-conformance/jest.config.cjs + +# Run with TEST_DATABASE_URL and TEST_REDIS_URL overrides +TEST_DATABASE_URL=postgresql://sentryagent:sentryagent@localhost:5432/sentryagent_idp_test \ +TEST_REDIS_URL=redis://localhost:6379/1 \ +npm run test:agntcy-conformance + +# Enable A2A delegation conformance tests (gated by env var) +A2A_ENABLED=true npm run test:agntcy-conformance +``` + +The conformance suite uses its own `jest.config.cjs` (located in `tests/agntcy-conformance/`) so it does not run with `npm test` by default. This is intentional — the suite requires `COMPLIANCE_ENABLED=true` and optionally `A2A_ENABLED=true`, which should not be required for the standard unit/integration test run. + +**What each test validates:** + +| Conformance Test | What it validates | AGNTCY Domain | +|-----------------|-------------------|---------------| +| **Conformance 1 — Agent registration creates DID:WEB identifier** | `POST /api/v1/agents` returns a `did` field matching `did:web:*` pattern when `DID_WEB_DOMAIN` is set. The `did` field is optional in the response (test is conditional on presence) — but when present, it must conform to the `did:web:` scheme. | Non-Human Identity | +| **Conformance 2 — Token issuance via `client_credentials` grant** | Registers an agent, generates credentials via API, then exercises the full OAuth 2.0 Client Credentials flow. Validates that `POST /api/v1/token` returns a 200 response with `access_token` (string), `token_type: 'Bearer'`, and a JWT with 3 dot-separated parts. 
| Authentication | +| **Conformance 3 — A2A delegation chain create + verify** | _(Gated by `A2A_ENABLED=true`.)_ Creates a delegation chain between two agents via `POST /api/v1/oauth2/token/delegate`. If a token is returned, verifies it via `POST /api/v1/oauth2/token/verify-delegation`. Accepts 200 or 201 on creation and 200 or 204 on verification. | Agent-to-Agent Trust | +| **Conformance 4 — Compliance report returns valid AGNTCY structure** | Calls `GET /api/v1/compliance/report` and validates all required AGNTCY fields: `generated_at` (valid ISO 8601), `tenant_id` (string), `agntcy_schema_version: '1.0'`, `sections` (array with `name`, `status`, `details` per entry), `overall_status` (one of `pass/fail/warn`). Also verifies the `agent-identity` and `audit-trail` section names are present. A second request verifies the Redis cache (`X-Cache: HIT` header and `from_cache: true` body field). | Audit, Compliance | + +**Schema tables created by conformance suite:** The suite creates its own tables using `CREATE TABLE IF NOT EXISTS` before tests run. The tables match the production schema and include: `organizations`, `agents`, `credentials`, `audit_events`, `token_revocations`, `agent_did_keys`, `delegation_chains`. These are cleaned up via `DELETE` in `afterEach` (child-to-parent order respecting FK constraints) and dropped implicitly when the test database is reset. 
+ +**Environment variables used:** + +| Variable | Required | Purpose | +|---|---|---| +| `TEST_DATABASE_URL` | Yes (or default) | PostgreSQL connection string for the test database | +| `TEST_REDIS_URL` | Yes (or default) | Redis connection string (index 1 recommended) | +| `COMPLIANCE_ENABLED` | Yes (`'true'`) | Enables the compliance report endpoint | +| `A2A_ENABLED` | No (default `'true'`) | Set to `'false'` to skip Conformance 3 (A2A delegation) | +| `DID_WEB_DOMAIN` | No | When set, Conformance 1 validates the `did:web:` format | + +--- + +## 10.9 Tier Enforcement Tests + +**Location:** `tests/unit/services/TierService.test.ts` and `tests/integration/` + +**The TierService has the following test cases that must all pass:** + +### Unit tests (`tests/unit/services/TierService.test.ts`) + +The unit tests mock PostgreSQL (`Pool`) and Redis (`RedisClientType`) and Stripe. Key scenarios: + +| Test | Description | +|------|-------------| +| `getStatus() — returns correct tier and limits` | Mocks `SELECT tier FROM organizations` returning `'pro'`; mocks Redis GET calls for `rate:tier:calls` and `rate:tier:tokens`; verifies `ITierStatus.limits` matches `TIER_CONFIG['pro']`. | +| `getStatus() — falls back to 0 when Redis unavailable` | Redis GET throws; verifies `usage.callsToday = 0` and `usage.tokensToday = 0` with no error thrown. | +| `getStatus() — returns 'free' when org not found` | `SELECT` returns 0 rows; verifies `tier === 'free'`. | +| `initiateUpgrade() — throws ValidationError on downgrade attempt` | `targetTier = 'free'` when current is `'pro'`; verifies `ValidationError` is thrown with `TIER_RANK` comparison failure message. | +| `initiateUpgrade() — calls Stripe with correct metadata` | Verifies `stripe.checkout.sessions.create` is called with `metadata: { orgId, targetTier }` and `mode: 'subscription'`. | +| `applyUpgrade() — executes UPDATE organizations SET tier` | Verifies parameterized SQL is called with `[targetTier, orgId]`. 
| +| `enforceAgentLimit() — throws TierLimitError when limit reached` | Mock agent count equals `TIER_CONFIG[tier].maxAgents`; verifies `TierLimitError` with `limit` and `current` details. | +| `enforceAgentLimit() — no-op for Enterprise tier` | `TIER_CONFIG['enterprise'].maxAgents = Infinity`; verifies no SQL query for agent count and no error. | +| `fetchTier() — returns 'free' for unknown tier string in DB` | DB returns unrecognised string; verifies `isTierName` guard returns `'free'`. | + +### Integration (middleware) tests + +When writing integration tests for the tier enforcement middleware (`src/middleware/tier.ts`), the following scenarios must be covered: + +| Scenario | Expected behaviour | +|----------|-------------------| +| Request with org on `free` tier, under daily call limit | Request proceeds normally (2xx from downstream handler) | +| Request that would exceed `maxCallsPerDay` for the org's tier | `429 TierLimitError` — body contains `code: 'TIER_LIMIT_EXCEEDED'` | +| Request to `/health` or `/metrics` (unprotected routes) | Tier middleware not applied — always 200 | +| Org not found in `organizations` table | Defaults to `free` tier limits | + +--- + +## 10.10 Analytics Service Tests + +**Location:** `tests/unit/services/AnalyticsService.test.ts` + +The AnalyticsService unit tests mock the PostgreSQL `Pool`. Key scenarios that must be covered: + +| Test | Description | +|------|-------------| +| `recordEvent() — executes UPSERT without throwing` | Verifies `pool.query` is called with the `INSERT ... ON CONFLICT DO UPDATE` SQL pattern and the correct `[tenantId, metricType]` parameters. | +| `recordEvent() — catches and swallows pool errors` | Pool `query` throws; verifies `recordEvent` resolves (not rejects) and the error does not propagate. This is the fire-and-forget contract. | +| `getTokenTrend() — clamps days to 90` | Calls with `days = 200`; verifies `pool.query` receives `clampedDays = 90` as the first parameter. 
| +| `getTokenTrend() — maps rows to ITokenTrendEntry[]` | Mock returns rows with `date: '2026-03-01', count: '42'`; verifies the result is `[{ date: '2026-03-01', count: 42 }]` (count coerced to number). | +| `getAgentActivity() — maps rows to IAgentActivityEntry[]` | Mock returns rows with string-typed `dow`, `hour`, `count`; verifies all are coerced to numbers in the result. | +| `getAgentUsageSummary() — maps rows to IAgentUsageSummaryEntry[]` | Mock returns rows with `token_count: '150'`; verifies `token_count: 150` (number) in the result. | +| `getAgentUsageSummary() — joins with agents table on organization_id` | Verifies the SQL query joins `agents` with `LEFT JOIN analytics_events` and filters `a.organization_id = $1`. | + +**Coverage gate:** `AnalyticsService` must maintain >80% statement, branch, function, and line coverage. Run: + +```bash +npm run test:unit -- --coverage --testPathPattern=AnalyticsService +``` + +--- + +## 10.11 Running the Complete Phase 6 Test Matrix + +All of the following must pass before any Phase 6 feature is considered complete: + +```bash +# 1. Unit tests (all services including Phase 3–6) +npm run test:unit -- --coverage +# Must exit 0 with all 4 coverage metrics ≥ 80% + +# 2. Integration tests (requires PostgreSQL + Redis running) +npm run test:integration + +# 3. AGNTCY conformance suite +COMPLIANCE_ENABLED=true \ +A2A_ENABLED=true \ +npm run test:agntcy-conformance + +# 4. Dependency security audit +npm audit --audit-level=high +# Must exit 0 — no high or critical vulnerabilities + +# 5. 
TypeScript compilation +npx tsc --noEmit +# Must exit 0 — zero type errors +``` + +**Current test file inventory** (as of Phase 6 completion): + +Unit test files in `tests/unit/services/`: + +| File | Service tested | +|------|---------------| +| `AgentService.test.ts` | `AgentService` | +| `AnalyticsService.test.ts` | `AnalyticsService` | +| `AuditService.test.ts` | `AuditService` | +| `AuditVerificationService.test.ts` | `AuditVerificationService` | +| `BillingService.test.ts` | `BillingService` | +| `ComplianceService.test.ts` | `ComplianceService` | +| `CredentialService.test.ts` | `CredentialService` | +| `DIDService.test.ts` | `DIDService` | +| `DelegationService.test.ts` | `DelegationService` | +| `EncryptionService.test.ts` | `EncryptionService` | +| `FederationService.test.ts` | `FederationService` | +| `IDTokenService.test.ts` | `IDTokenService` | +| `OAuth2Service.test.ts` | `OAuth2Service` | +| `OIDCKeyService.test.ts` | `OIDCKeyService` | +| `OrgService.test.ts` | `OrgService` | +| `ScaffoldService.test.ts` | `ScaffoldService` | +| `ScaffoldService.errors.test.ts` | `ScaffoldService` error cases | +| `TierService.test.ts` | `TierService` | +| `WebhookService.test.ts` | `WebhookService` | +``` diff --git a/openspec/changes/archive/engineering-docs-phase6-update/specs/ws5-remaining/spec.md b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws5-remaining/spec.md new file mode 100644 index 0000000..660ef60 --- /dev/null +++ b/openspec/changes/archive/engineering-docs-phase6-update/specs/ws5-remaining/spec.md @@ -0,0 +1,609 @@ +# WS5 — Remaining Documentation Updates + +**Targets:** 5 separate files with surgical edits. + +--- + +## File 1: `docs/engineering/01-overview.md` + +**Operation:** Replace the Phase Roadmap table (Section 4) to reflect Phase 3–6 completion status and add Phase 6 capabilities to the Product Features table. 
+ +--- + +### Change 1a — Update Phase Roadmap Table + +**Find (Section 4, the Phase 3 row):** +``` +| Phase 3 — Enterprise | PLANNED | AGNTCY federation (cross-IdP agent identity), W3C Decentralised Identifiers (DIDs), agent marketplace, advanced compliance reporting, SOC 2 Type II certification, enterprise tier (custom retention, SLAs, advanced RBAC) | +``` + +**Replace with (3 rows — Phase 3 was completed and Phases 4–6 have been added):** +``` +| Phase 3 — Enterprise | COMPLETE | AGNTCY federation (cross-IdP agent identity), W3C Decentralised Identifiers (DIDs), agent marketplace, OIDC provider (A2A delegation), Rust SDK, developer portal (Next.js 14) | +| Phase 4 — Compliance & Security | COMPLETE | AGNTCY compliance reports (agent-identity + audit-trail sections), audit hash chain verification, SOC 2 CC6.1 AES-256-CBC column encryption (`EncryptionService`), DID document caching, federation partner JWKS caching | +| Phase 5 — Scale & Ecosystem | COMPLETE | Multi-tier subscription model (free/pro/enterprise), Stripe billing integration (`BillingService`, `TierService`), tier enforcement middleware (daily call and token limits), webhook subscriptions + delivery history (`WebhookService`), analytics service (daily event aggregation + trend queries) | +| Phase 6 — Market Expansion | COMPLETE | AGNTCY conformance test suite (4 conformance scenarios), API tiers enforced end-to-end, analytics dashboard in developer portal, full Phase 6 engineering documentation update | +``` + +--- + +### Change 1b — Add Phase 3–6 Capabilities to Product Features Table + +**Find (Section 3, the last row of the features table):** +``` +| Health Check | `GET /health` | Checks PostgreSQL and Redis connectivity; unauthenticated; used by load balancers | +``` + +**Insert the following rows after that line (before the closing of the table):** +``` +| W3C Decentralised Identifiers | `GET /api/v1/agents/:id/did`, `GET /api/v1/.well-known/did.json` | DID Core 1.0 documents; `did:web` method; 
EC P-256 keys; AGNTCY extension fields | +| AGNTCY Agent Cards | `GET /api/v1/agents/:id/card` | Machine-readable agent identity summary; AGNTCY schema v1.0 | +| AGNTCY Compliance Reports | `GET /api/v1/compliance/report`, `GET /api/v1/compliance/agent-cards` | Compliance sections: agent-identity + audit-trail; cached 5 min; AGNTCY schema v1.0 | +| Federation (Cross-IdP) | `POST /api/v1/federation/partners`, `GET /api/v1/federation/partners`, `POST /api/v1/federation/verify` | Register partner IdPs; verify cross-IdP JWTs using cached partner JWKS | +| A2A Delegation | `POST /api/v1/oauth2/token/delegate`, `POST /api/v1/oauth2/token/verify-delegation` | Agent-to-agent delegation tokens; OIDC provider (oidc-provider v9) mounted at `/oidc` | +| Webhook Subscriptions | `POST /api/v1/webhooks`, `GET /api/v1/webhooks`, `GET /api/v1/webhooks/:id/deliveries` | Outbound event delivery with HMAC signing; Vault-backed secrets; delivery history | +| Tier Management | `GET /api/v1/tiers/status`, `POST /api/v1/tiers/upgrade` | Free / Pro / Enterprise tiers; daily call and token limits; Stripe Checkout upgrade flow | +| Billing | `POST /api/v1/billing/checkout`, `POST /api/v1/billing/webhook`, `GET /api/v1/billing/status` | Stripe subscription management; webhook event processing | +| Analytics | Internal (via `AnalyticsService`) | Daily aggregated event counts per org; token trend queries (up to 90 days); agent activity heatmap; usage summary | +| Developer Portal | `/portal` (Next.js 14, separate process) | Get-started wizard, SDK explorer, API reference, analytics dashboard, pricing page | +``` + +--- + +### Change 1c — Update Free Tier Limits Table + +**Find (Section 6, entire table):** +``` +| Limit | Value | +|-------|-------| +| Max agents | 100 | +| Max credentials per agent | No hard cap enforced in code (5 is the documented recommendation) | +| Max tokens in flight | 10,000 per agent per calendar month | +| Token TTL | 3,600 seconds (1 hour) | +| Audit log retention | 
90 days | +| API rate limit | 100 requests per minute per IP address | +``` + +**Replace with:** +``` +| Limit | Free Tier | Pro Tier | Enterprise Tier | +|-------|-----------|----------|-----------------| +| Max agents | 100 | 1,000 | Unlimited | +| Max API calls per day | Configured in `TIER_CONFIG` | Configured in `TIER_CONFIG` | Unlimited | +| Max tokens per day | Configured in `TIER_CONFIG` | Configured in `TIER_CONFIG` | Unlimited | +| Token TTL | 3,600 seconds (1 hour) | 3,600 seconds (1 hour) | 3,600 seconds (1 hour) | +| Audit log retention | 90 days | 1 year | Custom | +| API rate limit (per IP) | 100 req/min | 100 req/min | 100 req/min | +| Webhook subscriptions | 0 | 10 | Unlimited | +| Analytics retention | 90 days | 1 year | Custom | + +Tier limits are configured in `src/config/tiers.ts` (`TIER_CONFIG`). Enforcement is handled by `TierService.enforceAgentLimit()` (agent cap) and `src/middleware/tier.ts` (daily call/token caps). Tier upgrades are initiated via `POST /api/v1/tiers/upgrade` and confirmed via the Stripe webhook. +``` + +--- + +## File 2: `docs/engineering/03-tech-stack.md` + +**Operation:** Append new ADR entries after the existing `### ADR-10: Terraform` section. + +**Find (last line of the file):** +``` +**Consequences**: All infrastructure changes must go through Terraform. No manual edits +via the AWS console or GCP console are permitted — they will be overwritten on the next +`terraform apply`. Terraform state is stored in a remote backend and must not be edited +manually. +``` + +**Append the following after that line:** + +```markdown +--- + +### ADR-11: Stripe + +**Status**: Adopted +**Component**: Billing — subscription management and payment processing + +**Decision**: Use Stripe as the payment processing and subscription management platform. The `stripe` npm package (v21+) handles Checkout Session creation, webhook event verification, and subscription lifecycle events. 
+ +**Rationale**: Stripe's hosted Checkout flow eliminates the need to handle PCI-DSS scope for card data. The `stripe.webhooks.constructEvent()` method uses HMAC-SHA256 to verify incoming webhook payloads, preventing replay attacks. The `checkout.session.completed` event carries `metadata: { orgId, targetTier }`, allowing `BillingService` to delegate tier upgrades to `TierService.applyUpgrade()` without coupling billing logic to tier logic. + +**Alternatives considered**: +- Paddle — rejected because its global merchant-of-record model introduced complexities with the open-source free tier. +- Braintree — rejected because Stripe's webhook reliability and developer experience are superior. + +**Consequences**: Stripe requires `STRIPE_SECRET_KEY` (for API calls) and `STRIPE_WEBHOOK_SECRET` (`whsec_...`, for webhook verification). Per-tier Stripe price IDs are configured via `STRIPE_PRICE_ID_PRO` and `STRIPE_PRICE_ID_ENTERPRISE`. All billing webhook handlers must pass the raw `Buffer` body (not parsed JSON) to `stripe.webhooks.constructEvent()` — use `express.raw()` middleware on the webhook route. + +--- + +### ADR-12: oidc-provider (A2A Delegation) + +**Status**: Adopted +**Component**: A2A delegation — OIDC provider for agent-to-agent trust tokens + +**Decision**: Use the `oidc-provider` npm package (v9.7.x) as the OIDC provider for issuing A2A delegation tokens. The provider is mounted as a sub-application at `/oidc` within the Express app. + +**Rationale**: `oidc-provider` is a certified OpenID Connect implementation that handles the full OIDC protocol, including JWKS serving, token endpoint, and discovery document. Rather than implementing a custom delegation token format, using a standards-compliant OIDC provider means delegation tokens can be verified by any OIDC-aware party using the published JWKS at `/oidc/jwks`. 
+ +**Alternatives considered**: +- Custom JWT signing — rejected because hand-rolled token formats cannot benefit from OIDC tooling and interoperability. + +**Consequences**: `A2A_ENABLED` env var gates the OIDC provider — when set to `'false'`, delegation endpoints return 404. The `OIDC_ISSUER` env var must be set to the full base URL of the OIDC provider (e.g. `https://api.sentryagent.ai`). + +--- + +### ADR-13: Next.js 14 (Developer Portal) + +**Status**: Adopted +**Component**: Developer Portal (`portal/`) — public-facing documentation and onboarding + +**Decision**: Use Next.js 14 (App Router) with Tailwind CSS for the developer portal. The portal is a separate process served on its own port (independent of the Express API server). + +**Rationale**: The developer portal has different performance and SEO requirements than the internal operator dashboard (`dashboard/`). Next.js 14's App Router supports React Server Components, which allows the marketing and documentation pages to be statically generated while the analytics dashboard and API Explorer are client-rendered. Tailwind CSS enables rapid UI development consistent with the design system. + +**Alternatives considered**: +- Extending the Vite dashboard — rejected because the developer portal requires server-side rendering for SEO on marketing pages, which Vite does not provide. +- Docusaurus — rejected because the portal includes interactive components (Swagger Explorer, analytics charts) that are not well-suited to a documentation-only tool. + +**Consequences**: The portal (`portal/`) has its own `package.json`, `tsconfig.json`, `tailwind.config.ts`, and `next.config.js`. It is built and run independently: `cd portal && npm install && npm run dev`. The portal calls the AgentIdP REST API using the same `@sentryagent/idp-sdk` as the dashboard. 
+ +--- + +### ADR-14: bull (Job Queue) + kafkajs (Event Streaming) + +**Status**: Adopted (opt-in) +**Component**: Async job processing and event streaming + +**Decision**: Use `bull` (Redis-backed job queue) for async webhook delivery retries and `kafkajs` for event streaming to external consumers. Both are opt-in — the system operates correctly without Kafka configured. + +**Rationale**: Webhook delivery requires retry logic with exponential backoff and dead-letter handling. `bull` provides this out of the box using the existing Redis dependency. `kafkajs` enables high-throughput event streaming for analytics and audit events to external data pipelines without blocking the primary request path. + +**Alternatives considered**: +- BullMQ — considered as a more modern alternative to `bull` but rejected to avoid adding a new package family during Phase 6. Migration is a future backlog item. + +**Consequences**: Kafka is entirely optional. When `KAFKA_BROKERS` is not set, `kafkajs` is not initialised and no events are published. The `bull` queue for webhook delivery requires only the existing Redis instance. + +--- + +### ADR-15: did-resolver + web-did-resolver (W3C DIDs) + +**Status**: Adopted +**Component**: W3C DID Core 1.0 document resolution + +**Decision**: Use `did-resolver` (v4.1.x) as the DID resolution framework and `web-did-resolver` (v2.0.x) for the `did:web` method implementation. + +**Rationale**: `did-resolver` provides a pluggable resolver interface used by both the server (for internal resolution) and by third parties who want to verify AgentIdP-issued DIDs. The `did:web` method maps DID identifiers to HTTPS URLs hosting the DID document JSON, requiring no blockchain. `DIDService` generates documents that conform to the W3C DID Core 1.0 specification and include AGNTCY-specific extension fields. + +**Consequences**: `DID_WEB_DOMAIN` env var is required for DID generation. 
DID documents are cached in Redis (`did:doc:`, TTL from `DID_DOCUMENT_CACHE_TTL_SECONDS`, default 300s). Private keys are stored in HashiCorp Vault KV v2 when Vault is configured; in dev mode, a `dev:no-vault` marker is stored and keys are ephemeral. +``` + +--- + +## File 3: `docs/engineering/04-codebase-structure.md` + +**Operation:** Two surgical edits — update the directory tree and update the `src/` subdirectory table. + +--- + +### Change 3a — Update the Annotated Directory Tree + +**Find (inside the code block in Section 1, after the `sdk-java/` line):** +``` +├── policies/ # OPA policy files +``` + +**Replace the entire block from `├── policies/` down through `└── jest.config.ts # Jest configuration — ts-jest, test timeouts, coverage thresholds` with the following updated version:** + +``` +├── sdk-rust/ # Rust SDK (sentryagent-idp crate) — async, tokio, reqwest, typed errors +├── policies/ # OPA policy files +│ ├── authz.rego # Rego policy — normalise_path + scope-intersection allow rule +│ └── data/scopes.json # Endpoint permission map — used by Rego and TypeScript fallback +├── portal/ # Developer Portal — Next.js 14 App Router, Tailwind CSS +│ ├── app/ # Next.js App Router pages (get-started, pricing, sdks, analytics, settings, login) +│ ├── components/ # Shared UI components (Nav.tsx, SwaggerExplorer.tsx, GetStartedWizard.tsx) +│ ├── hooks/ # React hooks (useAuth.ts) +│ └── types/ # TypeScript type definitions for portal-only types +├── terraform/ # Terraform infrastructure as code +│ ├── modules/ # Reusable modules: agentidp, lb, rds, redis +│ └── environments/ # Environment configs: aws/ (ECS+RDS+ElastiCache), gcp/ (Cloud Run+SQL+Memorystore) +├── monitoring/ # Prometheus and Grafana configuration +│ ├── prometheus/ # prometheus.yml scrape configuration +│ └── grafana/ # Grafana provisioning YAML and dashboard JSON files +├── docs/ # All project documentation +│ ├── engineering/ # Internal engineering knowledge base (this directory) +│ ├── 
developers/ # End-user API reference and developer guides +│ ├── devops/ # Operator runbooks and environment variable reference +│ ├── agntcy/ # AGNTCY alignment documentation +│ └── openapi/ # OpenAPI 3.0 specification files +├── openspec/ # OpenSpec change management — proposals, designs, specs, tasks, archives +├── tests/ # Jest test suite — mirrors src/ structure +│ ├── unit/ # Unit tests (mocked dependencies) — mirrors src/ +│ ├── integration/ # Integration tests (real DB + Redis) +│ ├── agntcy-conformance/ # AGNTCY conformance test suite (separate Jest config) +│ └── load/ # k6 load test scripts +├── Dockerfile # Multi-stage production build (build + runtime stages) +├── docker-compose.yml # Local development: PostgreSQL 14 (port 5432) + Redis 7 (port 6379) +├── docker-compose.monitoring.yml # Monitoring overlay: Prometheus (port 9090) + Grafana (port 3001) +├── package.json # Node.js dependencies and npm scripts +├── tsconfig.json # TypeScript strict configuration — compiled to dist/ +└── jest.config.ts # Jest configuration — ts-jest, test timeouts, coverage thresholds +``` + +--- + +### Change 3b — Add New src/ Subdirectories to Section 2 + +**Find (Section 2 table, the last row):** +``` +| `src/cache/` | Redis client factory — creates and caches a single `redis` client instance | Client is a singleton created once in `src/app.ts` and passed to repositories | +``` + +**Insert these rows after that line:** +``` +| `src/config/` | Configuration constants — `tiers.ts` exports `TIER_CONFIG`, `TIER_RANK`, `TierName`, and `isTierName()` type guard | Imported by `TierService` and `tierMiddleware`; never imports from services | +| `src/middleware/tier.ts` | Tier enforcement middleware — reads org tier from `TierService`, checks daily call counter in Redis, throws `TierLimitError` (429) when limit is exceeded, increments counter on pass | Applied only to API routes; skips `/health`, `/metrics`, and static file routes | +``` + +--- + +### Change 3c — Add New Entries 
to Section 3 (Where to Add New Code) + +**Find (Section 3 table, after the `A new Prometheus metric` row):** +``` +| A new TypeScript type used in 2+ files | `src/types/index.ts` | A new `AgentGroupMembership` interface | +``` + +**Insert these rows after that line:** +``` +| A new tier-gated feature | `src/config/tiers.ts` (add limit field) + `src/middleware/tier.ts` (add check) + service (enforce) | Adding a `maxWebhooksPerOrg` tier limit | +| A webhook event handler | `src/services/WebhookService.ts` (add event type to `WebhookEventType`) + the producer that calls `void webhookService.dispatch(orgId, eventType, payload)` | Emitting `agent.decommissioned` events to subscriber URLs | +| A new analytics metric type | `src/services/AnalyticsService.ts` (call `recordEvent(tenantId, 'new_metric')` in the relevant service using `void`) | Recording `credential_rotated` events for analytics | +| A new DID endpoint | `src/controllers/DIDController.ts` + `src/routes/did.ts` + `src/services/DIDService.ts` (if new method needed) + `policies/data/scopes.json` | Adding `GET /api/v1/agents/:id/did/rotate-key` | +``` + +--- + +## File 4: `docs/engineering/README.md` + +**Operation:** Replace the reading order table and quick reference table to reflect all Phase 6 additions. 
+ +--- + +### Change 4a — Update Reading Order Table + +**Find (Section "Reading Order (New Engineers Start Here)", the last row):** +``` +| 11 | [SDK Integration Guide](11-sdk-guide.md) | All 4 SDKs — installation, examples, contribution guide | 20 min | +``` + +**Replace with (adds the Rust SDK to the description and updates the estimated time):** +``` +| 11 | [SDK Integration Guide](11-sdk-guide.md) | All 5 SDKs (Node.js, Python, Go, Java, Rust) — installation, examples, contribution guide | 25 min | +``` + +**Find (the line after the table):** +``` +**Total estimated reading time for new engineers: ~3.5 hours** +``` + +**Replace with:** +``` +**Total estimated reading time for new engineers: ~4 hours** +``` + +--- + +### Change 4b — Update "Service Deep Dives" Entry + +**Find:** +``` +| 5 | [Service Deep Dives](05-services.md) | All 8 services/components — purpose, interface, schema, error types | 30 min | +``` + +**Replace with:** +``` +| 5 | [Service Deep Dives](05-services.md) | All 17 services/components (incl. 
Phase 3–6: AnalyticsService, TierService, ComplianceService, FederationService, DIDService, WebhookService, BillingService, DelegationService, OIDCService) — purpose, interface, schema, error types | 45 min | +``` + +--- + +### Change 4c — Update Quick Reference Table + +**Find (in the Quick Reference section):** +``` +| Integrate with the SDK | [11-sdk-guide.md](11-sdk-guide.md) | +``` + +**Replace with:** +``` +| Integrate with the SDK (Node.js, Python, Go, Java, Rust) | [11-sdk-guide.md](11-sdk-guide.md) | +``` + +**Find (after the "Integrate with the SDK" row):** +``` +| Understand why a technology was chosen | [03-tech-stack.md](03-tech-stack.md) | +``` + +**Insert after that row:** +``` +| Understand tier limits and billing | [01-overview.md](01-overview.md) (Section 6) + [03-tech-stack.md](03-tech-stack.md) (ADR-11) | +| Understand AGNTCY compliance reports | [05-services.md](05-services.md) (ComplianceService) | +| Understand the A2A delegation flow | [06-walkthroughs.md](06-walkthroughs.md) (Walkthrough 4) | +| Run the AGNTCY conformance suite | [09-testing.md](09-testing.md) (Section 10.8) | +| Add a new Rust SDK endpoint | [11-sdk-guide.md](11-sdk-guide.md) (Section 6 contribution guide) | +``` + +--- + +## File 5: `docs/engineering/06-walkthroughs.md` + +**Operation:** Append three new walkthrough sections at the end of the file. + +**Find (the last line of the file):** +``` +Returns `ICredentialWithSecret` — the updated credential including the new +`clientSecret`. This is the only time the new secret is ever returned. The caller +must store it securely. 
**Append the following after that final paragraph:**
+ +--- + +### Step 4 — Service: insert delegation chain record + +**File:** `src/services/DelegationService.ts` + +```typescript +await this.pool.query( + `INSERT INTO delegation_chains (chain_id, delegator_id, delegatee_id, scope, status, expires_at) + VALUES ($1, $2, $3, $4, 'active', $5)`, + [chainId, delegatorId, delegateeId, scope, expiresAt] +); +``` + +The `chain_id` is a UUID generated by the service. The `delegation_chains` table provides the authoritative source of truth for which delegations are active, independent of any token. + +--- + +### Step 5 — Response + +```json +{ + "chain_id": "f1e2d3c4-...", + "token": "eyJhbGciOiJSUzI1NiJ9...", + "delegator_id": "a1b2c3d4-...", + "delegatee_id": "b2c3d4e5-...", + "scope": "agents:read", + "status": "active", + "expires_at": "2026-04-05T00:00:00Z" +} +``` + +The `token` field is the signed delegation JWT. The delegatee presents this token to `POST /api/v1/oauth2/token/verify-delegation` to prove it has authority to act on the delegator's behalf. + +**Why store both the DB record and the JWT?** The DB record allows revocation — when the delegator calls `DELETE /api/v1/delegation-chains/:chainId`, the record is soft-deleted and all subsequent `verify-delegation` calls will fail even if the JWT itself has not yet expired. + +--- + +## Walkthrough 5 — Tier Enforcement Request Lifecycle + +**Request:** Any authenticated API request when the organisation's daily call limit is reached + +This walkthrough traces how `tierMiddleware` intercepts a request before it reaches the OPA middleware, preventing quota-exceeded traffic from consuming service resources. + +--- + +### Step 1 — Auth middleware passes + +Same as Walkthrough 2, Step 3. The Bearer JWT is verified and `req.user` is populated with `sub` (agentId) and `organization_id`. 
+ +--- + +### Step 2 — Tier middleware: fetch org tier + +**File:** `src/middleware/tier.ts` + +```typescript +const orgId = req.user.organization_id; +const tier = await tierService.fetchTier(orgId); +const config = TIER_CONFIG[tier]; +``` + +`fetchTier()` issues `SELECT tier FROM organizations WHERE organization_id = $1`. Returns `'free'` if no row is found (safe default). + +--- + +### Step 3 — Tier middleware: read daily counter + +**File:** `src/middleware/tier.ts` + +```typescript +const callsKey = `rate:tier:calls:${orgId}`; +const callsToday = await redis.get(callsKey); +const count = callsToday !== null ? parseInt(callsToday, 10) : 0; + +if (count >= config.maxCallsPerDay) { + throw new TierLimitError('calls', config.maxCallsPerDay, { orgId, tier, current: count }); +} +``` + +The Redis key `rate:tier:calls:` is read. If null (first call of the day), count is 0. When count equals or exceeds the tier limit, `TierLimitError` (HTTP 429) is thrown immediately — no further middleware runs. + +--- + +### Step 4 — Tier middleware: increment counter (fire-and-forget) + +**File:** `src/middleware/tier.ts` + +```typescript +// Set TTL to next UTC midnight if key is new +void redis.multi() + .incr(callsKey) + .expireAt(callsKey, nextUtcMidnightUnix()) + .exec(); +next(); +``` + +The counter is incremented atomically using a Redis MULTI block. The `EXPIREAT` command sets the key to auto-delete at the next UTC midnight, resetting the daily counter without any scheduled job. The increment is fire-and-forget — the request proceeds immediately to `opaMiddleware`. + +**Why expire at UTC midnight rather than a rolling 24-hour window?** Tier limits are documented as "per day", which users interpret as resetting at midnight. A rolling window would allow a user to consume their full daily quota twice within a 48-hour period straddling midnight, which is counterintuitive. UTC midnight is predictable and easy to reason about. 
+ +--- + +### Step 5 — Error handler serialises TierLimitError + +**File:** `src/middleware/errorHandler.ts` + +```json +HTTP 429 +{ + "code": "TIER_LIMIT_EXCEEDED", + "message": "Daily API call limit reached for your tier.", + "details": { + "tier": "free", + "limit": 1000, + "current": 1000 + } +} +``` + +The `Retry-After` header is set to the number of seconds until next UTC midnight so clients can implement automatic backoff. + +--- + +## Walkthrough 6 — Analytics Event Capture Flow + +**Trigger:** Any successful token issuance (`POST /api/v1/token`) + +This walkthrough traces how an analytics event is captured without affecting the latency of the primary token issuance response. + +--- + +### Step 1 — Token issuance completes + +**File:** `src/services/OAuth2Service.ts` + +```typescript +const accessToken = signToken(payload, this.privateKey); +// Primary response is ready — analytics is now fire-and-forget +void this.analyticsService.recordEvent(tenantId, 'token_issued'); +tokensIssuedTotal.inc({ scope }); +``` + +The `signToken()` call completes synchronously (RSA signing is CPU-bound, not I/O). The controller can now send the response. `analyticsService.recordEvent()` is called with `void` — the `await` is deliberately omitted. + +**Why `void` instead of `await`?** Token issuance latency must remain below 100ms (per the QA performance gate). A PostgreSQL write adds 5–15ms. Since analytics data is aggregated (not transactional), losing an occasional event due to an error is acceptable. The response is never delayed for analytics. 
+ +--- + +### Step 2 — AnalyticsService: UPSERT daily counter + +**File:** `src/services/AnalyticsService.ts` + +```typescript +async recordEvent(tenantId: string, metricType: string): Promise { + try { + await this.pool.query( + `INSERT INTO analytics_events (organization_id, date, metric_type, count) + VALUES ($1, CURRENT_DATE, $2, 1) + ON CONFLICT (organization_id, date, metric_type) + DO UPDATE SET count = analytics_events.count + 1`, + [tenantId, metricType], + ); + } catch (err) { + console.error('[AnalyticsService] recordEvent failed — primary path unaffected', err); + } +} +``` + +The `ON CONFLICT DO UPDATE` upsert is atomic. Whether this is the first or the ten-thousandth `token_issued` event for this tenant today, the row is updated correctly. All errors are caught and swallowed — the token has already been returned to the caller. + +**Why one row per day per metric, not one row per event?** Storing a row per event would create millions of rows. The daily aggregate model keeps the table compact while still providing daily trend data (the granularity that analytics dashboards need). Sub-day granularity is available from the Prometheus `agentidp_tokens_issued_total` counter if needed. + +--- + +### Step 3 — Dashboard query (deferred) + +When a developer visits the analytics page in the developer portal, the portal calls: + +``` +GET /api/v1/analytics/token-trend?days=30 +``` + +**File:** `src/services/AnalyticsService.ts` — `getTokenTrend(tenantId, 30)` + +```sql +SELECT + gs.date::DATE::TEXT AS date, + COALESCE(ae.count, 0)::INTEGER AS count +FROM generate_series( + CURRENT_DATE - 29 * INTERVAL '1 day', + CURRENT_DATE, + INTERVAL '1 day' +) AS gs(date) +LEFT JOIN analytics_events ae + ON ae.date = gs.date::DATE + AND ae.organization_id = $2 + AND ae.metric_type = 'token_issued' +ORDER BY gs.date ASC +``` + +The `generate_series` + `LEFT JOIN` pattern ensures all 30 days appear in the result, with `count: 0` for days with no events. 
This avoids the need for the client to fill in gaps. +``` diff --git a/openspec/changes/archive/phase-7-devops-field-trial/.openspec.yaml b/openspec/changes/archive/phase-7-devops-field-trial/.openspec.yaml new file mode 100644 index 0000000..fe046f2 --- /dev/null +++ b/openspec/changes/archive/phase-7-devops-field-trial/.openspec.yaml @@ -0,0 +1,14 @@ +id: phase-7-devops-field-trial +title: "DevOps Documentation Update + In-House Field Trial Guide" +status: complete +proposed: 2026-04-04 +approved: 2026-04-04 +approved-by: CEO +completed: 2026-04-04 +workstreams: + - id: WS1 + title: "Update Existing DevOps Docs (Phase 3–6 gaps)" + status: complete + - id: WS2 + title: "New In-House Field Trial Guide" + status: complete diff --git a/openspec/changes/archive/phase-7-devops-field-trial/proposal.md b/openspec/changes/archive/phase-7-devops-field-trial/proposal.md new file mode 100644 index 0000000..11f0eb8 --- /dev/null +++ b/openspec/changes/archive/phase-7-devops-field-trial/proposal.md @@ -0,0 +1,35 @@ +# OpenSpec Proposal — phase-7-devops-field-trial + +**Status:** Approved +**Proposed:** 2026-04-04 +**Approved by:** CEO + +## Problem Statement + +The existing `docs/devops/` was written during Phase 2 (archived 2026-04-02). Phases 3–6 added +substantial new infrastructure — 14 new DB migrations, Phase 6 feature flags, Stripe integration, +new services (Analytics, Tier, Compliance, A2A), and the Next.js portal — none of which are +reflected in the current DevOps documentation. + +Additionally, the team is preparing for in-house Docker Compose field trials and has no +deployment execution guide. + +## Proposed Solution + +**WS1 — Update Existing DevOps Docs** +Audit and update all files in `docs/devops/` to reflect the current Phase 6 state of the +codebase. Every env var, migration, service, and port must be accurate. 
+ +**WS2 — New In-House Field Trial Guide** +Create `docs/devops/field-trial.md` — a step-by-step execution playbook covering: +- Phase A: Stack startup verification +- Phase B: Core product end-to-end journeys +- Phase C: Security guardrails and tenant isolation +- Phase D: Next.js portal verification +- Phase E: AGNTCY conformance suite against live environment +- Phase F: Performance baseline + +## CEO Approval + +Approved 2026-04-04 per CEO directive: "YES Approved per CTO recommendations — implement your +plan per Openspec Protocol" diff --git a/openspec/changes/archive/phase-7-devops-field-trial/specs/devops-update/spec.md b/openspec/changes/archive/phase-7-devops-field-trial/specs/devops-update/spec.md new file mode 100644 index 0000000..3a89ce4 --- /dev/null +++ b/openspec/changes/archive/phase-7-devops-field-trial/specs/devops-update/spec.md @@ -0,0 +1,1217 @@ +# Spec — WS1: DevOps Documentation Update + +**Change:** phase-7-devops-field-trial +**Workstream:** WS1 +**Status:** Approved +**Written:** 2026-04-04 + +## Purpose + +Specify exactly what must be updated in each `docs/devops/` file to reflect the Phase 6 state +of the codebase. The existing docs were written during Phase 2. Phases 3–6 added 14 DB +migrations, new services, feature flags, Stripe billing, Prometheus metrics, Redis key patterns, +and the Next.js portal — none of which appear in the current docs. + +The Developer implementing this spec must update each listed file precisely as described. No +other content in those files should be changed unless explicitly stated. + +--- + +## File: `docs/devops/environment-variables.md` + +### Section: Required Variables — update `DATABASE_URL` description + +Replace the sentence: + +> The application uses `pg.Pool` with this connection string. Connection pool size uses the `pg` +> default (10 connections). + +With: + +> The application uses `pg.Pool` with this connection string. 
Pool sizing is controlled by the +> optional `DB_POOL_*` variables documented below. + +### Section: Required Variables — add `STRIPE_SECRET_KEY` after `DATABASE_URL` + +Insert a new required variable block: + +> **Note on Billing:** `STRIPE_SECRET_KEY`, `STRIPE_WEBHOOK_SECRET`, and `STRIPE_PRICE_ID` are +> required when `BILLING_ENABLED=true`. For local development, set `BILLING_ENABLED=false` and +> use placeholder values. + +### Section: Optional Variables — add all Phase 3–6 variables + +Add each of the following variable blocks in this order, after the existing `VAULT_MOUNT` block +and before `POLICY_DIR`: + +--- + +#### `BILLING_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `false` | +| **Values** | `true`, `false` | +| **Example** | `BILLING_ENABLED=false` | + +Gates Stripe billing integration and free-tier agent limit enforcement. When `false`, no Stripe +API calls are made and all tier limits are unenforced. Set to `false` for in-house testing. + +--- + +#### `STRIPE_SECRET_KEY` + +| | | +|-|-| +| **Required** | Only when `BILLING_ENABLED=true` | +| **Format** | Stripe secret key string (`sk_live_*` or `sk_test_*`) | +| **Example** | `STRIPE_SECRET_KEY=sk_test_placeholder` | + +Stripe API key used to create Checkout Sessions for tier upgrades. Never use a live key in +development. + +--- + +#### `STRIPE_WEBHOOK_SECRET` + +| | | +|-|-| +| **Required** | Only when `BILLING_ENABLED=true` | +| **Format** | Stripe webhook signing secret (`whsec_*`) | +| **Example** | `STRIPE_WEBHOOK_SECRET=whsec_placeholder` | + +Used to verify the HMAC signature on incoming Stripe webhook events. Without this, the billing +webhook endpoint will reject all events. + +--- + +#### `STRIPE_PRICE_ID` + +| | | +|-|-| +| **Required** | Only when `BILLING_ENABLED=true` | +| **Format** | Stripe Price ID string (`price_*`) | +| **Example** | `STRIPE_PRICE_ID=price_placeholder` | + +The Stripe Price object used when creating a Checkout Session for the Pro tier upgrade. 
+ +--- + +#### `ANALYTICS_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `true` | +| **Values** | `true`, `false` | +| **Example** | `ANALYTICS_ENABLED=true` | + +Feature flag that gates the `/api/v1/analytics/*` routes. When `false`, the analytics router is +not mounted and all analytics endpoints return 404. Events are still recorded internally +regardless of this flag. + +--- + +#### `TIER_ENFORCEMENT` + +| | | +|-|-| +| **Required** | No | +| **Default** | `true` | +| **Values** | `true`, `false` | +| **Example** | `TIER_ENFORCEMENT=true` | + +Enables Redis-backed tier limit enforcement per tenant. When `true`, the `tierEnforcement` +middleware checks daily API call and token counts against per-tier limits defined in +`src/config/tiers.ts`. Enterprise tenants with `maxCallsPerDay: Infinity` bypass enforcement. +When `false`, no tier limits are enforced. + +--- + +#### `COMPLIANCE_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `true` | +| **Values** | `true`, `false` | +| **Example** | `COMPLIANCE_ENABLED=true` | + +Feature flag that gates the report and agent-card export endpoints under +`/api/v1/compliance/*`. When `false`, those endpoints return 404. The SOC2 controls endpoint +(`/api/v1/compliance/controls`) and audit chain verification (`/api/v1/audit/verify`) are +always enabled regardless of this flag. + +--- + +#### `REDIS_RATE_LIMIT_ENABLED` + +| | | +|-|-| +| **Required** | No | +| **Default** | `false` | +| **Values** | `true`, `false` | +| **Example** | `REDIS_RATE_LIMIT_ENABLED=true` | + +When `true`, rate limiting uses a Redis-backed sliding-window counter per `client_id`. When +`false`, rate limiting uses an in-process `RateLimiterMemory` store (does not share state +across multiple app instances). 
+ +--- + +#### `RATE_LIMIT_WINDOW_MS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `60000` | +| **Format** | Integer (milliseconds) | +| **Example** | `RATE_LIMIT_WINDOW_MS=60000` | + +Duration of the sliding-window rate limit period in milliseconds. Only effective when +`REDIS_RATE_LIMIT_ENABLED=true`. + +--- + +#### `RATE_LIMIT_MAX_REQUESTS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `100` | +| **Format** | Integer | +| **Example** | `RATE_LIMIT_MAX_REQUESTS=100` | + +Maximum number of requests allowed per `client_id` within `RATE_LIMIT_WINDOW_MS`. Requests +exceeding this limit receive `429 RATE_LIMIT_EXCEEDED`. + +--- + +#### `DB_POOL_MAX` + +| | | +|-|-| +| **Required** | No | +| **Default** | `20` | +| **Format** | Integer | +| **Example** | `DB_POOL_MAX=20` | + +Maximum number of PostgreSQL connections in the pool. Increase for high-throughput production +deployments. Ensure your PostgreSQL instance's `max_connections` is set to at least +`DB_POOL_MAX × number_of_app_instances + 5`. + +--- + +#### `DB_POOL_MIN` + +| | | +|-|-| +| **Required** | No | +| **Default** | `2` | +| **Format** | Integer | +| **Example** | `DB_POOL_MIN=2` | + +Minimum number of idle connections kept alive in the pool. + +--- + +#### `DB_POOL_IDLE_TIMEOUT_MS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `30000` | +| **Format** | Integer (milliseconds) | +| **Example** | `DB_POOL_IDLE_TIMEOUT_MS=30000` | + +Milliseconds a connection can sit idle before being evicted from the pool. + +--- + +#### `DB_POOL_CONNECTION_TIMEOUT_MS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `5000` | +| **Format** | Integer (milliseconds) | +| **Example** | `DB_POOL_CONNECTION_TIMEOUT_MS=5000` | + +Milliseconds the pool waits for a connection to become available before throwing a connection +timeout error. 
+ +--- + +#### `VAULT_KV_MOUNT` + +| | | +|-|-| +| **Required** | No | +| **Default** | `secret` | +| **Format** | String (no leading or trailing slash) | +| **Example** | `VAULT_KV_MOUNT=agentidp` | + +KV v2 secrets engine mount path used by `VaultService`. Equivalent to the existing `VAULT_MOUNT` +variable — note that `.env.example` uses `VAULT_KV_MOUNT`; the underlying service reads either. + +--- + +#### `OPA_URL` + +| | | +|-|-| +| **Required** | No | +| **Format** | URL string | +| **Example** | `OPA_URL=http://localhost:8181` | + +URL of a running OPA server for external policy evaluation. When unset, the application falls +back to the embedded Wasm or JSON policy in `POLICY_DIR`. Used for health check reporting. + +--- + +#### `KAFKA_BROKERS` + +| | | +|-|-| +| **Required** | No | +| **Format** | Comma-separated broker addresses | +| **Example** | `KAFKA_BROKERS=localhost:9092` | + +When set, the `KafkaAdapter` publishes domain events to Kafka. When unset, Kafka publishing is +disabled and events are only delivered via the `WebhookService`. + +--- + +#### `ENFORCE_TLS` + +| | | +|-|-| +| **Required** | No | +| **Default** | `false` | +| **Values** | `true`, `false` | +| **Example** | `ENFORCE_TLS=true` | + +When `true`, the `tlsEnforcementMiddleware` redirects all HTTP requests to HTTPS. Enable in +production deployments where TLS termination is handled at the application layer. 
+ +--- + +### Section: Complete `.env` Example — replace entirely + +Replace the entire existing `.env` Example section with the following complete example that +reflects all Phase 1–6 variables: + +``` +# ── Server ────────────────────────────────────────────────────────────────── +NODE_ENV=development +PORT=3000 +CORS_ORIGIN=http://localhost:3001 + +# ── Database ───────────────────────────────────────────────────────────────── +DATABASE_URL=postgresql://sentryagent:sentryagent@localhost:5432/sentryagent_idp +DB_POOL_MAX=20 +DB_POOL_MIN=2 +DB_POOL_IDLE_TIMEOUT_MS=30000 +DB_POOL_CONNECTION_TIMEOUT_MS=5000 + +# ── Redis ──────────────────────────────────────────────────────────────────── +REDIS_URL=redis://localhost:6379 +REDIS_RATE_LIMIT_ENABLED=true +RATE_LIMIT_WINDOW_MS=60000 +RATE_LIMIT_MAX_REQUESTS=100 + +# ── JWT Keys (generate with openssl — see docs/devops/security.md) ────────── +JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\nMIIEow...\n-----END RSA PRIVATE KEY-----" +JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\nMIIBIj...\n-----END PUBLIC KEY-----" + +# ── Billing (Stripe) — set BILLING_ENABLED=false for local/in-house testing ─ +BILLING_ENABLED=false +STRIPE_SECRET_KEY=sk_test_placeholder +STRIPE_WEBHOOK_SECRET=whsec_placeholder +STRIPE_PRICE_ID=price_placeholder + +# ── Phase 6 Feature Flags ───────────────────────────────────────────────────── +ANALYTICS_ENABLED=true +TIER_ENFORCEMENT=true +COMPLIANCE_ENABLED=true + +# ── HashiCorp Vault (optional) ──────────────────────────────────────────────── +# VAULT_ADDR=http://127.0.0.1:8200 +# VAULT_TOKEN=hvs.XXXXXXXXXXXXXXXXXXXXXX +# VAULT_KV_MOUNT=secret + +# ── OPA (optional) ─────────────────────────────────────────────────────────── +# POLICY_DIR=/etc/sentryagent/policies +# OPA_URL=http://localhost:8181 + +# ── Kafka (optional) ───────────────────────────────────────────────────────── +# KAFKA_BROKERS=localhost:9092 + +# ── TLS ────────────────────────────────────────────────────────────────────── +# 
ENFORCE_TLS=true +``` + +### Section: Variable Validation at Startup — add note on feature flags + +Append after the existing validation list: + +> **Feature flags** (`BILLING_ENABLED`, `ANALYTICS_ENABLED`, `TIER_ENFORCEMENT`, +> `COMPLIANCE_ENABLED`) are read at startup. `ANALYTICS_ENABLED` and `COMPLIANCE_ENABLED` +> determine whether their respective routers are mounted — changing these values requires a +> process restart. + +--- + +## File: `docs/devops/database.md` + +### Section: Schema Overview — replace diagram + +Replace: + +``` +agents + └── credentials (FK: client_id → agents.agent_id, CASCADE DELETE) + +audit_events (no FK — append-only, agent_id is informational) + +token_revocations (no FK — independent revocation store) +``` + +With: + +``` +organizations + ├── agents (FK: organization_id → organizations.org_id) + │ ├── credentials (FK: client_id → agents.agent_id, CASCADE DELETE) + │ └── agent_did_keys (FK: agent_id → agents.agent_id) + └── audit_events (FK: organization_id — informational, no cascade) + +token_revocations (no FK — independent revocation store) +oidc_keys (standalone — OIDC signing key rotation) +federation_partners (standalone — cross-tenant identity) +webhook_subscriptions → webhook_deliveries (FK: subscription_id) +agent_marketplace (standalone — agent discovery catalog) +github_oidc_trust_policies (standalone — CI/CD trust) +billing (FK: org_id → organizations.org_id — one row per org) +delegation_chains (standalone — A2A delegation records) +analytics_events (FK: organization_id — append-only) +tenant_tiers (FK: org_id → organizations.org_id — one row per org) +``` + +### Section: Tables — add new table entries + +After the existing `token_revocations` table section, add the following new table definitions: + +--- + +#### `organizations` + +Created by migration `006_create_organizations_table.sql`. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `org_id` | `UUID` | No | Primary key | +| `name` | `VARCHAR(255)` | No | Organisation display name | +| `slug` | `VARCHAR(64)` | No | URL-safe unique identifier | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `agent_did_keys` + +Created by migration `012_create_agent_did_keys_table.sql`. + +Stores the DID document key material for each agent. One agent may have multiple keys for +rotation purposes. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `agent_id` | `UUID` | No | FK → `agents.agent_id` | +| `key_id` | `VARCHAR(255)` | No | DID key fragment identifier | +| `public_key_jwk` | `JSONB` | No | Public key in JWK format | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### DID columns on `agents` + +Added by migration `013_add_did_columns_to_agents.sql`: + +- `did` — `VARCHAR(512)` nullable — the `did:web` identifier for this agent +- `did_document` — `JSONB` nullable — full DID document + +--- + +#### `oidc_keys` + +Created by migration `014_create_oidc_keys_table.sql`. + +Stores RSA key pairs used for OIDC ID token signing. Supports key rotation — active key is +determined by the most recently created row. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `kid` | `VARCHAR(128)` | No | Key ID — referenced in JWKS | +| `private_key_pem` | `TEXT` | No | Encrypted RSA private key (pgcrypto) | +| `public_key_pem` | `TEXT` | No | RSA public key | +| `algorithm` | `VARCHAR(16)` | No | Always `RS256` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `federation_partners` + +Created by migration `015_create_federation_partners_table.sql`. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | Owning organisation | +| `partner_name` | `VARCHAR(255)` | No | Display name | +| `partner_jwks_url` | `TEXT` | No | URL to partner's JWKS endpoint | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `webhook_subscriptions` + +Created by migration `016_create_webhook_subscriptions_table.sql`. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | Owning organisation | +| `event_type` | `VARCHAR(128)` | No | Event type filter (e.g. `agent.created`) | +| `target_url` | `TEXT` | No | HTTPS delivery endpoint | +| `secret` | `VARCHAR(255)` | Yes | HMAC signing secret for delivery verification | +| `active` | `BOOLEAN` | No | Default: `true` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `webhook_deliveries` + +Created by migration `017_create_webhook_deliveries_table.sql`. + +Records each delivery attempt for a webhook event, including the dead-letter queue entries. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `subscription_id` | `UUID` | No | FK → `webhook_subscriptions.id` | +| `event_type` | `VARCHAR(128)` | No | Event type delivered | +| `payload` | `JSONB` | No | Full event payload | +| `status` | `VARCHAR(32)` | No | `pending`, `delivered`, `failed`, `dead_letter` | +| `response_status` | `INTEGER` | Yes | HTTP status from delivery endpoint | +| `attempt_count` | `INTEGER` | No | Default: `0` | +| `last_attempted_at` | `TIMESTAMPTZ` | Yes | | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +**Dead-letter queue:** After 3 failed delivery attempts, the row status is set to `dead_letter` +and the `agentidp_webhook_dead_letters_total` Prometheus counter is incremented. 
The Prometheus +metric label is `event_type`. + +--- + +#### pgcrypto extension + +Enabled by migration `018_enable_pgcrypto.sql`. Used for encrypting sensitive columns in +`oidc_keys` and credential data. + +--- + +#### `agent_marketplace` + +Created by migration `021_add_agent_marketplace.sql`. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `agent_id` | `UUID` | No | FK → `agents.agent_id` | +| `listing_name` | `VARCHAR(255)` | No | Display name in marketplace | +| `description` | `TEXT` | Yes | Markdown description | +| `tags` | `TEXT[]` | No | Searchable tags. Default: `{}` | +| `published` | `BOOLEAN` | No | Default: `false` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `github_oidc_trust_policies` + +Created by migration `022_add_github_oidc_trust_policies.sql`. + +Maps GitHub Actions OIDC claims to agent identities for CI/CD token exchange. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | Owning organisation | +| `repository` | `VARCHAR(512)` | No | GitHub repository slug (`owner/repo`) | +| `branch` | `VARCHAR(255)` | Yes | Branch filter (null = any branch) | +| `agent_id` | `UUID` | No | Agent to issue a token for on match | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `billing` + +Created by migration `023_add_billing.sql`. + +One row per organisation. Tracks the org's Stripe customer and subscription state. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | FK → `organizations.org_id` (UNIQUE) | +| `stripe_customer_id` | `VARCHAR(255)` | Yes | Stripe Customer ID | +| `stripe_subscription_id` | `VARCHAR(255)` | Yes | Stripe Subscription ID | +| `status` | `VARCHAR(64)` | No | Stripe subscription status or `none` | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `delegation_chains` + +Created by migration `024_add_delegation_chains.sql`. + +Records A2A delegation grants created via the delegation API. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `delegator_agent_id` | `UUID` | No | Agent granting the delegation | +| `delegate_agent_id` | `UUID` | No | Agent receiving the delegation | +| `scopes` | `TEXT[]` | No | Scopes being delegated | +| `expires_at` | `TIMESTAMPTZ` | Yes | Optional expiry | +| `created_at` | `TIMESTAMPTZ` | No | Default: `NOW()` | + +--- + +#### `analytics_events` + +Created by migration `025_add_analytics_events.sql`. + +Append-only event store for analytics. Supports token trend, agent activity, and usage summary +queries. + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `organization_id` | `UUID` | No | Owning organisation | +| `date` | `DATE` | No | Calendar date of the event (UTC) | +| `metric_type` | `VARCHAR(64)` | No | e.g. `token_issued`, `agent_called` | +| `count` | `INTEGER` | No | Event count for this date+type | + +**Index:** `(organization_id, date DESC)` for fast time-series queries. + +--- + +#### `tenant_tiers` + +Created by migration `026_add_tenant_tiers.sql`. + +One row per organisation. Stores the current tier and enforces tier limits via the +`tierEnforcement` middleware. 
+ +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| `id` | `UUID` | No | Primary key | +| `org_id` | `UUID` | No | FK → `organizations.org_id` (UNIQUE) | +| `tier` | `ENUM('free','pro','enterprise')` | No | Current tier. Default: `free` | +| `updated_at` | `TIMESTAMPTZ` | No | Last tier change. Default: `NOW()` | + +**Tier limits** (from `src/config/tiers.ts`): + +| Tier | Max Agents | Max API Calls/Day | Max Tokens/Day | +|------|-----------|-------------------|----------------| +| free | 10 | 1,000 | 1,000 | +| pro | 100 | 50,000 | 50,000 | +| enterprise | unlimited | unlimited | unlimited | + +--- + +### Section: Migration Runner — update "Running migrations" example output + +Replace the example output block (showing 4 migrations) with: + +``` +Running database migrations... + ✓ Applied: 001_create_agents.sql + ✓ Applied: 002_create_credentials.sql + ... + ✓ Applied: 025_add_analytics_events.sql + ✓ Applied: 026_add_tenant_tiers.sql + +Migrations complete. 26 migration(s) applied. +``` + +Replace the "Verifying applied migrations" expected output to show 26 rows rather than 4. + +--- + +### Section: Connection Pool — replace entirely + +Replace: + +> The application uses `pg.Pool` with default settings (max 10 connections). The pool is a +> singleton — one pool per process instance. +> +> To override pool size, modify `src/db/pool.ts`. In production, ensure `DATABASE_URL` includes +> connection pool parameters if using PgBouncer or a managed connection pooler. + +With: + +> The application uses `pg.Pool` with settings read from environment variables. The pool is a +> singleton — one pool per process instance. 
+> +> | Variable | Default | Description | +> |----------|---------|-------------| +> | `DB_POOL_MAX` | `20` | Maximum connections | +> | `DB_POOL_MIN` | `2` | Minimum idle connections | +> | `DB_POOL_IDLE_TIMEOUT_MS` | `30000` | Idle eviction timeout (ms) | +> | `DB_POOL_CONNECTION_TIMEOUT_MS` | `5000` | Acquisition timeout (ms) | +> +> Pool size is exposed as Prometheus metrics: `agentidp_db_pool_active_connections` and +> `agentidp_db_pool_waiting_requests`. Monitor these in production to detect pool exhaustion. + +--- + +## File: `docs/devops/architecture.md` + +### Section: Component Overview — replace the ASCII diagram + +Replace the existing ASCII diagram with: + +``` + ┌───────────────────────────────────────────┐ + │ Next.js Portal (port 3001) │ + │ portal/ — Next.js 14 │ + │ /login /agents /credentials /audit │ + │ /analytics /settings/tier /compliance │ + │ /webhooks /marketplace │ + └────────────────┬──────────────────────────┘ + │ HTTP (localhost:3000) + ┌────────────────▼──────────────────────────┐ + │ AgentIdP Application │ + │ Node.js / Express (port 3000) │ + │ │ + │ TLS MW → Helmet → CORS → Morgan │ + │ Metrics MW → OrgContext MW │ + │ UsageMetering MW → TierEnforcement MW │ + │ Auth MW → OPA MW → Routes │ + │ ↓ │ + │ Controllers → Services → Repos │ + └──────────┬───────────────┬────────────────┘ + │ │ + ┌────────────────▼──┐ ┌────────▼────────┐ + │ PostgreSQL 14 │ │ Redis 7 │ + │ Port 5432 │ │ Port 6379 │ + │ │ │ │ + │ 26 migrations │ │ Rate limits │ + │ (001–026) │ │ Token revoke │ + │ organizations │ │ Monthly counts │ + │ agents + DID keys │ │ Tier counters │ + │ credentials │ │ Compliance cache│ + │ audit_events │ │ │ + │ token_revocations │ └──────────────────┘ + │ oidc_keys │ + │ federation_partne-│ ┌──────────────────┐ + │ rs │ │ HashiCorp Vault │ + │ webhook_subscript-│ │ (optional) │ + │ ions + deliveries │ │ KV v2 — creds │ + │ agent_marketplace │ └──────────────────┘ + │ github_oidc_trust │ + │ billing │ ┌──────────────────┐ + │ 
delegation_chains │ │ Stripe │ + │ analytics_events │ │ (optional) │ + │ tenant_tiers │ │ Billing/upgrades │ + └────────────────────┘ └──────────────────┘ +``` + +### Section: Internal layers table — update + +Replace: + +| Layer | Responsibility | +|-------|---------------| +| Routes | Wire HTTP methods and paths to controllers | +| Auth middleware | Validate Bearer JWT (RS256 + Redis revocation check) | +| Rate limit middleware | Redis sliding-window counter per `client_id` | +| Controllers | Parse and validate request, call service, return response | +| Services | Business logic — no direct DB access | +| Repositories | All SQL queries — no business logic | +| Utils | JWT sign/verify, bcrypt, error types, async handler | + +With: + +| Layer | Responsibility | +|-------|---------------| +| Routes | Wire HTTP methods and paths to controllers | +| TLS middleware | Redirect HTTP → HTTPS when `ENFORCE_TLS=true` | +| Auth middleware | Validate Bearer JWT (RS256 + Redis revocation check) | +| OrgContext middleware | Resolve `organization_id` from JWT and attach to `req` | +| UsageMetering middleware | Fire-and-forget analytics event recording | +| TierEnforcement middleware | Enforce daily API call and token limits via Redis (when `TIER_ENFORCEMENT=true`) | +| OPA middleware | Scope-based authorization via embedded Wasm or JSON policy | +| Controllers | Parse and validate request, call service, return response | +| Services | Business logic — no direct DB access | +| Repositories | All SQL queries — no business logic | +| Utils | JWT sign/verify, bcrypt, error types, async handler | + +### Section: Service Map — replace entirely + +Replace the existing 4-row service map table with the complete Phase 6 service map: + +| Route prefix | Controller | Service(s) | Repository/ies | +|-------------|-----------|-----------|----------------| +| `/api/v1/agents` | `AgentController` | `AgentService` | `AgentRepository` | +| `/api/v1/credentials` | `CredentialController` | 
`CredentialService` | `CredentialRepository` | +| `/api/v1/token` | `TokenController` | `OAuth2Service` | `TokenRepository`, `CredentialRepository`, `AgentRepository` | +| `/api/v1/audit` | `AuditController` | `AuditService` | `AuditRepository` | +| `/api/v1/organizations` | `OrgController` | `OrgService` | `OrgRepository` | +| `/api/v1/compliance/*` | `ComplianceController` | `ComplianceService` | `AuditRepository` | +| `/api/v1/analytics/*` | `AnalyticsController` | `AnalyticsService` | direct pool queries | +| `/api/v1/tiers/*` | `TierController` | `TierService` | pool queries, Stripe SDK | +| `/api/v1/webhooks` | `WebhookController` | `WebhookService` | `WebhookRepository` | +| `/api/v1/federation` | `FederationController` | `FederationService` | direct pool queries | +| `/api/v1/marketplace` | `MarketplaceController` | `MarketplaceService` | direct pool queries | +| `/api/v1/billing` | `BillingController` | `BillingService` | direct pool queries | +| `/.well-known/did.json`, `/api/v1/did/*` | `DIDController` | `DIDService` | `AgentRepository` | +| `/.well-known/openid-configuration`, `/api/v1/oidc/*` | `OIDCController` | `OIDCKeyService`, `IDTokenService` | direct pool queries | +| `/api/v1/oidc/trust-policies` | `OIDCTrustPolicyController` | `OIDCTrustPolicyService` | direct pool queries | +| `/api/v1/delegation` | `DelegationController` | `DelegationService` | direct pool queries | +| `/api/v1/scaffold` | `ScaffoldController` | `ScaffoldService` | — | +| `/health` | inline | — | pool, redis | +| `/metrics` | inline | — | prom-client | + +### Section: Redis — update key patterns table + +Replace the existing 3-row Redis key patterns table with: + +| Key pattern | Example | Purpose | TTL | +|------------|---------|---------|-----| +| `revoked:<jti>` | `revoked:f1e2d3c4-...` | Revoked token JTI | Remaining token lifetime | +| `rate:<client_id>:<window>` | `rate:a1b2c3...:29086156` | Request count per window | `RATE_LIMIT_WINDOW_MS` | +| `monthly:<client_id>:<year>:<month>` | `monthly:a1b2c3...:2026:3` | 
Monthly token issuance count | End of month | +| `rate:tier:calls:<org_id>` | `rate:tier:calls:org-uuid` | Daily API call counter for tier enforcement | Until midnight UTC | +| `rate:tier:tokens:<org_id>` | `rate:tier:tokens:org-uuid` | Daily token issuance counter for tier enforcement | Until midnight UTC | +| `compliance:report:<org_id>` | `compliance:report:org-uuid` | Cached compliance report JSON | 5 minutes | + +### Section: New Services — add after the existing component descriptions + +Add a new `## New Services (Phases 3–6)` section: + +``` +## New Services (Phases 3–6) + +| Service | Source file | Responsibility | +|---------|------------|----------------| +| `AnalyticsService` | `src/services/AnalyticsService.ts` | Fire-and-forget `recordEvent`, time-series `getTokenTrend`, heatmap `getAgentActivity`, per-agent `getAgentUsageSummary` | +| `TierService` | `src/services/TierService.ts` | `getStatus` (reads `tenant_tiers`), `initiateUpgrade` (creates Stripe Checkout Session), `applyUpgrade` (handles Stripe webhook), `enforceAgentLimit` | +| `ComplianceService` | `src/services/ComplianceService.ts` | `generateReport` (Redis-cached 5 min), `exportAgentCards` (AGNTCY format) | +| `DelegationService` | `src/services/DelegationService.ts` | A2A delegation chain creation and verification | +| `DIDService` | `src/services/DIDService.ts` | `did:web` identifier generation and DID document management | +| `OIDCKeyService` | `src/services/OIDCKeyService.ts` | OIDC key rotation, JWKS endpoint | +| `IDTokenService` | `src/services/IDTokenService.ts` | OIDC ID token issuance | +| `FederationService` | `src/services/FederationService.ts` | Cross-tenant agent identity federation | +| `WebhookService` | `src/services/WebhookService.ts` | Event subscriptions, delivery with retry, dead-letter queue | +| `VaultService` | `src/services/VaultService.ts` | HashiCorp Vault KV v2 read/write for credential storage | +| `BillingService` | `src/services/BillingService.ts` | Stripe customer and subscription 
management | +| `MarketplaceService` | `src/services/MarketplaceService.ts` | Agent listing and discovery | +| `OIDCTrustPolicyService` | `src/services/OIDCTrustPolicyService.ts` | GitHub OIDC trust policy management | +| `EventPublisher` | `src/services/EventPublisher.ts` | Routes domain events to webhook delivery and Kafka (if configured) | +``` + +### Section: Ports — update table + +Replace: + +| Service | Internal port | Exposed port (local dev) | +|---------|--------------|--------------------------| +| AgentIdP app | 3000 | 3000 | +| PostgreSQL | 5432 | 5432 | +| Redis | 6379 | 6379 | + +With: + +| Service | Internal port | Exposed port (local dev) | +|---------|--------------|--------------------------| +| AgentIdP app | 3000 | 3000 | +| Next.js portal | 3001 | 3001 | +| PostgreSQL | 5432 | 5432 | +| Redis | 6379 | 6379 | + +### Section: Add new section — API Routes + +Add at the end of the file: + +``` +## API Routes (Phase 6 complete) + +Base path: `/api/v1` + +| Route | Method(s) | Auth | Feature flag | +|-------|----------|------|-------------| +| `/api/v1/agents` | GET, POST, PATCH, DELETE | Bearer JWT | always on | +| `/api/v1/credentials` | GET, POST, DELETE | Bearer JWT | always on | +| `/api/v1/token` | POST | none (client credentials) | always on | +| `/api/v1/audit` | GET | Bearer JWT | always on | +| `/api/v1/audit/verify` | GET | Bearer JWT | always on | +| `/api/v1/organizations` | GET, POST | Bearer JWT | always on | +| `/api/v1/compliance/controls` | GET | none | always on | +| `/api/v1/compliance/report` | GET | Bearer JWT | `COMPLIANCE_ENABLED=true` | +| `/api/v1/compliance/agent-cards` | GET | Bearer JWT | `COMPLIANCE_ENABLED=true` | +| `/api/v1/analytics/token-trend` | GET | Bearer JWT | `ANALYTICS_ENABLED=true` | +| `/api/v1/analytics/agent-activity` | GET | Bearer JWT | `ANALYTICS_ENABLED=true` | +| `/api/v1/analytics/usage-summary` | GET | Bearer JWT | `ANALYTICS_ENABLED=true` | +| `/api/v1/tiers/status` | GET | Bearer JWT | always on 
| +| `/api/v1/tiers/upgrade` | POST | Bearer JWT | always on | +| `/api/v1/webhooks` | GET, POST, DELETE | Bearer JWT | always on | +| `/api/v1/federation` | GET, POST | Bearer JWT | always on | +| `/api/v1/delegation` | GET, POST | Bearer JWT | always on | +| `/api/v1/marketplace` | GET | none | always on | +| `/api/v1/billing` | GET, POST | Bearer JWT | always on | +| `/api/v1/did/*` | GET | none | always on | +| `/api/v1/oidc/*` | GET, POST | mixed | always on | +| `/.well-known/openid-configuration` | GET | none | always on | +| `/.well-known/jwks.json` | GET | none | always on | +| `/.well-known/did.json` | GET | none | always on | +| `/health` | GET | none | always on | +| `/metrics` | GET | none | always on | +``` + +--- + +## File: `docs/devops/local-development.md` + +### Section: Prerequisites table — update + +Replace the existing 3-row prerequisites table with: + +| Tool | Minimum version | Purpose | +|------|----------------|---------| +| Docker | 24+ | Container runtime | +| Docker Compose | 2.20+ | Multi-container orchestration | +| Node.js | 18.0.0 | Run the application, portal, and migrations | +| npm | 9+ | Package management and scripts | +| nvm | any | Recommended for managing Node.js versions | +| openssl | any | RSA key generation | + +Add after the table: + +> **nvm activation:** If using nvm, activate it before running any Node.js commands: +> ```bash +> export NVM_DIR="$HOME/.nvm" && source "$NVM_DIR/nvm.sh" +> ``` + +### Section: Step 1 — clone and install — update + +After `npm install` (which installs the backend), add: + +```bash +# Install portal dependencies +cd portal && npm install && cd .. +``` + +### Section: Step 4 — Start infrastructure services — update the note + +Replace: + +> The `app` service in `docker-compose.yml` requires a `Dockerfile` which has not been written +> yet. This is a **Phase 1 P1 pending item**. The commands below will work once the Dockerfile +> exists. 
+ +With: + +> The full Docker Compose stack (including the `app` container) is available for field trial +> deployments — see the [field trial guide](field-trial.md). For day-to-day development, start +> only the infrastructure services and run the application directly. + +### Section: Step 5 — Run database migrations — update expected output + +Replace the expected output showing 4 migrations with: + +``` +Running database migrations... + ✓ Applied: 001_create_agents.sql + ... + ✓ Applied: 026_add_tenant_tiers.sql + +Migrations complete. 26 migration(s) applied. +``` + +### Section: Add new Step 7 — Start the Next.js portal + +Add after Step 6 (Start the application): + +``` +## Step 7 — Start the Next.js portal (optional) + +The portal is a Next.js 14 application in the `portal/` directory. It communicates with the +AgentIdP backend at `http://localhost:3000`. + +Start the portal development server: + +```bash +cd portal && npm run dev +``` + +The portal starts on port 3001 by default. Open http://localhost:3001. + +Available routes: + +| Route | Description | +|-------|-------------| +| `/login` | OAuth 2.0 login page | +| `/agents` | Agent registry | +| `/credentials` | Credential management | +| `/audit` | Audit log viewer | +| `/analytics` | Token trend and agent activity charts | +| `/settings/tier` | Tier status and upgrade | +| `/compliance` | AGNTCY compliance report | +| `/webhooks` | Webhook subscription management | +| `/marketplace` | Agent marketplace | + +Build the portal for production: + +```bash +cd portal && npm run build +npm start # serves the production build (still inside portal/ after the previous command) +``` + +Ensure `CORS_ORIGIN` in your `.env` includes `http://localhost:3001`: +``` +CORS_ORIGIN=http://localhost:3001 +``` +``` + +--- + +## File: `docs/devops/operations.md` + +### Section: Startup checklist — update + +Replace the existing 4-step checklist with a checklist that reflects Docker Compose full-stack +operation and includes the portal: + +```bash +# 1. 
Start the full stack +docker compose up --build -d + +# 2. Verify all three services are healthy +docker compose ps +# app, postgres, and redis must all show "healthy" + +# 3. Run migrations +docker compose exec app npm run db:migrate + +# 4. Verify application health +curl http://localhost:3000/health +# Expected: {"status":"ok"} + +# 5. (Optional) Start the portal for local dev +cd portal && npm run dev +``` + +### Section: Redis Key Patterns — update table + +Replace the 3-row table with the complete 6-row table (same as the architecture.md update above): + +| Key pattern | Example | Purpose | TTL | +|------------|---------|---------|-----| +| `revoked:<jti>` | `revoked:f1e2d3c4-...` | Revoked token JTI | Remaining token lifetime | +| `rate:<client_id>:<window>` | `rate:a1b2c3...:29086156` | Request count per window | `RATE_LIMIT_WINDOW_MS` | +| `monthly:<client_id>:<year>:<month>` | `monthly:a1b2c3...:2026:3` | Monthly token issuance count | End of month | +| `rate:tier:calls:<org_id>` | `rate:tier:calls:org-uuid` | Daily API call counter for tier enforcement | Until midnight UTC | +| `rate:tier:tokens:<org_id>` | `rate:tier:tokens:org-uuid` | Daily token issuance counter for tier enforcement | Until midnight UTC | +| `compliance:report:<org_id>` | `compliance:report:org-uuid` | Cached compliance report JSON | 5 minutes | + +Add the following new inspection commands at the end of the "Inspect keys" section: + +```bash +# Check tier API call counter for a tenant +redis-cli GET "rate:tier:calls:<org_id>" + +# Check tier token counter for a tenant +redis-cli GET "rate:tier:tokens:<org_id>" + +# Check cached compliance report for a tenant +redis-cli GET "compliance:report:<org_id>" +redis-cli TTL "compliance:report:<org_id>" +``` + +### Section: Monitoring — update Metrics Exposed table + +Replace the existing 6-row metrics table with the complete 19-metric table: + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `agentidp_tokens_issued_total` | Counter | `scope` | OAuth 2.0 tokens issued | +| `agentidp_agents_registered_total` 
| Counter | `deployment_env` | Agents registered | +| `agentidp_http_requests_total` | Counter | `method`, `route`, `status_code` | HTTP requests | +| `agentidp_http_request_duration_seconds` | Histogram | `method`, `route`, `status_code` | HTTP latency | +| `agentidp_db_query_duration_seconds` | Histogram | `operation` | PostgreSQL query duration | +| `agentidp_redis_command_duration_seconds` | Histogram | `command` | Redis command duration | +| `agentidp_webhook_dead_letters_total` | Counter | `event_type` | Webhook deliveries moved to dead-letter queue | +| `agentidp_credentials_expiring_soon_total` | Gauge | — | Credentials expiring within 7 days | +| `agentidp_audit_chain_integrity` | Gauge | — | `1` if audit chain is intact, `0` if broken | +| `agentidp_rate_limit_hits_total` | Counter | `client_id` | Rate limit rejections | +| `agentidp_db_pool_active_connections` | Gauge | — | Active PostgreSQL connections | +| `agentidp_db_pool_waiting_requests` | Gauge | — | Requests waiting for a pool connection | +| `agentidp_tenant_api_calls_total` | Counter | `org_id`, `tier` | API calls per tenant per tier | +| `agentidp_billing_limit_rejections_total` | Counter | `org_id`, `limit_type` | Tier limit enforcement rejections | +| `agentidp_did_documents_generated_total` | Counter | — | DID documents generated | +| `agentidp_oidc_tokens_issued_total` | Counter | — | OIDC ID tokens issued | +| `agentidp_federation_events_total` | Counter | `event_type` | Federation partner events | +| `agentidp_delegation_chains_created_total` | Counter | — | A2A delegation chains created | +| `agentidp_compliance_reports_generated_total` | Counter | — | Compliance reports generated | + +### Section: Troubleshooting — add new entries + +Append the following troubleshooting entries: + +--- + +**Tier limit rejected — 429 with `tier_limit_exceeded` code** + +Symptom: `429 TOO_MANY_REQUESTS` with body `{"code":"tier_limit_exceeded","message":"..."}` + +Check the tenant's current tier counter: 
+```bash +# Check API call counter +docker compose exec redis redis-cli GET "rate:tier:calls:<org_id>" + +# Check the tenant's tier +psql "$DATABASE_URL" -c "SELECT org_id, tier FROM tenant_tiers WHERE org_id = '<org_id>';" +``` + +If the org is on the `free` tier and has hit 1,000 calls/day, upgrade the tier or wait until +midnight UTC for the counter to reset. + +--- + +**Analytics endpoints return 404** + +Cause: `ANALYTICS_ENABLED` is set to `false` in `.env`. + +Fix: Set `ANALYTICS_ENABLED=true` and restart the application. + +--- + +**Compliance report returns 404** + +Cause: `COMPLIANCE_ENABLED` is set to `false` in `.env`. + +Fix: Set `COMPLIANCE_ENABLED=true` and restart the application. + +--- + +**Portal CORS error** + +Symptom: Browser console shows `Access-Control-Allow-Origin` error on requests to +`http://localhost:3000`. + +Fix: Ensure `CORS_ORIGIN` in `.env` includes `http://localhost:3001`: +``` +CORS_ORIGIN=http://localhost:3001 +``` +Restart the application after changing this variable. 
+ +--- + +## File: `docs/devops/deployment.md` + +### Section: Environment Variable Reference (Section 6) — update Quick Reference table + +Add the following rows to the existing quick reference table: + +| Variable | Required | Source (AWS) | Source (GCP) | +|----------|----------|--------------|--------------| +| `BILLING_ENABLED` | No | Task definition env var | Cloud Run env var | +| `STRIPE_SECRET_KEY` | Only if billing enabled | Secrets Manager: `///stripe-secret-key` | Secret Manager: `-stripe-secret-key` | +| `STRIPE_WEBHOOK_SECRET` | Only if billing enabled | Secrets Manager: `///stripe-webhook-secret` | Secret Manager: `-stripe-webhook-secret` | +| `STRIPE_PRICE_ID` | Only if billing enabled | Task definition env var | Cloud Run env var | +| `ANALYTICS_ENABLED` | No | Task definition env var (default: `true`) | Cloud Run env var | +| `TIER_ENFORCEMENT` | No | Task definition env var (default: `true`) | Cloud Run env var | +| `COMPLIANCE_ENABLED` | No | Task definition env var (default: `true`) | Cloud Run env var | +| `REDIS_RATE_LIMIT_ENABLED` | No | Task definition env var | Cloud Run env var | +| `RATE_LIMIT_WINDOW_MS` | No | Task definition env var (default: `60000`) | Cloud Run env var | +| `RATE_LIMIT_MAX_REQUESTS` | No | Task definition env var (default: `100`) | Cloud Run env var | +| `DB_POOL_MAX` | No | Task definition env var (default: `20`) | Cloud Run env var | +| `DB_POOL_MIN` | No | Task definition env var (default: `2`) | Cloud Run env var | +| `DB_POOL_IDLE_TIMEOUT_MS` | No | Task definition env var (default: `30000`) | Cloud Run env var | +| `DB_POOL_CONNECTION_TIMEOUT_MS` | No | Task definition env var (default: `5000`) | Cloud Run env var | +| `KAFKA_BROKERS` | No | Task definition env var | Cloud Run env var | +| `ENFORCE_TLS` | No | Task definition env var | Cloud Run env var | +| `OPA_URL` | No | Task definition env var | Cloud Run env var | +| `VAULT_KV_MOUNT` | No | Task definition env var (default: `secret`) | Cloud Run env var | 
+ +### Section: Step 2.8 / Step 3.7 — Run Database Migrations — update migration count + +In the migration command output examples in sections 2.8 and 3.7, update migration count +references from "4 migration(s)" to "26 migration(s)". + +--- + +## File: `docs/devops/security.md` + +No structural changes required. Append the following note at the end of the "JWT Key Management" +section: + +> **OIDC keys** are separate from the main JWT keys. OIDC signing keys are stored in the +> `oidc_keys` PostgreSQL table (created by migration `014_create_oidc_keys_table.sql`), encrypted +> at rest using pgcrypto (enabled by migration `018_enable_pgcrypto.sql`). The `OIDCKeyService` +> manages rotation. OIDC keys do not need to be set as environment variables — they are +> provisioned automatically on first startup. + +--- + +## File: `docs/devops/vault-setup.md` + +### Section: Add note on `VAULT_KV_MOUNT` alias + +After the `VAULT_MOUNT` variable description, add: + +> **Note:** The `.env.example` file uses `VAULT_KV_MOUNT` as the variable name. The application +> reads both `VAULT_KV_MOUNT` and `VAULT_MOUNT` — prefer `VAULT_KV_MOUNT` in new configurations +> for consistency with the current `.env.example`. + +--- + +## File: `docs/devops/README.md` + +### Section: Document index — add field-trial.md entry + +Add `field-trial.md` to the document index table: + +| Document | Audience | Contents | +|----------|----------|---------| +| ... existing entries ... 
| +| [field-trial.md](field-trial.md) | DevOps engineers, QA | In-house Docker Compose field trial execution playbook | + +--- + +## Acceptance Criteria + +- [ ] `environment-variables.md` documents all 11 new variables from Phases 3–6 +- [ ] `environment-variables.md` complete `.env` example includes all Phase 6 flags +- [ ] `database.md` schema overview reflects all 26 migrations (001–026) +- [ ] `database.md` documents all 10 new tables added in Phases 3–6 +- [ ] `database.md` connection pool section references `DB_POOL_*` env vars +- [ ] `architecture.md` diagram shows Next.js portal at port 3001 +- [ ] `architecture.md` service map covers all 19 route prefixes +- [ ] `architecture.md` Redis table covers all 6 key patterns +- [ ] `architecture.md` new services section documents all 14 Phase 3–6 services +- [ ] `architecture.md` API routes section covers all 26 routes +- [ ] `local-development.md` includes portal setup (Step 7) with all 9 portal routes +- [ ] `operations.md` startup checklist uses `docker compose` (not `docker-compose`) +- [ ] `operations.md` Redis table covers all 6 key patterns with correct TTLs +- [ ] `operations.md` metrics table covers all 19 Prometheus metrics +- [ ] `operations.md` troubleshooting covers tier limits, feature flag 404s, portal CORS +- [ ] `deployment.md` variable quick reference includes all Phase 3–6 variables +- [ ] `security.md` note on OIDC keys added +- [ ] `vault-setup.md` note on `VAULT_KV_MOUNT` alias added +- [ ] `README.md` index includes `field-trial.md` diff --git a/openspec/changes/archive/phase-7-devops-field-trial/specs/field-trial-guide/spec.md b/openspec/changes/archive/phase-7-devops-field-trial/specs/field-trial-guide/spec.md new file mode 100644 index 0000000..d784d3b --- /dev/null +++ b/openspec/changes/archive/phase-7-devops-field-trial/specs/field-trial-guide/spec.md @@ -0,0 +1,1026 @@ +# Spec — WS2: In-House Field Trial Guide + +**Change:** phase-7-devops-field-trial +**Workstream:** WS2 
+**Status:** Approved +**Written:** 2026-04-04 + +## Purpose + +Specify the structure and required content for `docs/devops/field-trial.md`. This document is a +complete, step-by-step execution playbook for in-house Docker Compose field trials. A DevOps +engineer must be able to follow it from a clean machine to a fully verified system without +asking any questions. + +The Developer implementing this spec must write `docs/devops/field-trial.md` exactly as +specified. Every command must be exact and runnable. Every expected output must match what the +application actually produces. + +--- + +## Output file + +**Path:** `docs/devops/field-trial.md` + +--- + +## Document structure + +The document must have the following top-level structure: + +``` +# SentryAgent.ai AgentIdP — In-House Field Trial Guide + +[Introduction paragraph] + +## Prerequisites + +## Section 0 — Environment Setup + +## Phase A — Stack Startup + +## Phase B — Core Product Journeys + +## Phase C — Guardrails + +## Phase D — Portal + +## Phase E — AGNTCY Conformance + +## Phase F — Performance Baseline + +## Troubleshooting +``` + +--- + +## Introduction paragraph (required) + +``` +This guide is the execution playbook for in-house Docker Compose field trials of SentryAgent.ai +AgentIdP. Follow each phase in order. All commands are exact — copy and paste them directly. + +Estimated time to complete all phases: 45–60 minutes. + +Prerequisites must be satisfied before Section 0. 
+``` + +--- + +## Prerequisites + +### Required content + +The Prerequisites section must document each of the following with exact check commands: + +**Docker 24+ and Docker Compose 2.20+** + +```bash +docker --version +# Expected: Docker version 24.x.x or higher + +docker compose version +# Expected: Docker Compose version v2.20.x or higher +``` + +**Node.js 18+ via nvm** + +```bash +export NVM_DIR="$HOME/.nvm" && source "$NVM_DIR/nvm.sh" +node --version +# Expected: v18.x.x or higher +``` + +**openssl** + +```bash +openssl version +# Expected: OpenSSL 1.1.x or higher (any version) +``` + +**Git repo cloned** + +```bash +git clone https://git.sentryagent.ai/vijay_admin/sentryagent-idp.git +cd sentryagent-idp +``` + +**Ports free** + +The following ports must be free on the machine before starting: + +| Port | Service | +|------|---------| +| 3000 | AgentIdP backend | +| 3001 | Next.js portal | +| 5432 | PostgreSQL | +| 6379 | Redis | + +Check all ports: + +```bash +lsof -i :3000 -i :3001 -i :5432 -i :6379 +# Expected: no output (all ports free) +``` + +If any port is in use, kill the occupying process (replace `<PORT>` with the port number): + +```bash +lsof -ti:<PORT> | xargs kill +``` + +--- + +## Section 0 — Environment Setup + +### Required content + +This section guides the engineer through creating a valid `.env` file for field trial use. 
+ +**Step 0.1 — Copy `.env.example`** + +```bash +cp .env.example .env +``` + +**Step 0.2 — Generate RSA-2048 keypair** + +Generate the JWT signing keys: + +```bash +openssl genrsa -out private.pem 2048 +openssl rsa -in private.pem -pubout -out public.pem +``` + +Verify the keys are valid: + +```bash +openssl rsa -in private.pem -check -noout +# Expected: RSA key ok + +openssl rsa -in public.pem -pubin -noout -text 2>&1 | head -3 +# Expected: Public-Key: (2048 bit) +``` + +**Step 0.3 — Write keys into `.env`** + +Write the private key as a single-line PEM with `\n` separators: + +```bash +PRIVATE_KEY_LINE=$(awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}' private.pem) +sed -i "s|JWT_PRIVATE_KEY=.*|JWT_PRIVATE_KEY=\"${PRIVATE_KEY_LINE}\"|" .env +``` + +Write the public key: + +```bash +PUBLIC_KEY_LINE=$(awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}' public.pem) +sed -i "s|JWT_PUBLIC_KEY=.*|JWT_PUBLIC_KEY=\"${PUBLIC_KEY_LINE}\"|" .env +``` + +Verify both keys are present and non-empty: + +```bash +grep -c "BEGIN RSA PRIVATE KEY" .env +# Expected: 1 + +grep -c "BEGIN PUBLIC KEY" .env +# Expected: 1 +``` + +**Step 0.4 — Configure field trial values** + +Set the following values in `.env`. 
These are the correct values for an in-house field trial +(no real Stripe, no Kafka, no Vault): + +```bash +# Disable real Stripe billing for field trial +sed -i "s|BILLING_ENABLED=.*|BILLING_ENABLED=false|" .env +sed -i "s|STRIPE_SECRET_KEY=.*|STRIPE_SECRET_KEY=sk_test_placeholder|" .env +sed -i "s|STRIPE_WEBHOOK_SECRET=.*|STRIPE_WEBHOOK_SECRET=whsec_placeholder|" .env +sed -i "s|STRIPE_PRICE_ID=.*|STRIPE_PRICE_ID=price_placeholder|" .env + +# Keep feature flags at defaults +sed -i "s|ANALYTICS_ENABLED=.*|ANALYTICS_ENABLED=true|" .env +sed -i "s|TIER_ENFORCEMENT=.*|TIER_ENFORCEMENT=true|" .env +sed -i "s|COMPLIANCE_ENABLED=.*|COMPLIANCE_ENABLED=true|" .env + +# Allow portal CORS +sed -i "s|CORS_ORIGIN=.*|CORS_ORIGIN=http://localhost:3001|" .env +``` + +**Step 0.5 — Verify final `.env`** + +```bash +grep -E "^(DATABASE_URL|REDIS_URL|JWT_PRIVATE_KEY|JWT_PUBLIC_KEY|BILLING_ENABLED|ANALYTICS_ENABLED|TIER_ENFORCEMENT|COMPLIANCE_ENABLED|CORS_ORIGIN)=" .env +``` + +Expected output (values abbreviated): + +``` +DATABASE_URL=postgresql://agentidp:password@localhost:5432/agentidp +REDIS_URL=redis://localhost:6379 +JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\n... +JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----\n... +BILLING_ENABLED=false +ANALYTICS_ENABLED=true +TIER_ENFORCEMENT=true +COMPLIANCE_ENABLED=true +CORS_ORIGIN=http://localhost:3001 +``` + +--- + +## Phase A — Stack Startup + +### Required content + +**Step A.1 — Build and start the full stack** + +```bash +docker compose up --build -d +``` + +This builds the `app` container image and starts all three services. The `app` service waits +for `postgres` and `redis` to pass their health checks before starting. 
+ +**Step A.2 — Verify all services are healthy** + +```bash +docker compose ps +``` + +Expected output — all three services must show `healthy`: + +``` +NAME IMAGE STATUS +sentryagent-idp-app-1 sentryagent-idp-app running (healthy) +sentryagent-idp-postgres-1 postgres:14-alpine running (healthy) +sentryagent-idp-redis-1 redis:7-alpine running (healthy) +``` + +If any service shows `starting` or `unhealthy`, wait 15 seconds and run `docker compose ps` +again. If a service remains unhealthy after 60 seconds, see Troubleshooting. + +**Step A.3 — Run database migrations** + +```bash +docker compose exec app npm run db:migrate +``` + +Expected output: + +``` +Running database migrations... + ✓ Applied: 001_create_agents.sql + ✓ Applied: 002_create_credentials.sql + ... + ✓ Applied: 025_add_analytics_events.sql + ✓ Applied: 026_add_tenant_tiers.sql + +Migrations complete. 26 migration(s) applied. +``` + +All 26 migrations must apply without error before proceeding. + +**Step A.4 — Verify application health** + +```bash +curl -s http://localhost:3000/health | jq . +``` + +Expected response: + +```json +{"status":"ok"} +``` + +**Step A.5 — Verify Prometheus metrics** + +```bash +curl -s http://localhost:3000/metrics | head -20 +``` + +Expected: Prometheus text output beginning with `# HELP` lines. Verify these specific metrics +are present: + +```bash +curl -s http://localhost:3000/metrics | grep -E "^# HELP agentidp_" +``` + +Expected: at least 19 lines matching `# HELP agentidp_*`. + +--- + +## Phase B — Core Product Journeys + +### Required content + +This phase tests the end-to-end agent identity lifecycle. Run each step in order. Each step +depends on the output of the previous step. + +> **Note on tokens:** The steps below use shell variables to pass values between commands. Run +> all commands in the same terminal session. 
+ +**Step B.1 — Create an organisation** + +```bash +ORG_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Content-Type: application/json" \ + -d '{"name":"Field Trial Org","slug":"field-trial"}') + +echo $ORG_RESPONSE | jq . +ORG_ID=$(echo $ORG_RESPONSE | jq -r '.org_id') +echo "ORG_ID: $ORG_ID" +``` + +Expected: HTTP 201 response body containing an `org_id` UUID. `ORG_ID` must be a non-empty UUID. + +**Step B.2 — Register an agent** + +```bash +AGENT_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Content-Type: application/json" \ + -d "{ + \"email\": \"trial-agent@field-trial.sentryagent.ai\", + \"agent_type\": \"classifier\", + \"version\": \"1.0.0\", + \"capabilities\": [\"documents:read\", \"documents:classify\"], + \"owner\": \"field-trial-team\", + \"deployment_env\": \"development\", + \"organization_id\": \"$ORG_ID\" + }") + +echo $AGENT_RESPONSE | jq . +AGENT_ID=$(echo $AGENT_RESPONSE | jq -r '.agent_id') +echo "AGENT_ID: $AGENT_ID" +``` + +Expected: HTTP 201 response body containing an `agent_id` UUID. + +**Step B.3 — Generate credentials** + +```bash +CRED_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/credentials \ + -H "Content-Type: application/json" \ + -d "{\"agent_id\": \"$AGENT_ID\"}") + +echo $CRED_RESPONSE | jq . +CLIENT_ID=$(echo $CRED_RESPONSE | jq -r '.client_id') +CLIENT_SECRET=$(echo $CRED_RESPONSE | jq -r '.client_secret') +echo "CLIENT_ID: $CLIENT_ID" +echo "CLIENT_SECRET: $CLIENT_SECRET" +``` + +Expected: HTTP 201 response body containing `client_id` and `client_secret`. The `client_secret` +is only returned once — save it now. + +**Step B.4 — Issue an OAuth 2.0 access token** + +```bash +TOKEN_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$CLIENT_ID&client_secret=$CLIENT_SECRET&scope=read") + +echo $TOKEN_RESPONSE | jq . 
+ACCESS_TOKEN=$(echo $TOKEN_RESPONSE | jq -r '.access_token') +echo "ACCESS_TOKEN obtained: ${ACCESS_TOKEN:0:30}..." +``` + +Expected: HTTP 200 response body with `access_token`, `token_type: "Bearer"`, `expires_in: 3600`, +`scope: "read"`. + +**Step B.5 — Use the token on a protected endpoint** + +```bash +curl -s -H "Authorization: Bearer $ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents | jq . +``` + +Expected: HTTP 200 with a JSON array of agents including the agent registered in Step B.2. + +**Step B.6 — Inspect JWT claims** + +Decode and inspect the access token structure (without verifying signature): + +```bash +echo $ACCESS_TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq . +``` + +Expected claims (values in angle brackets are placeholders for the actual values): + +```json +{ + "sub": "<subject>", + "iss": "https://sentryagent.ai", + "aud": "sentryagent-api", + "scope": "read", + "agent_id": "<agent_id>", + "organization_id": "<org_id>", + "iat": <unix_timestamp>, + "exp": <iat + 3600>, + "jti": "<token_id>" +} +``` + +Verify `exp - iat = 3600` (1 hour TTL). + +**Step B.7 — Rotate credentials and verify old token is rejected** + +Rotate the credentials (generates a new client_secret, revokes the old one): + +```bash +ROTATE_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/credentials \ + -H "Content-Type: application/json" \ + -d "{\"agent_id\": \"$AGENT_ID\"}") + +NEW_CLIENT_ID=$(echo $ROTATE_RESPONSE | jq -r '.client_id') +NEW_CLIENT_SECRET=$(echo $ROTATE_RESPONSE | jq -r '.client_secret') +echo "New credential: $NEW_CLIENT_ID" +``` + +Attempt to use the old token (must be rejected): + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents +# Expected: 401 +``` + +Issue a new token with the new credentials: + +```bash +NEW_TOKEN_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$NEW_CLIENT_ID&client_secret=$NEW_CLIENT_SECRET&scope=read") + +NEW_ACCESS_TOKEN=$(echo 
$NEW_TOKEN_RESPONSE | jq -r '.access_token') +echo "New token obtained." +``` + +Verify the new token works: + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents +# Expected: 200 +``` + +**Step B.8 — Check audit log** + +```bash +curl -s -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + "http://localhost:3000/api/v1/audit?limit=10" | jq . +``` + +Expected: JSON array of audit events. Verify these action types are present from Steps B.1–B.7: +`agent.created`, `credential.generated`, `token.issued`, `credential.rotated`, `token.revoked`. + +--- + +## Phase C — Guardrails + +### Required content + +This phase tests security boundaries. Each test case must be run with the exact command shown +and must produce the specified HTTP status code. + +> **Setup:** Ensure `$NEW_ACCESS_TOKEN` is still set from Phase B. Use `export NEW_ACCESS_TOKEN` +> if switching terminals. + +**Test C.1 — No Authorization header → 401** + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + http://localhost:3000/api/v1/agents +``` + +Expected HTTP status: `401` + +**Test C.2 — Malformed JWT → 401** + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer notavalidjwt" \ + http://localhost:3000/api/v1/agents +``` + +Expected HTTP status: `401` + +**Test C.3 — Expired JWT → 401** + +Use a known-expired token. Generate one with a 1-second TTL (requires a test helper or +manually craft an expired JWT). 
For field trial purposes, use this pre-constructed expired token +(signed with a different key — will fail signature verification and return 401): + +```bash +EXPIRED_TOKEN="eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0IiwiZXhwIjoxfQ.invalid" + +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $EXPIRED_TOKEN" \ + http://localhost:3000/api/v1/agents +``` + +Expected HTTP status: `401` + +**Test C.4 — Valid JWT, wrong scope → 403** + +Issue a token with scope `read`, then attempt to access an endpoint requiring scope `write`: + +```bash +# The NEW_ACCESS_TOKEN has scope "read" +# Attempt an action requiring "write" scope (create agent) +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + -H "Content-Type: application/json" \ + -X POST http://localhost:3000/api/v1/agents \ + -d '{"email":"scope-test@example.com","agent_type":"custom","version":"1.0.0","capabilities":[],"owner":"test","deployment_env":"development"}' +``` + +Expected HTTP status: `403` + +**Test C.5 — Rate limit: 101 requests → 429 on the 101st** + +Send 101 requests in rapid succession. The 101st must return 429. + +```bash +for i in $(seq 1 101); do + STATUS=$(curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents) + if [ "$STATUS" = "429" ]; then + echo "Request $i returned 429 (PASS)" + break + fi +done +``` + +Expected: Output shows `Request 101 returned 429 (PASS)` (or earlier if previous requests in +the session have already counted toward the window). + +After this test, wait 60 seconds for the rate limit window to reset, or use a fresh +`client_id` for subsequent tests. + +**Test C.6 — Tier limit: exceed free-tier API call limit → 429 with `tier_limit_exceeded`** + +The free tier allows 1,000 API calls per day. 
For field trial, manually set the counter to the +limit value to trigger the guard without making 1,000 real requests: + +```bash +# Get the org_id from the token +ORG_ID=$(echo $NEW_ACCESS_TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.organization_id') + +# Force the counter to the limit via Redis CLI +docker compose exec redis redis-cli SET "rate:tier:calls:$ORG_ID" 1001 EX 86400 + +# The next API call must be rejected +TIER_RESPONSE=$(curl -s -w "\n%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents) + +echo "$TIER_RESPONSE" +``` + +Expected: HTTP status `429`. Response body must contain `"code":"tier_limit_exceeded"`. + +Reset the counter after this test: + +```bash +docker compose exec redis redis-cli DEL "rate:tier:calls:$ORG_ID" +``` + +**Test C.7 — Tenant isolation: Org A token cannot access Org B agents → 403** + +Create a second organisation and agent: + +```bash +ORG_B_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/organizations \ + -H "Content-Type: application/json" \ + -d '{"name":"Org B","slug":"org-b"}') + +ORG_B_ID=$(echo $ORG_B_RESPONSE | jq -r '.org_id') +echo "ORG_B_ID: $ORG_B_ID" + +AGENT_B_RESPONSE=$(curl -s -X POST http://localhost:3000/api/v1/agents \ + -H "Content-Type: application/json" \ + -d "{ + \"email\": \"org-b-agent@org-b.sentryagent.ai\", + \"agent_type\": \"monitor\", + \"version\": \"1.0.0\", + \"capabilities\": [], + \"owner\": \"org-b\", + \"deployment_env\": \"development\", + \"organization_id\": \"$ORG_B_ID\" + }") + +AGENT_B_ID=$(echo $AGENT_B_RESPONSE | jq -r '.agent_id') +echo "AGENT_B_ID: $AGENT_B_ID" +``` + +Attempt to access Org B's agent using Org A's token: + +```bash +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents/$AGENT_B_ID +``` + +Expected HTTP status: `403` + +--- + +## Phase D — Portal + +### Required content + +**Step D.1 — Install portal dependencies** + +```bash 
+cd portal && npm install && cd .. +``` + +**Step D.2 — Start the portal development server** + +```bash +cd portal && npm run dev & +``` + +Wait 5 seconds for Next.js to compile, then verify it is listening: + +```bash +curl -s -o /dev/null -w "%{http_code}" http://localhost:3001 +# Expected: 200 or 307 (redirect to /login) +``` + +**Step D.3 — Verify each portal route loads** + +Open a browser and navigate to each of the following URLs. Each must load without a JavaScript +error in the browser console: + +| URL | Expected | +|-----|---------| +| `http://localhost:3001/login` | Login page renders | +| `http://localhost:3001/agents` | Agent list renders (may be empty or show auth redirect) | +| `http://localhost:3001/credentials` | Credentials page renders | +| `http://localhost:3001/audit` | Audit log page renders | +| `http://localhost:3001/analytics` | Analytics dashboard renders | +| `http://localhost:3001/settings/tier` | Tier status page renders | +| `http://localhost:3001/compliance` | Compliance report page renders | +| `http://localhost:3001/webhooks` | Webhooks page renders | +| `http://localhost:3001/marketplace` | Marketplace page renders | + +All 9 routes must load without a blank page or unhandled error. + +**Step D.4 — Verify analytics charts render** + +Navigate to `http://localhost:3001/analytics`. + +Verify both of the following chart components are present in the page DOM: + +```bash +curl -s http://localhost:3001/analytics | grep -c "recharts" +# Expected: 1 or more (recharts is used for TokenTrendChart and AgentHeatmap) +``` + +**Step D.5 — Verify tier status page** + +Navigate to `http://localhost:3001/settings/tier`. + +The page must display the current tier (expected: `free` for a new organisation). 
+ +**Step D.6 — Stop the portal** + +```bash +kill $(lsof -ti:3001) +``` + +--- + +## Phase E — AGNTCY Conformance + +### Required content + +**Step E.1 — Activate nvm** + +```bash +export NVM_DIR="$HOME/.nvm" && source "$NVM_DIR/nvm.sh" +``` + +**Step E.2 — Run the AGNTCY conformance suite** + +```bash +npm run test:agntcy-conformance +``` + +**Step E.3 — Expected output** + +``` +AGNTCY Conformance Suite + Agent Card Export + ✓ exports valid AGNTCY agent card format + ✓ agent card contains required identity fields + Compliance Report + ✓ generates SOC2-aligned compliance report + ✓ compliance report includes all required control domains + +4 passing (Xs) +``` + +All 4 tests must pass. A failure indicates a regression in AGNTCY conformance. + +**What each test validates:** + +| Test | What it validates | +|------|------------------| +| `exports valid AGNTCY agent card format` | The `/api/v1/compliance/agent-cards` endpoint returns an array where each card has `id`, `name`, `version`, `capabilities`, `did` fields in AGNTCY format | +| `agent card contains required identity fields` | Each agent card's `identity` block includes `agent_id`, `organization_id`, `did`, and `deployment_env` | +| `generates SOC2-aligned compliance report` | The `/api/v1/compliance/report` endpoint returns a report with `generated_at`, `controls`, `summary` top-level keys | +| `compliance report includes all required control domains` | The `controls` array in the report includes entries for `access_control`, `audit_logging`, `credential_management`, and `tenant_isolation` | + +--- + +## Phase F — Performance Baseline + +### Required content + +> **Prerequisite:** Apache Bench (`ab`) must be installed. On Ubuntu: `sudo apt install apache2-utils`. 
+> Verify: `ab -V` + +**Step F.1 — Create a token payload file** + +```bash +cat > /tmp/token_payload.json << 'EOF' +grant_type=client_credentials&client_id=REPLACE_CLIENT_ID&client_secret=REPLACE_CLIENT_SECRET&scope=read +EOF +``` + +Replace `REPLACE_CLIENT_ID` and `REPLACE_CLIENT_SECRET` with `$NEW_CLIENT_ID` and +`$NEW_CLIENT_SECRET` from Phase B: + +```bash +cat > /tmp/token_payload.txt << EOF +grant_type=client_credentials&client_id=${NEW_CLIENT_ID}&client_secret=${NEW_CLIENT_SECRET}&scope=read +EOF +``` + +**Step F.2 — Benchmark token endpoint** + +```bash +ab -n 100 -c 10 \ + -p /tmp/token_payload.txt \ + -T "application/x-www-form-urlencoded" \ + http://localhost:3000/api/v1/token +``` + +**Pass criteria for token endpoint:** + +- `Requests per second` > 10 +- `Time per request (mean)` < 100 ms +- p95 (95th percentile, shown as `95%` in the `Percentage of requests` table) < 100 ms +- Zero non-2xx responses + +**Step F.3 — Benchmark agent list endpoint** + +Ensure `$NEW_ACCESS_TOKEN` is still set and valid. 
Issue a fresh token if needed: + +```bash +NEW_ACCESS_TOKEN=$(curl -s -X POST http://localhost:3000/api/v1/token \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=${NEW_CLIENT_ID}&client_secret=${NEW_CLIENT_SECRET}&scope=read" \ + | jq -r '.access_token') +``` + +Run the benchmark: + +```bash +ab -n 100 -c 10 \ + -H "Authorization: Bearer $NEW_ACCESS_TOKEN" \ + http://localhost:3000/api/v1/agents +``` + +**Pass criteria for agent list endpoint:** + +- `Time per request (mean)` < 200 ms +- p95 (`95%` row in the `Percentage of requests` table) < 200 ms +- Zero non-2xx responses + +**Step F.4 — Record results** + +Record the following values from each `ab` output for the field trial report: + +| Endpoint | Metric | Value | +|----------|--------|-------| +| `/api/v1/token` | Requests per second | | +| `/api/v1/token` | Mean time per request (ms) | | +| `/api/v1/token` | p95 (ms) | | +| `/api/v1/agents` | Requests per second | | +| `/api/v1/agents` | Mean time per request (ms) | | +| `/api/v1/agents` | p95 (ms) | | + +A field trial passes Phase F if all p95 values are within the pass criteria above. + +--- + +## Troubleshooting + +### Required content + +Each entry must follow the pattern: **Symptom** → **Cause** → **Fix** with exact commands. + +--- + +**Port already in use** + +Symptom: + +``` +Error response from daemon: driver failed programming external connectivity on endpoint +sentryagent-idp-app-1: Bind for 0.0.0.0:3000 failed: port is already allocated +``` + +Fix: Kill the process occupying the port, then restart: + +```bash +lsof -ti:3000 | xargs kill +lsof -ti:5432 | xargs kill +lsof -ti:6379 | xargs kill +docker compose up --build -d +``` + +--- + +**Container shows `unhealthy`** + +Symptom: `docker compose ps` shows `unhealthy` for a service. 
+ +Fix: Check logs for the unhealthy service: + +```bash +docker compose logs postgres +docker compose logs redis +docker compose logs app +``` + +Common causes: + +| Service | Cause | Fix | +|---------|-------|-----| +| `postgres` | Wrong database credentials | Verify `DATABASE_URL` in `.env` matches `docker-compose.yml` credentials | +| `redis` | Port conflict | Check `lsof -ti:6379` and kill occupying process | +| `app` | Missing env var | Check `docker compose logs app` for `Failed to start server` message | + +--- + +**Migration fails — connection refused** + +Symptom: + +``` +Migration failed: Error: connect ECONNREFUSED 127.0.0.1:5432 +``` + +Cause: Running `npm run db:migrate` directly on the host (not inside the container) while +PostgreSQL is running inside Docker. + +Fix: Always run migrations inside the container during a field trial: + +```bash +docker compose exec app npm run db:migrate +``` + +--- + +**Migration fails — relation already exists** + +Symptom: + +``` +Migration failed: Error: relation "agents" already exists +``` + +Cause: A previous partial migration run left the database in an inconsistent state. + +Fix: Check which migrations have been applied: + +```bash +docker compose exec postgres psql -U agentidp -d agentidp \ + -c "SELECT name FROM schema_migrations ORDER BY name;" +``` + +If the database state cannot be repaired, reset it: + +```bash +docker compose down -v +docker compose up --build -d +docker compose exec app npm run db:migrate +``` + +> `docker compose down -v` destroys all data. Use only when a clean slate is acceptable. + +--- + +**JWT error — invalid signature or key format** + +Symptom: + +``` +Failed to start server: Error: JWT_PRIVATE_KEY and JWT_PUBLIC_KEY environment variables are required +``` + +Or: All tokens return `401 Token signature is invalid`. + +Cause: JWT keys in `.env` have incorrect PEM format — literal newlines instead of `\n` +sequences, or trailing whitespace. 
+ +Fix: Regenerate the keys and re-write them using the exact commands from Step 0.2 and 0.3. + +Verify the key format in `.env`: + +```bash +grep "JWT_PRIVATE_KEY" .env | head -c 100 +# Expected: JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----\nMII... +# NOT: JWT_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY----- +# MII... +``` + +The entire key must be on a single line with `\n` as literal backslash-n characters, not +actual newlines. + +--- + +**Portal CORS error** + +Symptom: Browser console shows: + +``` +Access to XMLHttpRequest at 'http://localhost:3000/api/v1/...' from origin 'http://localhost:3001' +has been blocked by CORS policy: No 'Access-Control-Allow-Origin' header is present +``` + +Cause: `CORS_ORIGIN` in `.env` does not include `http://localhost:3001`, or is set to a +different value. + +Fix: + +```bash +sed -i "s|CORS_ORIGIN=.*|CORS_ORIGIN=http://localhost:3001|" .env +docker compose up --build -d +``` + +Wait for the `app` container to become healthy before retrying. + +--- + +**Tier counter not resetting** + +Symptom: All API calls return 429 `tier_limit_exceeded` even after waiting. + +Cause: The Redis tier counter was manually set in Test C.6 and not deleted. + +Fix: + +```bash +# Get your org_id from the token +ORG_ID=$(echo $NEW_ACCESS_TOKEN | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.organization_id') + +docker compose exec redis redis-cli DEL "rate:tier:calls:$ORG_ID" +docker compose exec redis redis-cli DEL "rate:tier:tokens:$ORG_ID" +``` + +--- + +**`ab` not found** + +Symptom: `ab: command not found` + +Fix: + +```bash +sudo apt-get update && sudo apt-get install -y apache2-utils +# or on macOS: +brew install httpd +``` + +--- + +**AGNTCY conformance test fails** + +Symptom: One or more tests in `npm run test:agntcy-conformance` fail. + +Diagnosis steps: + +1. Ensure the backend is running and healthy: `curl -s http://localhost:3000/health` +2. Ensure `COMPLIANCE_ENABLED=true` in `.env` (check with `grep COMPLIANCE_ENABLED .env`) +3. 
Ensure at least one agent has been registered (Phase B must have been completed) +4. Check the test output for the specific assertion that failed +5. Check `docker compose logs app` for errors around compliance report generation + +If the issue is a Redis cache hit returning stale data: + +```bash +docker compose exec redis redis-cli KEYS "compliance:*" | xargs docker compose exec redis redis-cli DEL +``` + +Then re-run the conformance suite.
diff --git a/scripts/start-validator.sh b/scripts/start-validator.sh
new file mode 100755
index 0000000..476e036
--- /dev/null
+++ b/scripts/start-validator.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# =============================================================================
+# SentryAgent.ai — Start V&V Architect (Lead Validator)
+# =============================================================================
+# Launches an independent Claude Code instance as the Lead Validator.
+# This agent verifies the CTO's work against the PRD/OpenSpec.
+#
+# Usage:
+#   ./scripts/start-validator.sh
+#
+# Requires:
+#   - the `claude` CLI on PATH
+#   - VALIDATOR.md in the project root (passed to claude as its system prompt)
+#
+# Exit status:
+#   Non-zero if a prerequisite is missing or a setup step fails; otherwise
+#   this shell is replaced by `claude` (via exec) and exits with its status.
+# =============================================================================
+
+# -e: abort on unhandled errors; -u: reject unset variables;
+# pipefail: a pipeline fails if any stage fails.
+set -euo pipefail
+
+# The script lives in <project>/scripts, so the project root is one level up.
+PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+readonly PROJECT_ROOT
+readonly VALIDATOR_WORKSPACE="$PROJECT_ROOT/.validator-workspace"
+readonly VALIDATOR_PROMPT="$PROJECT_ROOT/VALIDATOR.md"
+
+echo "=============================================="
+echo " SentryAgent.ai — Starting V&V Architect Agent"
+echo "=============================================="
+echo ""
+echo " Project: $PROJECT_ROOT"
+echo " Workspace: $VALIDATOR_WORKSPACE"
+echo " Role Config: $VALIDATOR_PROMPT"
+echo ""
+echo " The V&V Architect will:"
+echo " 1. Audit Code against OpenSpec PRD"
+echo " 2. Enforce DRY Principles"
+echo " 3. Log Issues for CTO Resolution"
+echo " 4. Maintain Local Fail-Safe Ledger"
+echo ""
+echo "=============================================="
+
+# Fail fast, before touching the filesystem, if a prerequisite is missing.
+# Diagnostics go to stderr so they are not lost when stdout is redirected.
+if [[ ! -f "$VALIDATOR_PROMPT" ]]; then
+  echo "ERROR: VALIDATOR.md not found at $VALIDATOR_PROMPT" >&2
+  echo "Please ensure you have created the System Instruction file." >&2
+  exit 1
+fi
+
+if ! command -v claude >/dev/null 2>&1; then
+  echo "ERROR: 'claude' CLI not found on PATH" >&2
+  echo "Please install Claude Code before starting the validator." >&2
+  exit 1
+fi
+
+# Ensure the Validator Workspace and Local Ledger exist
+mkdir -p "$VALIDATOR_WORKSPACE/.openspec/vv_audit"
+
+# Copy the latest CLAUDE.md into the validator workspace when the project has one
+if [[ -f "$PROJECT_ROOT/CLAUDE.md" ]]; then
+  cp "$PROJECT_ROOT/CLAUDE.md" "$VALIDATOR_WORKSPACE/CLAUDE.md"
+fi
+
+# Launch Claude Code as an independent Auditor. exec replaces this shell, so
+# the script's exit status is the exit status of `claude`.
+cd "$VALIDATOR_WORKSPACE"
+exec claude --system-prompt-file "$VALIDATOR_PROMPT"
+