feat(phase-4): WS1 — Production Hardening (Redis rate limiting, DB pool, health endpoint, k6)

Rate limiting:
- Replace in-memory express-rate-limit with ioredis + rate-limiter-flexible (sliding window)
- Graceful fallback to RateLimiterMemory when Redis unreachable
- RATE_LIMIT_WINDOW_MS / RATE_LIMIT_MAX_REQUESTS env var config
- Retry-After header on 429 responses
- agentidp_rate_limit_hits_total Prometheus counter

Database pool:
- Explicit pg.Pool config via DB_POOL_MAX/MIN/IDLE_TIMEOUT_MS/CONNECTION_TIMEOUT_MS
- Defaults: max=20, min=2, idle=30s, conn timeout=5s
- agentidp_db_pool_active_connections + agentidp_db_pool_waiting_requests gauges

Health endpoint:
- GET /health/detailed — per-service status (database, Redis, Vault, OPA)
- healthy / degraded (>1000ms) / unreachable classification
- HTTP 200 (all healthy) / 207 (any degraded) / 503 (any unreachable)

Load tests:
- tests/load/ with k6 scenarios for agent registration (100 VUs), token issuance (1000 VUs), credential rotation (50 VUs)
- npm run load-test script

Tests: 586 passing, zero TypeScript errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 04:20:37 +00:00
parent b0f70b7ac4
commit 1b682c22b2
16 changed files with 1467 additions and 113 deletions
--- a/tests/load/agent-registration.js
+++ b/tests/load/agent-registration.js
@@ -0,0 +1,85 @@
+/**
+ * k6 load test — Agent Registration
+ *
+ * Scenario : POST /api/v1/agents
+ * VUs      : 100
+ * Duration : 60 seconds
+ * Thresholds:
+ *   p95 response time < 500 ms
+ *   HTTP error rate   < 1 %
+ *
+ * Usage:
+ *   BASE_URL=http://localhost:3000 k6 run tests/load/agent-registration.js
+ */
+
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+import { uuidv4 } from 'https://jslib.k6.io/k6-utils/1.4.0/index.js';
+
+// ── Custom metrics ─────────────────────────────────────────────────────────────
+const errorRate = new Rate('error_rate'); // one boolean sample per iteration (true = failed); gated by the `error_rate` threshold
+const registrationDuration = new Trend('registration_duration_ms', true); // second argument `true` marks the Trend as a time metric
+
+// ── Configuration ──────────────────────────────────────────────────────────────
+export const options = {
+  vus: 100, // number of concurrent virtual users
+  duration: '60s', // total length of the test run
+  thresholds: {
+    // The run fails unless the 95th percentile of all HTTP request durations is below 500 ms
+    http_req_duration: ['p(95)<500'],
+    // The run fails unless the custom `error_rate` Rate metric stays below 1 %
+    error_rate: ['rate<0.01'],
+  },
+};
+
+const BASE_URL = __ENV.BASE_URL || 'http://localhost:3000';
+
+// ── Default function (executed per VU iteration) ───────────────────────────────
+export default function agentRegistration() {
+  const url = `${BASE_URL}/api/v1/agents`;
+
+  const payload = JSON.stringify({
+    name: `load-test-agent-${uuidv4()}`,
+    description: 'Created by k6 load test',
+    deploymentEnvironment: 'load-test',
+    capabilities: ['data-processing'],
+    metadata: {
+      loadTest: true,
+      vu: __VU,
+      iter: __ITER,
+    },
+  });
+
+  const params = {
+    headers: {
+      'Content-Type': 'application/json',
+      Accept: 'application/json',
+    },
+    timeout: '10s',
+  };
+
+  const response = http.post(url, payload, params);
+
+  // Record custom timing
+  registrationDuration.add(response.timings.duration);
+
+  // Validate response
+  const success = check(response, {
+    'status is 201': (r) => r.status === 201,
+    'response has agentId': (r) => {
+      try {
+        const body = JSON.parse(r.body);
+        return typeof body.agentId === 'string' && body.agentId.length > 0;
+      } catch {
+        return false;
+      }
+    },
+    'response time < 500ms': (r) => r.timings.duration < 500,
+  });
+
+  errorRate.add(!success);
+
+  // Brief think-time between iterations to avoid overwhelming the server
+  sleep(0.1);
+}