feat(phase-4): WS1 — Production Hardening (Redis rate limiting, DB pool, health endpoint, k6)

Rate limiting:
- Replace in-memory express-rate-limit with ioredis + rate-limiter-flexible (sliding window)
- Graceful fallback to RateLimiterMemory when Redis unreachable
- RATE_LIMIT_WINDOW_MS / RATE_LIMIT_MAX_REQUESTS env var config
- Retry-After header on 429 responses
- agentidp_rate_limit_hits_total Prometheus counter

Database pool:
- Explicit pg.Pool config via DB_POOL_MAX/MIN/IDLE_TIMEOUT_MS/CONNECTION_TIMEOUT_MS
- Defaults: max=20, min=2, idle=30s, conn timeout=5s
- agentidp_db_pool_active_connections + agentidp_db_pool_waiting_requests gauges

Health endpoint:
- GET /health/detailed — per-service status (database, Redis, Vault, OPA)
- healthy / degraded (>1000ms) / unreachable classification
- HTTP 200 (all healthy) / 207 (any degraded) / 503 (any unreachable)

Load tests:
- tests/load/ with k6 scenarios for agent registration (100 VUs), token issuance (1000 VUs), credential rotation (50 VUs)
- npm run load-test script

Tests: 586 passing, zero TypeScript errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
SentryAgent.ai Developer
2026-04-02 04:20:37 +00:00
parent b0f70b7ac4
commit 1b682c22b2
16 changed files with 1467 additions and 113 deletions

View File

@@ -0,0 +1,85 @@
/**
* k6 load test — Agent Registration
*
* Scenario : POST /api/v1/agents
* VUs : 100
* Duration : 60 seconds
* Thresholds:
* p95 response time < 500 ms
* HTTP error rate < 1 %
*
* Usage:
* BASE_URL=http://localhost:3000 k6 run tests/load/agent-registration.js
*/
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
import { uuidv4 } from 'https://jslib.k6.io/k6-utils/1.4.0/index.js';
// ── Custom metrics ─────────────────────────────────────────────────────────────
// Rate metric published as `error_rate`. The default function adds one sample
// per iteration, and the `error_rate` threshold in `options` is evaluated
// against it.
const errorRate = new Rate('error_rate');
// Trend metric published as `registration_duration_ms`; the default function
// adds `response.timings.duration` of each POST to it. The second argument
// (`true`) is k6's `isTime` flag, so values are presented as durations.
const registrationDuration = new Trend('registration_duration_ms', true);
// ── Configuration ──────────────────────────────────────────────────────────────
// Pass/fail limits, named once so the threshold expressions below are derived
// from a single place.
const P95_LATENCY_LIMIT_MS = 500;
const ERROR_RATE_LIMIT = 0.01;

/**
 * k6 run options: 100 virtual users for 60 seconds.
 *
 * Thresholds:
 *   - `http_req_duration`: p95 of all HTTP request durations must stay below 500 ms.
 *   - `error_rate`: the custom error-rate metric must stay below 1 %.
 */
export const options = {
  vus: 100,
  duration: '60s',
  thresholds: {
    http_req_duration: [`p(95)<${P95_LATENCY_LIMIT_MS}`],
    error_rate: [`rate<${ERROR_RATE_LIMIT}`],
  },
};
// Root URL of the server under test. Taken from the BASE_URL environment
// variable, falling back to the local default when it is unset or empty.
// Trailing slashes are stripped so that `${BASE_URL}/api/...` never produces a
// double slash (e.g. BASE_URL=http://host/ → http://host/api/...).
const BASE_URL = (__ENV.BASE_URL || 'http://localhost:3000').replace(/\/+$/, '');
// ── Default function (executed per VU iteration) ───────────────────────────────
/**
 * One load-test iteration: register a uniquely named agent via
 * POST `${BASE_URL}/api/v1/agents` and record the outcome.
 *
 * Metrics written:
 *   - `registrationDuration` receives `response.timings.duration`.
 *   - `errorRate` receives `true` when the request failed functionally
 *     (status is not 201, or the body has no non-empty string `agentId`).
 *
 * Latency is reported as its own check and deliberately does NOT feed
 * `errorRate`: a slow but valid 201 is not an HTTP error, and counting it as
 * one would silently tighten the `p(95)<500` latency threshold into a 1 % limit.
 */
export default function agentRegistration() {
  const url = `${BASE_URL}/api/v1/agents`;

  // A fresh UUID per iteration keeps agent names unique across VUs and iterations.
  const payload = JSON.stringify({
    name: `load-test-agent-${uuidv4()}`,
    description: 'Created by k6 load test',
    deploymentEnvironment: 'load-test',
    capabilities: ['data-processing'],
    metadata: {
      loadTest: true,
      vu: __VU,
      iter: __ITER,
    },
  });

  const params = {
    headers: {
      'Content-Type': 'application/json',
      Accept: 'application/json',
    },
    timeout: '10s',
  };

  const response = http.post(url, payload, params);

  // Record custom timing
  registrationDuration.add(response.timings.duration);

  // Functional validation — only these checks decide whether the iteration
  // counts as an error.
  const succeeded = check(response, {
    'status is 201': (r) => r.status === 201,
    'response has agentId': (r) => {
      try {
        const body = JSON.parse(r.body);
        return typeof body.agentId === 'string' && body.agentId.length > 0;
      } catch {
        // Unparseable or missing body (e.g. timeout, non-JSON error page).
        return false;
      }
    },
  });

  // Latency check keeps its original name for reporting, but is evaluated
  // separately so it cannot inflate the error rate.
  check(response, {
    'response time < 500ms': (r) => r.timings.duration < 500,
  });

  errorRate.add(!succeeded);

  // Brief think-time between iterations to avoid overwhelming the server
  sleep(0.1);
}