feat(phase-4): WS1 — Production Hardening (Redis rate limiting, DB pool, health endpoint, k6)
Rate limiting: - Replace in-memory express-rate-limit with ioredis + rate-limiter-flexible (sliding window) - Graceful fallback to RateLimiterMemory when Redis unreachable - RATE_LIMIT_WINDOW_MS / RATE_LIMIT_MAX_REQUESTS env var config - Retry-After header on 429 responses - agentidp_rate_limit_hits_total Prometheus counter Database pool: - Explicit pg.Pool config via DB_POOL_MAX/MIN/IDLE_TIMEOUT_MS/CONNECTION_TIMEOUT_MS - Defaults: max=20, min=2, idle=30s, conn timeout=5s - agentidp_db_pool_active_connections + agentidp_db_pool_waiting_requests gauges Health endpoint: - GET /health/detailed — per-service status (database, Redis, Vault, OPA) - healthy / degraded (>1000ms) / unreachable classification - HTTP 200 (all healthy) / 207 (any degraded) / 503 (any unreachable) Load tests: - tests/load/ with k6 scenarios for agent registration (100 VUs), token issuance (1000 VUs), credential rotation (50 VUs) - npm run load-test script Tests: 586 passing, zero TypeScript errors Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
319
tests/unit/controllers/HealthDetailedController.test.ts
Normal file
319
tests/unit/controllers/HealthDetailedController.test.ts
Normal file
@@ -0,0 +1,319 @@
|
||||
/**
|
||||
* Unit tests for src/controllers/HealthDetailedController.ts
|
||||
*
|
||||
* Covers:
|
||||
* - all services healthy → HTTP 200, status "healthy"
|
||||
* - a service degraded (latency > 1000ms) → HTTP 207, status "degraded"
|
||||
* - a service unreachable (throws) → HTTP 503, status "unreachable"
|
||||
* - optional services (Vault, OPA) omitted when not configured
|
||||
* - Vault and OPA included when URLs configured
|
||||
*/
|
||||
|
||||
import express, { Application } from 'express';
|
||||
import request from 'supertest';
|
||||
import { Pool, PoolClient } from 'pg';
|
||||
import { HealthDetailedController, HealthDetailedDeps } from '../../../src/controllers/HealthDetailedController';
|
||||
|
||||
// ── fetch mock ────────────────────────────────────────────────────────────────
|
||||
|
||||
// Typed jest stand-in installed over the global fetch for the whole file.
// The Vault/OPA tests below program its resolved/rejected value per test.
type MockFetchFn = jest.MockedFunction<typeof fetch>;

const mockFetch: MockFetchFn = jest.fn() as MockFetchFn;

globalThis.fetch = mockFetch;
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Builds a stub pg client exposing only `query` and `release`.
 *
 * @param latencyMs - delay, in milliseconds, before `query` resolves (unused when `error` is given)
 * @param error - when provided, `query` rejects with this error instead of resolving
 * @returns an object whose `query` and `release` members are jest mocks
 */
function makePoolClient(latencyMs = 0, error?: Error): jest.Mocked<Pick<PoolClient, 'query' | 'release'>> {
  const query = jest.fn();

  if (error) {
    query.mockRejectedValue(error);
  } else {
    // Resolve with an empty result set once the requested delay has elapsed.
    query.mockImplementation(
      () =>
        new Promise((resolve) => {
          setTimeout(() => resolve({ rows: [], rowCount: 0 }), latencyMs);
        }),
    );
  }

  const stub = { query, release: jest.fn() };
  return stub as unknown as jest.Mocked<Pick<PoolClient, 'query' | 'release'>>;
}
|
||||
|
||||
/**
 * Builds a stub pg pool exposing only `connect`.
 *
 * @param connectError - when provided, `connect` rejects with this error
 * @param queryLatencyMs - forwarded to makePoolClient as the query delay
 * @param queryError - forwarded to makePoolClient as the query failure
 * @returns an object whose `connect` member is a jest mock
 */
function makePool(connectError?: Error, queryLatencyMs = 0, queryError?: Error): jest.Mocked<Pool> {
  const connect = jest.fn();

  if (connectError) {
    connect.mockRejectedValue(connectError);
  } else {
    // A single stub client is created up front and handed out on every connect().
    connect.mockResolvedValue(makePoolClient(queryLatencyMs, queryError));
  }

  return { connect } as unknown as jest.Mocked<Pool>;
}
|
||||
|
||||
/**
 * Builds a stub Redis client exposing only `ping`.
 *
 * @param pingError - when provided, `ping` rejects with this error
 * @param latencyMs - delay, in milliseconds, before `ping` resolves with 'PONG'
 * @returns an object whose `ping` member is a jest mock
 */
function makeRedisClient(pingError?: Error, latencyMs = 0): { ping(): Promise<string> } {
  const ping = jest.fn();

  if (pingError) {
    ping.mockRejectedValue(pingError);
  } else {
    ping.mockImplementation(
      () =>
        new Promise((resolve) => {
          setTimeout(() => resolve('PONG'), latencyMs);
        }),
    );
  }

  return { ping };
}
|
||||
|
||||
/**
 * Creates a bare Express app with the controller under test mounted at
 * GET /health/detailed.
 *
 * @param deps - dependencies handed to the HealthDetailedController constructor
 * @returns the Express application, ready to be driven by supertest
 */
function buildApp(deps: HealthDetailedDeps): Application {
  const server = express();
  const healthController = new HealthDetailedController(deps);

  server.get('/health/detailed', healthController.handle);

  return server;
}
|
||||
|
||||
// ── Tests ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Reset shared mock state before every test so no test depends on an earlier one.
beforeEach(() => {
  jest.clearAllMocks();
  // clearAllMocks() only wipes call history. The file-level fetch stub would
  // otherwise keep the resolved/rejected value programmed by a previous test,
  // so drop its implementation explicitly as well.
  mockFetch.mockReset();
});
|
||||
|
||||
// Happy path: both mandatory dependencies (postgres, redis) answer without error.
describe('GET /health/detailed — all services healthy', () => {
  // Builds an app around the given dependencies and issues the health request.
  const getHealth = (deps: HealthDetailedDeps) => request(buildApp(deps)).get('/health/detailed');

  it('returns 200 with overall status "healthy" when postgres and redis respond quickly', async () => {
    const pool = makePool(undefined, 10);
    const redisClient = makeRedisClient(undefined, 5);

    const { status, body } = await getHealth({ pool, redisClient });

    expect(status).toBe(200);
    expect(body.status).toBe('healthy');
    expect(body.services.postgres.status).toBe('healthy');
    expect(body.services.redis.status).toBe('healthy');
  });

  it('includes version and uptime in the response body', async () => {
    const { body } = await getHealth({ pool: makePool(), redisClient: makeRedisClient() });

    expect(typeof body.version).toBe('string');
    expect(typeof body.uptime).toBe('number');
  });

  it('includes latencyMs for each service', async () => {
    const { body } = await getHealth({ pool: makePool(), redisClient: makeRedisClient() });
    const { postgres, redis } = body.services;

    expect(typeof postgres.latencyMs).toBe('number');
    expect(typeof redis.latencyMs).toBe('number');
  });
});
|
||||
|
||||
// Degraded path: a dependency answers, but its measured latency exceeds 1000ms.
//
// Really waiting more than a second per test would be slow, so the stubs
// respond immediately and Date.now() is scripted instead. This relies on the
// controller deriving latency from Date.now() differences — an assumption
// about the implementation that these tests encode.
describe('GET /health/detailed — degraded scenario', () => {
  it('returns 207 when postgres is slow (> 1000ms)', async () => {
    let callCount = 0;
    // The first call returns the start timestamp (200); every later call
    // returns 1401, so the first measured interval is 1201ms.
    // Assumes the postgres probe is the first reader of Date.now().
    const nowSpy = jest.spyOn(Date, 'now').mockImplementation(() => {
      callCount += 1;
      return callCount === 1 ? 200 : 1401;
    });

    try {
      const app = buildApp({
        pool: makePool(undefined, 0),
        redisClient: makeRedisClient(undefined, 0),
      });

      const res = await request(app).get('/health/detailed');

      // postgres should be degraded (simulated 1201ms)
      expect(res.status).toBe(207);
      expect(res.body.status).toBe('degraded');
      expect(res.body.services.postgres.status).toBe('degraded');
    } finally {
      // Always put the real clock back, even when an assertion fails.
      nowSpy.mockRestore();
    }
  });

  it('returns 207 when redis is slow (> 1000ms)', async () => {
    // Scripted clock, assuming each probe reads Date.now() twice and postgres
    // is probed first:
    //   calls 1-2 → postgres start/end (1000 → 1050, i.e. 50ms, fast)
    //   calls 3-4 → redis start/end    (2000 → 3200, i.e. 1200ms, slow)
    // Any further call keeps returning 3200.
    const scriptedTicks = [1000, 1050, 2000];
    let callCount = 0;
    const nowSpy = jest.spyOn(Date, 'now').mockImplementation(() => {
      const tick = scriptedTicks[callCount] ?? 3200;
      callCount += 1;
      return tick;
    });

    try {
      const app = buildApp({
        pool: makePool(undefined, 0),
        redisClient: makeRedisClient(undefined, 0),
      });

      const res = await request(app).get('/health/detailed');

      expect(res.status).toBe(207);
      expect(res.body.status).toBe('degraded');
      expect(res.body.services.redis.status).toBe('degraded');
    } finally {
      // Always put the real clock back, even when an assertion fails.
      nowSpy.mockRestore();
    }
  });
});
|
||||
|
||||
// Failure path: a mandatory dependency rejects, which must surface as HTTP 503.
describe('GET /health/detailed — unreachable scenarios', () => {
  // Builds an app around the given dependencies and issues the health request.
  const getHealth = (deps: HealthDetailedDeps) => request(buildApp(deps)).get('/health/detailed');

  it('returns 503 when postgres connect() throws', async () => {
    const pool = makePool(new Error('ECONNREFUSED'));
    const redisClient = makeRedisClient();

    const response = await getHealth({ pool, redisClient });

    expect(response.status).toBe(503);
    expect(response.body.status).toBe('unreachable');
    expect(response.body.services.postgres.status).toBe('unreachable');
  });

  it('returns 503 when redis ping() throws', async () => {
    const pool = makePool();
    const redisClient = makeRedisClient(new Error('Redis ECONNREFUSED'));

    const response = await getHealth({ pool, redisClient });

    expect(response.status).toBe(503);
    expect(response.body.status).toBe('unreachable');
    expect(response.body.services.redis.status).toBe('unreachable');
  });

  it('returns 503 when both postgres and redis are unreachable', async () => {
    const pool = makePool(new Error('PG down'));
    const redisClient = makeRedisClient(new Error('Redis down'));

    const response = await getHealth({ pool, redisClient });
    const { services } = response.body;

    expect(response.status).toBe(503);
    expect(response.body.status).toBe('unreachable');
    expect(services.postgres.status).toBe('unreachable');
    expect(services.redis.status).toBe('unreachable');
  });
});
|
||||
|
||||
// Optional dependencies must not appear in the report when no URL is supplied for them.
describe('GET /health/detailed — optional services omitted when not configured', () => {
  // Requests the report from an app configured with postgres and redis only.
  const getReportedServices = async () => {
    const deps: HealthDetailedDeps = { pool: makePool(), redisClient: makeRedisClient() };
    const response = await request(buildApp(deps)).get('/health/detailed');
    return response.body.services;
  };

  it('does not include vault in services when vaultAddr is not provided', async () => {
    const services = await getReportedServices();

    expect(services.vault).toBeUndefined();
  });

  it('does not include opa in services when opaUrl is not provided', async () => {
    const services = await getReportedServices();

    expect(services.opa).toBeUndefined();
  });
});
|
||||
|
||||
// Optional dependencies: once a URL is configured, the service must show up in
// the report with a status derived from the stubbed fetch outcome.
describe('GET /health/detailed — Vault and OPA probes', () => {
  const VAULT_ADDR = 'http://vault:8200';
  const OPA_URL = 'http://opa:8181';

  // Requests the report from an app with healthy postgres/redis stubs plus the
  // given optional-service settings, and returns the per-service section.
  const getReportedServices = async (optional: Pick<HealthDetailedDeps, 'vaultAddr' | 'opaUrl'>) => {
    const app = buildApp({
      pool: makePool(),
      redisClient: makeRedisClient(),
      ...optional,
    });
    const response = await request(app).get('/health/detailed');
    return response.body.services;
  };

  it('includes vault as healthy when Vault /v1/sys/health returns 200', async () => {
    mockFetch.mockResolvedValue(new Response(null, { status: 200 }));

    const services = await getReportedServices({ vaultAddr: VAULT_ADDR });

    expect(services.vault).toBeDefined();
    // 'degraded' is tolerated as well as 'healthy'.
    expect(['healthy', 'degraded']).toContain(services.vault.status);
  });

  it('marks vault as unreachable when fetch throws', async () => {
    mockFetch.mockRejectedValue(new Error('Network failure'));

    const services = await getReportedServices({ vaultAddr: VAULT_ADDR });

    expect(services.vault.status).toBe('unreachable');
  });

  it('includes opa as healthy when OPA /health returns 200', async () => {
    mockFetch.mockResolvedValue(new Response('{}', { status: 200 }));

    const services = await getReportedServices({ opaUrl: OPA_URL });

    expect(services.opa).toBeDefined();
    // 'degraded' is tolerated as well as 'healthy'.
    expect(['healthy', 'degraded']).toContain(services.opa.status);
  });

  it('marks opa as unreachable when OPA /health returns non-200', async () => {
    mockFetch.mockResolvedValue(new Response(null, { status: 503 }));

    const services = await getReportedServices({ opaUrl: OPA_URL });

    expect(services.opa.status).toBe('unreachable');
  });
});
|
||||
Reference in New Issue
Block a user