Rate limiting:
- Replace in-memory express-rate-limit with ioredis + rate-limiter-flexible (sliding window)
- Graceful fallback to RateLimiterMemory when Redis unreachable
- RATE_LIMIT_WINDOW_MS / RATE_LIMIT_MAX_REQUESTS env var config
- Retry-After header on 429 responses
- agentidp_rate_limit_hits_total Prometheus counter

Database pool:
- Explicit pg.Pool config via DB_POOL_MAX/MIN/IDLE_TIMEOUT_MS/CONNECTION_TIMEOUT_MS
- Defaults: max=20, min=2, idle=30s, conn timeout=5s
- agentidp_db_pool_active_connections + agentidp_db_pool_waiting_requests gauges

Health endpoint:
- GET /health/detailed — per-service status (database, Redis, Vault, OPA)
- healthy / degraded (>1000ms) / unreachable classification
- HTTP 200 (all healthy) / 207 (any degraded) / 503 (any unreachable)

Load tests:
- tests/load/ with k6 scenarios for agent registration (100 VUs), token issuance (1000 VUs), credential rotation (50 VUs)
- npm run load-test script

Tests: 586 passing, zero TypeScript errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
320 lines
11 KiB
TypeScript
320 lines
11 KiB
TypeScript
/**
 * Unit tests for src/controllers/HealthDetailedController.ts
 *
 * Covers:
 *   - all services healthy → HTTP 200, status "healthy"
 *   - a service degraded (latency > 1000ms) → HTTP 207, status "degraded"
 *   - a service unreachable (throws) → HTTP 503, status "unreachable"
 *   - optional services (Vault, OPA) omitted when not configured
 *   - Vault and OPA included when URLs configured
 */
|
|
|
|
import express, { Application } from 'express';
|
|
import request from 'supertest';
|
|
import { Pool, PoolClient } from 'pg';
|
|
import { HealthDetailedController, HealthDetailedDeps } from '../../../src/controllers/HealthDetailedController';
|
|
|
|
// ── fetch mock ────────────────────────────────────────────────────────────────
|
|
|
|
/** Jest-mocked flavour of the global `fetch` signature. */
type MockFetchFn = jest.MockedFunction<typeof fetch>;

/**
 * Shared fetch double. It is installed as the global `fetch` for this test
 * file so that individual tests can script the HTTP responses they need.
 */
const mockFetch = jest.fn() as MockFetchFn;

// `global` and `globalThis` refer to the same object under Node.
globalThis.fetch = mockFetch;
|
|
|
|
// ── Helpers ────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Builds a minimal stand-in for a pg `PoolClient` exposing only `query` and `release`.
 *
 * @param latencyMs - Delay, in milliseconds, before the mocked `query` resolves (default 0).
 * @param error - When provided, `query` rejects with this error instead of resolving.
 * @returns An object whose `query` and `release` members are Jest mock functions.
 */
function makePoolClient(latencyMs = 0, error?: Error): jest.Mocked<Pick<PoolClient, 'query' | 'release'>> {
  let query: jest.Mock;

  if (error) {
    query = jest.fn().mockRejectedValue(error);
  } else {
    // Resolve with an empty result set once the simulated latency has elapsed.
    query = jest.fn().mockImplementation(
      () =>
        new Promise((resolve) => {
          setTimeout(() => resolve({ rows: [], rowCount: 0 }), latencyMs);
        }),
    );
  }

  const client = { query, release: jest.fn() };
  return client as unknown as jest.Mocked<Pick<PoolClient, 'query' | 'release'>>;
}
|
|
|
|
/**
 * Builds a stand-in for a pg `Pool` that only implements `connect`.
 *
 * @param connectError - When provided, `connect()` rejects with this error.
 * @param queryLatencyMs - Forwarded to `makePoolClient` as the query delay (default 0).
 * @param queryError - Forwarded to `makePoolClient`; makes the client's `query` reject.
 * @returns An object with a mocked `connect`, cast to `jest.Mocked<Pool>`.
 */
function makePool(connectError?: Error, queryLatencyMs = 0, queryError?: Error): jest.Mocked<Pool> {
  const connect = jest.fn();

  if (connectError) {
    connect.mockRejectedValue(connectError);
  } else {
    // The client is created once up front, so every connect() call yields the same mock client.
    connect.mockResolvedValue(makePoolClient(queryLatencyMs, queryError));
  }

  return { connect } as unknown as jest.Mocked<Pool>;
}
|
|
|
|
/**
 * Builds a Redis client double that only implements `ping`.
 *
 * @param pingError - When provided, `ping()` rejects with this error.
 * @param latencyMs - Delay, in milliseconds, before `ping()` resolves with `'PONG'` (default 0).
 * @returns An object whose `ping` member is a Jest mock function.
 */
function makeRedisClient(pingError?: Error, latencyMs = 0): { ping(): Promise<string> } {
  if (pingError) {
    return { ping: jest.fn().mockRejectedValue(pingError) };
  }

  // Answer 'PONG' after the simulated round-trip time.
  const delayedPong = (): Promise<string> =>
    new Promise((resolve) => {
      setTimeout(() => resolve('PONG'), latencyMs);
    });

  return { ping: jest.fn().mockImplementation(delayedPong) };
}
|
|
|
|
/**
 * Creates an Express application that serves `GET /health/detailed` through a
 * `HealthDetailedController` constructed from the supplied dependencies.
 *
 * @param deps - Dependencies handed to the controller's constructor.
 * @returns The Express application with the single health route registered.
 */
function buildApp(deps: HealthDetailedDeps): Application {
  const server = express();
  const healthController = new HealthDetailedController(deps);

  // The handler is registered as a plain property reference, exactly as read from the controller.
  server.get('/health/detailed', healthController.handle);

  return server;
}
|
|
|
|
// ── Tests ──────────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Resets shared mock state before every test so that no test can observe
 * calls or scripted behaviour left behind by a previous one.
 */
beforeEach(() => {
  jest.clearAllMocks();

  // clearAllMocks() only wipes recorded calls/results and keeps any
  // implementation installed via mockResolvedValue()/mockRejectedValue().
  // mockFetch is shared by the whole file, so drop its implementation too;
  // otherwise a response scripted in one test leaks into the tests after it.
  mockFetch.mockReset();
});
|
|
|
|
/**
 * Happy path: postgres and redis both answer without error, so the endpoint
 * is expected to report HTTP 200 together with the response metadata fields.
 */
describe('GET /health/detailed — all services healthy', () => {
  /** Builds an app around the given dependencies and requests the endpoint. */
  const probe = (deps: HealthDetailedDeps) => request(buildApp(deps)).get('/health/detailed');

  it('returns 200 with overall status "healthy" when postgres and redis respond quickly', async () => {
    const { status, body } = await probe({
      pool: makePool(undefined, 10),
      redisClient: makeRedisClient(undefined, 5),
    });

    expect(status).toBe(200);
    expect(body.status).toBe('healthy');
    expect(body.services.postgres.status).toBe('healthy');
    expect(body.services.redis.status).toBe('healthy');
  });

  it('includes version and uptime in the response body', async () => {
    const { body } = await probe({
      pool: makePool(),
      redisClient: makeRedisClient(),
    });

    expect(typeof body.version).toBe('string');
    expect(typeof body.uptime).toBe('number');
  });

  it('includes latencyMs for each service', async () => {
    const { body } = await probe({
      pool: makePool(),
      redisClient: makeRedisClient(),
    });

    const { postgres, redis } = body.services;
    expect(typeof postgres.latencyMs).toBe('number');
    expect(typeof redis.latencyMs).toBe('number');
  });
});
|
|
|
|
/**
 * Degraded classification: a probe whose measured latency exceeds 1000ms is
 * expected to be reported as "degraded" and to turn the response into HTTP 207.
 *
 * Waiting more than a second of real time per test would make the suite slow,
 * so the mocked services answer immediately and Date.now() is scripted
 * instead. This assumes the controller derives each latency from a pair of
 * Date.now() readings (start/end) — confirm against the controller if the
 * timing source ever changes.
 */
describe('GET /health/detailed — degraded scenario', () => {
  /**
   * Replaces Date.now() with a scripted clock.
   *
   * The n-th call returns the n-th entry of `timestamps`; once the script is
   * exhausted the last entry is repeated, so any later measurement sees zero
   * elapsed time.
   *
   * @param timestamps - Values to hand out, in call order (must be non-empty).
   * @returns The spy, so the caller can restore the real clock with `mockRestore()`.
   */
  function scriptDateNow(timestamps: readonly number[]) {
    let calls = 0;
    return jest.spyOn(Date, 'now').mockImplementation(() => {
      const index = Math.min(calls, timestamps.length - 1);
      calls += 1;
      return timestamps[index] ?? 0;
    });
  }

  it('returns 207 when postgres is slow (> 1000ms)', async () => {
    // First reading = postgres start (200); every later reading = 1401.
    // The postgres probe therefore appears to take 1201ms, while anything
    // timed afterwards appears to take 0ms.
    const clock = scriptDateNow([200, 1401]);

    try {
      const app = buildApp({
        pool: makePool(undefined, 0),
        redisClient: makeRedisClient(undefined, 0),
      });

      const res = await request(app).get('/health/detailed');

      // postgres should be degraded (simulated 1201ms)
      expect(res.status).toBe(207);
      expect(res.body.status).toBe('degraded');
      expect(res.body.services.postgres.status).toBe('degraded');
    } finally {
      // Always give the real clock back, even when an assertion fails.
      clock.mockRestore();
    }
  });

  it('returns 207 when redis is slow (> 1000ms)', async () => {
    // Assumes the readings arrive in the order postgres-start, postgres-end,
    // redis-start, redis-end:
    //   postgres: 1000 → 1050 (fast, 50ms)
    //   redis:    2000 → 3200 (slow, 1200ms)
    const clock = scriptDateNow([1000, 1050, 2000, 3200]);

    try {
      const app = buildApp({
        pool: makePool(undefined, 0),
        redisClient: makeRedisClient(undefined, 0),
      });

      const res = await request(app).get('/health/detailed');

      expect(res.status).toBe(207);
      expect(res.body.status).toBe('degraded');
      expect(res.body.services.redis.status).toBe('degraded');
    } finally {
      // Always give the real clock back, even when an assertion fails.
      clock.mockRestore();
    }
  });
});
|
|
|
|
/**
 * Failure path: when a dependency's probe call rejects, the service is
 * expected to be reported as "unreachable" and the response to be HTTP 503.
 */
describe('GET /health/detailed — unreachable scenarios', () => {
  /** Builds an app around the given dependencies and requests the endpoint. */
  const probe = (deps: HealthDetailedDeps) => request(buildApp(deps)).get('/health/detailed');

  it('returns 503 when postgres connect() throws', async () => {
    const { status, body } = await probe({
      pool: makePool(new Error('ECONNREFUSED')),
      redisClient: makeRedisClient(),
    });

    expect(status).toBe(503);
    expect(body.status).toBe('unreachable');
    expect(body.services.postgres.status).toBe('unreachable');
  });

  it('returns 503 when redis ping() throws', async () => {
    const { status, body } = await probe({
      pool: makePool(),
      redisClient: makeRedisClient(new Error('Redis ECONNREFUSED')),
    });

    expect(status).toBe(503);
    expect(body.status).toBe('unreachable');
    expect(body.services.redis.status).toBe('unreachable');
  });

  it('returns 503 when both postgres and redis are unreachable', async () => {
    const { status, body } = await probe({
      pool: makePool(new Error('PG down')),
      redisClient: makeRedisClient(new Error('Redis down')),
    });

    expect(status).toBe(503);
    expect(body.status).toBe('unreachable');
    expect(body.services.postgres.status).toBe('unreachable');
    expect(body.services.redis.status).toBe('unreachable');
  });
});
|
|
|
|
/**
 * Optional dependencies: when neither `vaultAddr` nor `opaUrl` is supplied,
 * the corresponding entries must be absent from the `services` map.
 */
describe('GET /health/detailed — optional services omitted when not configured', () => {
  /** Requests the endpoint with only the mandatory dependencies and returns `services`. */
  async function servicesWithoutOptionalDeps() {
    const app = buildApp({
      pool: makePool(),
      redisClient: makeRedisClient(),
    });
    const { body } = await request(app).get('/health/detailed');
    return body.services;
  }

  it('does not include vault in services when vaultAddr is not provided', async () => {
    const services = await servicesWithoutOptionalDeps();

    expect(services.vault).toBeUndefined();
  });

  it('does not include opa in services when opaUrl is not provided', async () => {
    const services = await servicesWithoutOptionalDeps();

    expect(services.opa).toBeUndefined();
  });
});
|
|
|
|
/**
 * Optional HTTP-probed dependencies: Vault and OPA appear in the response
 * once their address is configured. The shared `mockFetch` double scripts the
 * outcome of the outbound HTTP call for each test.
 */
describe('GET /health/detailed — Vault and OPA probes', () => {
  /** Statuses accepted for a probe that answered successfully. */
  const reachableStatuses = ['healthy', 'degraded'];

  /**
   * Requests the endpoint with working postgres/redis doubles plus the given
   * optional settings, and returns the `services` map from the response body.
   */
  async function servicesWith(optional: Partial<HealthDetailedDeps>) {
    const app = buildApp({
      pool: makePool(),
      redisClient: makeRedisClient(),
      ...optional,
    });
    const { body } = await request(app).get('/health/detailed');
    return body.services;
  }

  it('includes vault as healthy when Vault /v1/sys/health returns 200', async () => {
    mockFetch.mockResolvedValue(new Response(null, { status: 200 }));

    const { vault } = await servicesWith({ vaultAddr: 'http://vault:8200' });

    expect(vault).toBeDefined();
    expect(reachableStatuses).toContain(vault.status);
  });

  it('marks vault as unreachable when fetch throws', async () => {
    mockFetch.mockRejectedValue(new Error('Network failure'));

    const { vault } = await servicesWith({ vaultAddr: 'http://vault:8200' });

    expect(vault.status).toBe('unreachable');
  });

  it('includes opa as healthy when OPA /health returns 200', async () => {
    mockFetch.mockResolvedValue(new Response('{}', { status: 200 }));

    const { opa } = await servicesWith({ opaUrl: 'http://opa:8181' });

    expect(opa).toBeDefined();
    expect(reachableStatuses).toContain(opa.status);
  });

  it('marks opa as unreachable when OPA /health returns non-200', async () => {
    mockFetch.mockResolvedValue(new Response(null, { status: 503 }));

    const { opa } = await servicesWith({ opaUrl: 'http://opa:8181' });

    expect(opa.status).toBe('unreachable');
  });
});
|