feat(phase-4): WS1 — Production Hardening (Redis rate limiting, DB pool, health endpoint, k6)
Rate limiting:
- Replace in-memory express-rate-limit with ioredis + rate-limiter-flexible (sliding window)
- Graceful fallback to RateLimiterMemory when Redis is unreachable
- RATE_LIMIT_WINDOW_MS / RATE_LIMIT_MAX_REQUESTS env var config
- Retry-After header on 429 responses
- agentidp_rate_limit_hits_total Prometheus counter

Database pool:
- Explicit pg.Pool config via DB_POOL_MAX / DB_POOL_MIN / DB_POOL_IDLE_TIMEOUT_MS / DB_POOL_CONNECTION_TIMEOUT_MS
- Defaults: max=20, min=2, idle=30s, conn timeout=5s
- agentidp_db_pool_active_connections + agentidp_db_pool_waiting_requests gauges

Health endpoint:
- GET /health/detailed — per-service status (database, Redis, Vault, OPA)
- healthy / degraded (>1000ms) / unreachable classification
- HTTP 200 (all healthy) / 207 (any degraded) / 503 (any unreachable)

Load tests:
- tests/load/ with k6 scenarios for agent registration (100 VUs), token issuance (1000 VUs), credential rotation (50 VUs)
- npm run load-test script

Tests: 586 passing, zero TypeScript errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,30 @@
|
||||
/**
|
||||
* PostgreSQL connection pool singleton.
|
||||
* All database access flows through this pool.
|
||||
*
|
||||
* Pool configuration env vars (task 2.1 / 2.2):
|
||||
* DB_POOL_MAX — maximum connections (default 20)
|
||||
* DB_POOL_MIN — minimum connections (default 2)
|
||||
* DB_POOL_IDLE_TIMEOUT_MS — idle connection timeout in ms (default 30000)
|
||||
* DB_POOL_CONNECTION_TIMEOUT_MS — connection acquisition timeout in ms (default 5000)
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { dbQueryDurationSeconds } from '../metrics/registry.js';
|
||||
import {
|
||||
dbQueryDurationSeconds,
|
||||
dbPoolActiveConnections,
|
||||
dbPoolWaitingRequests,
|
||||
} from '../metrics/registry.js';
|
||||
|
||||
// Module-level singleton holder for the pg Pool: starts as null and is
// assigned by getPool() when it initialises the pool on first call.
let pool: Pool | null = null;
|
||||
|
||||
/**
|
||||
* Returns the singleton pg Pool instance.
|
||||
* Initialises the pool on first call using DATABASE_URL from the environment.
|
||||
* Initialises the pool on first call using DATABASE_URL and optional pool
|
||||
* tuning env vars.
|
||||
*
|
||||
* Prometheus gauges `agentidp_db_pool_active_connections` and
|
||||
* `agentidp_db_pool_waiting_requests` are updated via pool events (task 2.3).
|
||||
*
|
||||
* @returns The PostgreSQL connection pool.
|
||||
* @throws Error if DATABASE_URL is not set.
|
||||
@@ -21,13 +35,50 @@ export function getPool(): Pool {
|
||||
if (!connectionString) {
|
||||
throw new Error('DATABASE_URL environment variable is required');
|
||||
}
|
||||
pool = new Pool({ connectionString });
|
||||
|
||||
const max = parseInt(process.env['DB_POOL_MAX'] ?? '20', 10);
|
||||
const min = parseInt(process.env['DB_POOL_MIN'] ?? '2', 10);
|
||||
const idleTimeoutMillis = parseInt(process.env['DB_POOL_IDLE_TIMEOUT_MS'] ?? '30000', 10);
|
||||
const connectionTimeoutMillis = parseInt(
|
||||
process.env['DB_POOL_CONNECTION_TIMEOUT_MS'] ?? '5000',
|
||||
10,
|
||||
);
|
||||
|
||||
pool = new Pool({
|
||||
connectionString,
|
||||
max,
|
||||
min,
|
||||
idleTimeoutMillis,
|
||||
connectionTimeoutMillis,
|
||||
});
|
||||
|
||||
pool.on('error', (err: Error) => {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error('Unexpected pg pool error', err);
|
||||
});
|
||||
|
||||
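// Track active connections and waiting requests via pool events (task 2.3).
// NOTE(review): the gauges below are refreshed only on 'acquire', 'remove'
// and 'connect'. No handler visible here refreshes them when a client is
// released back to the pool, so the active-connections gauge may keep its
// last value until the next event — confirm whether a 'release' handler is
// needed.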
|
||||
pool.on('acquire', () => {
|
||||
if (pool) {
|
||||
dbPoolActiveConnections.set(pool.totalCount - pool.idleCount);
|
||||
dbPoolWaitingRequests.set(pool.waitingCount);
|
||||
}
|
||||
});
|
||||
|
||||
pool.on('remove', () => {
|
||||
if (pool) {
|
||||
dbPoolActiveConnections.set(pool.totalCount - pool.idleCount);
|
||||
dbPoolWaitingRequests.set(pool.waitingCount);
|
||||
}
|
||||
});
|
||||
|
||||
pool.on('connect', () => {
|
||||
if (pool) {
|
||||
dbPoolActiveConnections.set(pool.totalCount - pool.idleCount);
|
||||
dbPoolWaitingRequests.set(pool.waitingCount);
|
||||
}
|
||||
});
|
||||
|
||||
// Wrap pool.query to record duration in Prometheus.
|
||||
// The pg Pool.query method is heavily overloaded — the only safe approach
|
||||
// without TypeScript errors is a typed-any wrapper on the shim itself.
|
||||
|
||||
Reference in New Issue
Block a user