feat(phase-2): workstream 7 — Prometheus + Grafana Monitoring

- Add prom-client 15; shared registry in src/metrics/registry.ts (6 metrics)
- HTTP request counter + duration histogram via metricsMiddleware
- DB query duration histogram wrapping pg Pool.query
- Redis command duration histogram via typed instrumentRedisMethod wrapper
- agentidp_tokens_issued_total in OAuth2Service
- agentidp_agents_registered_total in AgentService
- GET /metrics unauthenticated endpoint (Prometheus text format)
- docker-compose.monitoring.yml overlay (Prometheus + Grafana)
- Grafana auto-provisioned datasource + pre-built AgentIdP dashboard
- docs/devops/operations.md monitoring section added
- 36/36 unit tests passing, 100% coverage on new metrics code
- Fix pre-existing unused import in tests/integration/agents.test.ts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
SentryAgent.ai Developer
2026-03-29 06:13:41 +00:00
parent 7d6e248a14
commit a504964e5f
21 changed files with 1053 additions and 15 deletions

View File

@@ -8,7 +8,6 @@ import request from 'supertest';
import { Application } from 'express';
import { v4 as uuidv4 } from 'uuid';
import { Pool } from 'pg';
import { createClient } from 'redis';
// Set test environment variables before importing app
const { privateKey, publicKey } = crypto.generateKeyPairSync('rsa', {

View File

@@ -0,0 +1,129 @@
/**
* Unit tests for src/metrics/registry.ts
*
* Verifies that all 6 Prometheus metrics are registered on the shared
* metricsRegistry (not the default global registry), have the correct
* names, and carry the correct label names.
*/
import {
metricsRegistry,
tokensIssuedTotal,
agentsRegisteredTotal,
httpRequestsTotal,
httpRequestDurationSeconds,
dbQueryDurationSeconds,
redisCommandDurationSeconds,
} from '../../../src/metrics/registry';
describe('metricsRegistry', () => {
  /**
   * Looks up a single metric in the shared registry's JSON snapshot.
   *
   * Throws a descriptive error when the metric is absent, so callers get a
   * narrowed (non-undefined) entry without resorting to non-null assertions.
   *
   * @param metricName - Full Prometheus metric name to look up.
   * @returns The JSON entry for the metric.
   * @throws Error when no metric with that name is registered.
   */
  async function getRegisteredMetric(metricName: string) {
    const entries = await metricsRegistry.getMetricsAsJSON();
    const entry = entries.find((e) => e.name === metricName);
    if (entry === undefined) {
      throw new Error(`metric "${metricName}" is not registered on metricsRegistry`);
    }
    return entry;
  }

  // ──────────────────────────────────────────────────────────────────
  // Registry isolation
  // ──────────────────────────────────────────────────────────────────
  it('uses a non-default registry instance', async () => {
    // prom-client exports its default (global) registry as `register`.
    // The shared registry must NOT be the same reference as that one.
    const { register } = await import('prom-client');
    expect(metricsRegistry).not.toBe(register);
  });

  it('contains exactly 6 metric entries', async () => {
    const entries = await metricsRegistry.getMetricsAsJSON();
    expect(entries).toHaveLength(6);
  });

  // ──────────────────────────────────────────────────────────────────
  // Metric names
  // ──────────────────────────────────────────────────────────────────
  it.each([
    'agentidp_tokens_issued_total',
    'agentidp_agents_registered_total',
    'agentidp_http_requests_total',
    'agentidp_http_request_duration_seconds',
    'agentidp_db_query_duration_seconds',
    'agentidp_redis_command_duration_seconds',
  ])('registers metric "%s"', async (metricName) => {
    const entries = await metricsRegistry.getMetricsAsJSON();
    const names = entries.map((e) => e.name);
    expect(names).toContain(metricName);
  });

  // ──────────────────────────────────────────────────────────────────
  // Label names per metric
  // ──────────────────────────────────────────────────────────────────
  describe('tokensIssuedTotal', () => {
    it('has name agentidp_tokens_issued_total', () => {
      // Access the internal name via the metric object
      const metric = tokensIssuedTotal as unknown as { name: string };
      expect(metric.name).toBe('agentidp_tokens_issued_total');
    });

    it('has label "scope"', async () => {
      // Actually exercise the label: prom-client throws when a label that
      // was not declared on the metric is supplied, so a successful inc()
      // plus the label showing up in the snapshot proves "scope" exists.
      expect(() => tokensIssuedTotal.inc({ scope: 'agents:read' })).not.toThrow();

      const entry = await getRegisteredMetric('agentidp_tokens_issued_total');
      expect(entry.type).toBe('counter');
      expect(entry.values.some((v) => v.labels['scope'] === 'agents:read')).toBe(true);
    });
  });

  describe('agentsRegisteredTotal', () => {
    it('has name agentidp_agents_registered_total', () => {
      const metric = agentsRegisteredTotal as unknown as { name: string };
      expect(metric.name).toBe('agentidp_agents_registered_total');
    });
  });

  describe('httpRequestsTotal', () => {
    it('has name agentidp_http_requests_total', () => {
      const metric = httpRequestsTotal as unknown as { name: string };
      expect(metric.name).toBe('agentidp_http_requests_total');
    });

    it('increments with method, route, status_code labels without throwing', () => {
      expect(() =>
        httpRequestsTotal.inc({ method: 'GET', route: '/test', status_code: '200' }),
      ).not.toThrow();
    });
  });

  describe('httpRequestDurationSeconds', () => {
    it('has name agentidp_http_request_duration_seconds', () => {
      const metric = httpRequestDurationSeconds as unknown as { name: string };
      expect(metric.name).toBe('agentidp_http_request_duration_seconds');
    });

    it('observes with method, route, status_code labels without throwing', () => {
      expect(() =>
        httpRequestDurationSeconds.observe({ method: 'GET', route: '/test', status_code: '200' }, 0.05),
      ).not.toThrow();
    });
  });

  describe('dbQueryDurationSeconds', () => {
    it('has name agentidp_db_query_duration_seconds', () => {
      const metric = dbQueryDurationSeconds as unknown as { name: string };
      expect(metric.name).toBe('agentidp_db_query_duration_seconds');
    });

    it('observes with operation label without throwing', () => {
      expect(() =>
        dbQueryDurationSeconds.observe({ operation: 'query' }, 0.002),
      ).not.toThrow();
    });
  });

  describe('redisCommandDurationSeconds', () => {
    it('has name agentidp_redis_command_duration_seconds', () => {
      const metric = redisCommandDurationSeconds as unknown as { name: string };
      expect(metric.name).toBe('agentidp_redis_command_duration_seconds');
    });

    it('observes with command label without throwing', () => {
      expect(() =>
        redisCommandDurationSeconds.observe({ command: 'get' }, 0.001),
      ).not.toThrow();
    });
  });
});

View File

@@ -0,0 +1,190 @@
/**
* Unit tests for src/middleware/metrics.ts
*
* Verifies that metricsMiddleware increments agentidp_http_requests_total
* and records agentidp_http_request_duration_seconds with the correct labels
* (method, route, status_code) on each request's 'finish' event.
*/
import { Request, Response, NextFunction } from 'express';
import { metricsMiddleware } from '../../../src/middleware/metrics';
import { metricsRegistry } from '../../../src/metrics/registry';
/**
 * Local view of a single sample row returned by the registry's JSON export.
 *
 * The TypeScript typings shipped with prom-client 15 omit `metricName` on
 * MetricValue, yet histogram rows include it at runtime so that _count/_sum
 * rows can be told apart from _bucket rows. Casting to this shape makes that
 * field reachable from the tests below.
 */
interface HistogramMetricValue {
  /** Present on histogram rows only; e.g. the `<name>_count` series name. */
  metricName?: string;
  /** Label name → label value pairs attached to the sample. */
  labels: Record<string, string>;
  /** Numeric value of the sample. */
  value: number;
}
// ────────────────────────────────────────────────────────────────────────────
// Helpers
// ────────────────────────────────────────────────────────────────────────────
/**
 * Creates a bare-bones stand-in for an Express Request.
 *
 * Only `method`, `path`, `baseUrl`, `route` and `originalUrl` are populated
 * by default; anything in `overrides` replaces the matching default.
 *
 * @param overrides - Request fields to set or replace on the mock.
 * @returns A plain object cast to `Request`.
 */
function makeMockRequest(overrides: Partial<Request> = {}): Request {
  const defaults = {
    method: 'GET',
    path: '/test',
    baseUrl: '',
    route: undefined,
    originalUrl: '/test',
  };
  const mockRequest = { ...defaults, ...overrides };
  return mockRequest as unknown as Request;
}
/**
 * Creates a bare-bones stand-in for an Express Response.
 *
 * The mock exposes only `statusCode` and an `on` method. Listeners added
 * for the 'finish' event are stored (listeners for any other event are
 * ignored) so a test can fire them on demand through `triggerFinish`.
 *
 * @param statusCode - Status code the mock response reports (default 200).
 * @returns The mock response plus a function that invokes every stored
 *          'finish' listener in registration order.
 */
function makeMockResponse(statusCode = 200): { res: Response; triggerFinish: () => void } {
  const finishListeners: Array<() => void> = [];

  const registerListener = (event: string, cb: () => void): void => {
    if (event !== 'finish') {
      return;
    }
    finishListeners.push(cb);
  };

  const triggerFinish = (): void => {
    // Iterate over a snapshot so listeners added while firing are not
    // invoked during the same pass.
    for (const listener of [...finishListeners]) {
      listener();
    }
  };

  const res = { statusCode, on: registerListener } as unknown as Response;
  return { res, triggerFinish };
}
// ────────────────────────────────────────────────────────────────────────────
// Tests
// ────────────────────────────────────────────────────────────────────────────
describe('metricsMiddleware', () => {
  const requestsTotalName = 'agentidp_http_requests_total';
  const requestDurationName = 'agentidp_http_request_duration_seconds';

  let next: jest.MockedFunction<NextFunction>;

  /**
   * Looks up a single metric in the shared registry's JSON snapshot.
   *
   * Throws a descriptive error when the metric is absent, so each test gets
   * a narrowed (non-undefined) entry instead of repeating
   * `expect(...).toBeDefined()` followed by non-null assertions.
   *
   * @param metricName - Full Prometheus metric name to look up.
   * @returns The JSON entry for the metric.
   * @throws Error when no metric with that name is registered.
   */
  async function getRegisteredMetric(metricName: string) {
    const entries = await metricsRegistry.getMetricsAsJSON();
    const entry = entries.find((e) => e.name === metricName);
    if (entry === undefined) {
      throw new Error(`metric "${metricName}" is not registered on metricsRegistry`);
    }
    return entry;
  }

  // Nothing is awaited here, so the hook is intentionally synchronous.
  beforeEach(() => {
    // Reset all metric values between tests to avoid cross-test pollution.
    metricsRegistry.resetMetrics();
    next = jest.fn();
  });

  it('calls next() immediately', () => {
    const req = makeMockRequest();
    const { res } = makeMockResponse();

    metricsMiddleware(req, res, next);

    expect(next).toHaveBeenCalledTimes(1);
  });

  it('does NOT increment counter before finish event fires', async () => {
    const req = makeMockRequest();
    const { res } = makeMockResponse();

    metricsMiddleware(req, res, next);

    const counterEntry = await getRegisteredMetric(requestsTotalName);
    // No values recorded yet — values array will be empty
    expect(counterEntry.values).toHaveLength(0);
  });

  it('increments agentidp_http_requests_total after finish event', async () => {
    const req = makeMockRequest({ method: 'POST', path: '/api/v1/agents' });
    const { res, triggerFinish } = makeMockResponse(201);

    metricsMiddleware(req, res, next);
    triggerFinish();

    const counterEntry = await getRegisteredMetric(requestsTotalName);
    expect(counterEntry.values).toHaveLength(1);
    const recorded = counterEntry.values[0];
    expect(recorded.labels['method']).toBe('POST');
    expect(recorded.labels['status_code']).toBe('201');
    expect(recorded.value).toBe(1);
  });

  it('records agentidp_http_request_duration_seconds after finish event', async () => {
    const req = makeMockRequest({ method: 'GET', path: '/health' });
    const { res, triggerFinish } = makeMockResponse(200);

    metricsMiddleware(req, res, next);
    triggerFinish();

    const histEntry = await getRegisteredMetric(requestDurationName);
    // Histogram produces _bucket, _count and _sum entries — count must be 1
    const countEntry = (histEntry.values as HistogramMetricValue[]).find(
      (v) => v.metricName === 'agentidp_http_request_duration_seconds_count',
    );
    // A missing _count row yields `undefined`, which also fails this check.
    expect(countEntry?.value).toBe(1);
  });

  it('uses matched route pattern when req.route.path is available', async () => {
    const req = makeMockRequest({
      method: 'GET',
      path: '/api/v1/agents/some-uuid',
      baseUrl: '/api/v1/agents',
      route: { path: '/:agentId' } as Request['route'],
    });
    const { res, triggerFinish } = makeMockResponse(200);

    metricsMiddleware(req, res, next);
    triggerFinish();

    const counterEntry = await getRegisteredMetric(requestsTotalName);
    const recorded = counterEntry.values[0];
    // Route should be baseUrl + route.path = '/api/v1/agents/:agentId'
    expect(recorded.labels['route']).toBe('/api/v1/agents/:agentId');
  });

  it('replaces UUID segments when no route pattern is available', async () => {
    const uuid = '123e4567-e89b-12d3-a456-426614174000';
    const req = makeMockRequest({
      method: 'DELETE',
      path: `/api/v1/agents/${uuid}`,
      baseUrl: '',
      route: undefined,
    });
    const { res, triggerFinish } = makeMockResponse(204);

    metricsMiddleware(req, res, next);
    triggerFinish();

    const counterEntry = await getRegisteredMetric(requestsTotalName);
    const recorded = counterEntry.values[0];
    expect(recorded.labels['route']).toBe('/api/v1/agents/:id');
    expect(recorded.labels['method']).toBe('DELETE');
    expect(recorded.labels['status_code']).toBe('204');
  });

  it('increments counter multiple times for multiple requests', async () => {
    for (let i = 0; i < 3; i++) {
      const req = makeMockRequest({ method: 'GET', path: '/health' });
      const { res, triggerFinish } = makeMockResponse(200);
      metricsMiddleware(req, res, next);
      triggerFinish();
    }

    const counterEntry = await getRegisteredMetric(requestsTotalName);
    const recorded = counterEntry.values[0];
    expect(recorded.value).toBe(3);
  });
});

View File

@@ -0,0 +1,89 @@
/**
* Unit tests for src/routes/metrics.ts
*
* Verifies that GET /metrics returns 200 with Prometheus exposition format
* and does NOT require authentication.
*/
import express, { Application } from 'express';
import request from 'supertest';
import { createMetricsRouter } from '../../../src/routes/metrics';
import { metricsRegistry } from '../../../src/metrics/registry';
// ────────────────────────────────────────────────────────────────────────────
// Helpers
// ────────────────────────────────────────────────────────────────────────────
/**
 * Creates a fresh Express application whose only mounted handler is the
 * metrics router, attached at `/metrics`.
 *
 * @returns The newly created application.
 */
function buildTestApp(): Application {
  const metricsOnlyApp = express();
  const metricsRouter = createMetricsRouter();
  metricsOnlyApp.use('/metrics', metricsRouter);
  return metricsOnlyApp;
}
// ────────────────────────────────────────────────────────────────────────────
// Tests
// ────────────────────────────────────────────────────────────────────────────
describe('GET /metrics', () => {
  let app: Application;

  beforeEach(() => {
    // Start every test from zeroed metric values and a fresh app instance.
    metricsRegistry.resetMetrics();
    app = buildTestApp();
  });

  it('returns HTTP 200', async () => {
    const res = await request(app).get('/metrics');
    expect(res.status).toBe(200);
  });

  it('returns Content-Type containing text/plain', async () => {
    const res = await request(app).get('/metrics');
    expect(res.headers['content-type']).toMatch(/text\/plain/);
  });

  it('does NOT require an Authorization header', async () => {
    // Call without any auth header — must still succeed
    const res = await request(app).get('/metrics');
    expect(res.status).toBe(200);
    expect(res.status).not.toBe(401);
    expect(res.status).not.toBe(403);
  });

  // One generated test per metric name; the titles match the previous
  // hand-written tests ("response body contains <metric name>").
  it.each([
    'agentidp_tokens_issued_total',
    'agentidp_agents_registered_total',
    'agentidp_http_requests_total',
    'agentidp_http_request_duration_seconds',
    'agentidp_db_query_duration_seconds',
    'agentidp_redis_command_duration_seconds',
  ])('response body contains %s', async (metricName) => {
    const res = await request(app).get('/metrics');
    expect(res.text).toContain(metricName);
  });

  it('response body is valid Prometheus text exposition format (starts with # HELP or TYPE)', async () => {
    const res = await request(app).get('/metrics');
    // The body must BEGIN with a '# HELP' / '# TYPE' comment line. No `m`
    // flag here: with it, `^` would match at the start of any line and the
    // check would pass even if the body opened with something else.
    expect(res.text).toMatch(/^# (HELP|TYPE)/);
  });
});