feat(phase-2): workstream 8 — Multi-Region Terraform Deployment

AWS environment:
- VPC (3-AZ, public + private subnets, NAT gateways, VPC endpoints for ECR/SM/CW)
- ECS Fargate service (sentryagent/agentidp) — secrets from Secrets Manager
- RDS PostgreSQL 14 (Multi-AZ, encrypted, VPC-internal, storage autoscaling)
- ElastiCache Redis 7 (primary + replica, at-rest + in-transit encryption)
- ALB with HTTPS/443, HTTP→HTTPS redirect, ACM certificate
- Route 53 alias record

GCP environment:
- VPC + private services access + Serverless VPC connector
- Cloud Run service — secrets from Secret Manager
- Cloud SQL PostgreSQL 14 (private IP, no public endpoint)
- Cloud Memorystore Redis 7 (VPC-internal, AUTH enabled)

Shared:
- 4 reusable modules: agentidp (dual AWS/GCP), rds, redis, lb
- No hardcoded secrets; all sensitive vars marked sensitive=true
- terraform.tfvars.example for both environments
- docs/devops/deployment.md — AWS + GCP step-by-step walkthrough, rollback procedures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
SentryAgent.ai Developer
2026-03-29 06:25:14 +00:00
parent a504964e5f
commit 6913d62648
22 changed files with 4138 additions and 8 deletions

View File

@@ -0,0 +1,426 @@
################################################################################
# Module: agentidp
# Main — ECS Fargate (AWS) or Cloud Run (GCP)
#
# Deploys the sentryagent/agentidp container.
# All sensitive environment variables are injected from AWS Secrets Manager
# (AWS path) or GCP Secret Manager (GCP path) — no plaintext secrets here.
################################################################################
# Toolchain constraints for the agentidp module. Both providers are declared
# because this module contains AWS and GCP resources, each gated on
# var.provider_type via `count`.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = ">= 5.20.0"
    }

    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.40.0"
    }
  }

  required_version = ">= 1.6.0"
}
################################################################################
# Locals
################################################################################
locals {
  # Tags attached to the AWS resources created by this module.
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  # Optional HashiCorp Vault wiring. Each group is gated on its own input, so
  # the groups are enabled independently of one another:
  #   * VAULT_ADDR + VAULT_MOUNT are emitted only when the matching
  #     *_vault_addr variable is non-empty.
  #   * VAULT_TOKEN (AWS) is emitted only when aws_secret_vault_token_arn is
  #     non-empty, regardless of aws_vault_addr.

  # AWS, plain values: name/value pairs appended to the container's
  # `environment` list.
  aws_vault_env_plain = var.aws_vault_addr == "" ? [] : [
    { name = "VAULT_ADDR", value = var.aws_vault_addr },
    { name = "VAULT_MOUNT", value = var.aws_vault_mount },
  ]

  # AWS, secret reference: appended to the container's `secrets` list.
  aws_vault_secret_env = var.aws_secret_vault_token_arn == "" ? [] : [
    { name = "VAULT_TOKEN", valueFrom = var.aws_secret_vault_token_arn },
  ]

  # GCP, plain values: merged into the map that drives the Cloud Run `env`
  # blocks.
  gcp_vault_env_plain = var.gcp_vault_addr == "" ? {} : {
    VAULT_ADDR  = var.gcp_vault_addr
    VAULT_MOUNT = var.gcp_vault_mount
  }
}
################################################################################
# ── AWS PATH ──────────────────────────────────────────────────────────────────
################################################################################
# Security group attached to the AgentIdP ECS tasks.
#
# Ingress is deliberately NOT declared inline:
#   * The only intended source is the ALB security group, which this module
#     does not receive as an input. The root environment module is expected to
#     attach that rule with a standalone security-group-rule resource that
#     targets the `aws_app_security_group_id` output (this avoids a circular
#     dependency between the ALB and app modules).
#   * An inline `ingress` block makes this resource authoritative for ingress
#     rules, so it would fight with (and remove) the standalone rule. The
#     previous inline block also had no source at all (empty cidr_blocks,
#     self = false), so it never allowed any traffic.
#
# Egress: all outbound is allowed (needed for Secrets Manager and ECR API calls
# over HTTPS).
resource "aws_security_group" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  name   = "${var.project}-${var.environment}-app-sg"
  vpc_id = var.aws_vpc_id

  # ASCII only: the EC2 API rejects security group descriptions containing
  # characters outside a-zA-Z0-9 and a small punctuation set (an em dash fails).
  description = "Security group for AgentIdP ECS tasks - inbound from ALB only"

  egress {
    description = "All outbound"
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${var.project}-${var.environment}-app-sg"
  })
}
# ECS cluster that hosts the AgentIdP service (AWS path only).
# Container Insights is switched on via the cluster setting below.
resource "aws_ecs_cluster" "main" {
  count = var.provider_type == "aws" ? 1 : 0

  name = "${var.project}-${var.environment}"
  tags = local.common_tags

  setting {
    name  = "containerInsights"
    value = "enabled"
  }
}
# Registers FARGATE and FARGATE_SPOT on the cluster. The default strategy
# places tasks on on-demand FARGATE only (base 1, weight 1); FARGATE_SPOT is
# merely made available.
resource "aws_ecs_cluster_capacity_providers" "main" {
  count = var.provider_type == "aws" ? 1 : 0

  cluster_name       = aws_ecs_cluster.main[0].name
  capacity_providers = ["FARGATE", "FARGATE_SPOT"]

  default_capacity_provider_strategy {
    capacity_provider = "FARGATE"
    base              = 1
    weight            = 1
  }
}
# CloudWatch log group that receives the container logs (AWS path only).
# Log events are retained for 30 days.
resource "aws_cloudwatch_log_group" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  name              = var.aws_log_group_name
  tags              = local.common_tags
  retention_in_days = 30
}
# ECS task definition for the AgentIdP container (AWS path only).
#
# - Runs on Fargate in awsvpc network mode with a single essential container
#   named "agentidp" (the ECS service's load_balancer block refers to this
#   name).
# - Non-sensitive settings go in `environment`; sensitive ones are referenced
#   by Secrets Manager ARN in `secrets` and resolved by ECS at task launch.
# - Optional Vault variables are appended from the module locals.
resource "aws_ecs_task_definition" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  family                   = "${var.project}-${var.environment}"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]

  # The provider expects these as strings; the module inputs are numbers.
  cpu    = tostring(var.aws_cpu)
  memory = tostring(var.aws_memory)

  execution_role_arn = var.aws_execution_role_arn
  task_role_arn      = var.aws_task_role_arn

  container_definitions = jsonencode([
    {
      name      = "agentidp"
      image     = var.app_image
      essential = true

      portMappings = [
        {
          containerPort = var.app_port
          protocol      = "tcp"
        }
      ]

      # Plain (non-sensitive) environment variables.
      environment = concat(
        [
          { name = "PORT", value = tostring(var.app_port) },
          { name = "NODE_ENV", value = "production" },
          { name = "CORS_ORIGIN", value = var.aws_cors_origin },
          { name = "POLICY_DIR", value = var.aws_policy_dir }
        ],
        local.aws_vault_env_plain
      )

      # Sensitive values fetched from Secrets Manager at task launch.
      # Each entry is injected as the named environment variable.
      secrets = concat(
        [
          {
            name      = "DATABASE_URL"
            valueFrom = var.aws_secret_database_url_arn
          },
          {
            name      = "REDIS_URL"
            valueFrom = var.aws_secret_redis_url_arn
          },
          {
            name      = "JWT_PRIVATE_KEY"
            valueFrom = var.aws_secret_jwt_private_key_arn
          },
          {
            name      = "JWT_PUBLIC_KEY"
            valueFrom = var.aws_secret_jwt_public_key_arn
          }
        ],
        local.aws_vault_secret_env
      )

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          # Reference the log group resource (not the raw variable) so that
          # Terraform creates the log group before anything that depends on
          # this task definition; the awslogs driver cannot start a task
          # whose log group does not exist yet.
          "awslogs-group"         = aws_cloudwatch_log_group.app[0].name
          "awslogs-region"        = var.aws_region
          "awslogs-stream-prefix" = "agentidp"
        }
      }

      # Container-level health check: probes /health on the app port from
      # inside the container. Requires `wget` to be present in the image.
      healthCheck = {
        command     = ["CMD-SHELL", "wget -qO- http://localhost:${var.app_port}/health || exit 1"]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }

      readonlyRootFilesystem = false
      user                   = "node"
    }
  ])

  tags = local.common_tags
}
# ECS service that keeps the AgentIdP tasks running behind the ALB target
# group (AWS path only).
resource "aws_ecs_service" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  name    = "${var.project}-${var.environment}"
  cluster = aws_ecs_cluster.main[0].id
  tags    = local.common_tags

  launch_type     = "FARGATE"
  task_definition = aws_ecs_task_definition.app[0].arn
  desired_count   = var.aws_desired_count

  # Rolling deployment: never drop below the desired count, and allow up to
  # twice the desired count while new tasks come up.
  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 100

  # Routes ALB traffic to the "agentidp" container on the app port.
  load_balancer {
    container_name   = "agentidp"
    container_port   = var.app_port
    target_group_arn = var.aws_target_group_arn
  }

  # Tasks get no public IP and use the module's app security group.
  network_configuration {
    assign_public_ip = false
    security_groups  = [aws_security_group.app[0].id]
    subnets          = var.aws_subnet_ids
  }

  # Deployments are driven from outside Terraform (the CI/CD pipeline updates
  # the image tag), so drift in task_definition and desired_count is ignored.
  # Consequence: a new task definition revision produced by Terraform is not
  # rolled out to this service by `terraform apply`.
  lifecycle {
    ignore_changes = [task_definition, desired_count]
  }

  depends_on = [aws_ecs_cluster_capacity_providers.main]
}
################################################################################
# ── GCP PATH ──────────────────────────────────────────────────────────────────
################################################################################
# Cloud Run service running the AgentIdP container (GCP path only).
#
# - Plain settings are passed as literal env vars; sensitive ones are read
#   from Secret Manager (always the "latest" version of each secret).
# - Outbound traffic to private ranges goes through the Serverless VPC
#   connector so the container can reach Cloud SQL (private IP) and
#   Memorystore.
resource "google_cloud_run_v2_service" "app" {
  count = var.provider_type == "gcp" ? 1 : 0

  name     = "${var.project}-${var.environment}"
  location = var.gcp_region
  project  = var.gcp_project_id

  # INGRESS_TRAFFIC_ALL accepts requests from any source, including the public
  # internet. Cloud Run provides Google-managed TLS on the default *.run.app
  # domain and on any custom domains mapped via Cloud Run domain mappings.
  ingress = "INGRESS_TRAFFIC_ALL"

  template {
    service_account = var.gcp_service_account_email

    scaling {
      min_instance_count = var.gcp_min_instances
      max_instance_count = var.gcp_max_instances
    }

    # VPC access: only traffic to private IP ranges is routed through the
    # connector; everything else egresses directly.
    vpc_access {
      connector = var.gcp_vpc_connector_name
      egress    = "PRIVATE_RANGES_ONLY"
    }

    containers {
      image = var.app_image

      ports {
        container_port = var.app_port
      }

      resources {
        limits = {
          cpu    = var.gcp_cpu
          memory = var.gcp_memory
        }
        cpu_idle          = false
        startup_cpu_boost = true
      }

      # Plain environment variables.
      #
      # PORT is intentionally NOT set here: it is a reserved variable on
      # Cloud Run. The platform injects PORT itself (matching container_port
      # above) and rejects revisions that try to define it explicitly.
      dynamic "env" {
        for_each = merge(
          {
            NODE_ENV    = "production"
            CORS_ORIGIN = var.gcp_cors_origin
            POLICY_DIR  = var.gcp_policy_dir
          },
          local.gcp_vault_env_plain
        )
        content {
          name  = env.key
          value = env.value
        }
      }

      # DATABASE_URL from Secret Manager
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            secret  = var.gcp_secret_database_url_id
            version = "latest"
          }
        }
      }

      # REDIS_URL from Secret Manager
      env {
        name = "REDIS_URL"
        value_source {
          secret_key_ref {
            secret  = var.gcp_secret_redis_url_id
            version = "latest"
          }
        }
      }

      # JWT_PRIVATE_KEY from Secret Manager
      env {
        name = "JWT_PRIVATE_KEY"
        value_source {
          secret_key_ref {
            secret  = var.gcp_secret_jwt_private_key_id
            version = "latest"
          }
        }
      }

      # JWT_PUBLIC_KEY from Secret Manager
      env {
        name = "JWT_PUBLIC_KEY"
        value_source {
          secret_key_ref {
            secret  = var.gcp_secret_jwt_public_key_id
            version = "latest"
          }
        }
      }

      # VAULT_TOKEN from Secret Manager (only when a secret ID is supplied)
      dynamic "env" {
        for_each = var.gcp_secret_vault_token_id != "" ? [1] : []
        content {
          name = "VAULT_TOKEN"
          value_source {
            secret_key_ref {
              secret  = var.gcp_secret_vault_token_id
              version = "latest"
            }
          }
        }
      }

      # Restart the container when /health keeps failing after startup.
      liveness_probe {
        http_get {
          path = "/health"
          port = var.app_port
        }
        initial_delay_seconds = 30
        period_seconds        = 15
        failure_threshold     = 3
        timeout_seconds       = 5
      }

      # Allow up to 12 probes, 5 s apart, after a 10 s delay for the
      # container to report healthy on startup.
      startup_probe {
        http_get {
          path = "/health"
          port = var.app_port
        }
        initial_delay_seconds = 10
        period_seconds        = 5
        failure_threshold     = 12
        timeout_seconds       = 3
      }
    }
  }

  labels = {
    environment = var.environment
    project     = replace(var.project, "-", "_")
    managed_by  = "terraform"
  }
}
# Grants roles/run.invoker to allUsers, i.e. the Cloud Run service can be
# invoked without Google IAM credentials. Client authentication for AgentIdP
# happens in the application layer (JWT Bearer tokens) instead of Cloud Run's
# built-in IAM auth.
resource "google_cloud_run_v2_service_iam_member" "public_invoker" {
  count = var.provider_type == "gcp" ? 1 : 0

  name     = google_cloud_run_v2_service.app[0].name
  location = var.gcp_region
  project  = var.gcp_project_id

  member = "allUsers"
  role   = "roles/run.invoker"
}

View File

@@ -0,0 +1,55 @@
################################################################################
# Module: agentidp
# Outputs
################################################################################
# ── AWS Outputs ──────────────────────────────────────────────────────────────
# Every AWS output is null unless provider_type is "aws", because the
# underlying resources are only created (count = 1) on the AWS path.

output "aws_ecs_cluster_arn" {
  value       = var.provider_type != "aws" ? null : aws_ecs_cluster.main[0].arn
  description = "ARN of the ECS cluster hosting the AgentIdP service."
}

output "aws_ecs_service_name" {
  value       = var.provider_type != "aws" ? null : aws_ecs_service.app[0].name
  description = "Name of the ECS Fargate service."
}

output "aws_ecs_task_definition_arn" {
  value       = var.provider_type != "aws" ? null : aws_ecs_task_definition.app[0].arn
  description = "ARN of the active ECS task definition revision."
}

output "aws_app_security_group_id" {
  value       = var.provider_type != "aws" ? null : aws_security_group.app[0].id
  description = "Security group ID attached to the ECS tasks. Use this to add ingress rules from the ALB."
}

output "aws_cloudwatch_log_group_name" {
  value       = var.provider_type != "aws" ? null : aws_cloudwatch_log_group.app[0].name
  description = "CloudWatch log group name for ECS container logs."
}
# ── GCP Outputs ──────────────────────────────────────────────────────────────
# Every GCP output is null unless provider_type is "gcp", because the Cloud
# Run service is only created (count = 1) on the GCP path.

output "gcp_cloud_run_service_name" {
  value       = var.provider_type != "gcp" ? null : google_cloud_run_v2_service.app[0].name
  description = "Name of the Cloud Run service."
}

output "gcp_cloud_run_service_url" {
  value       = var.provider_type != "gcp" ? null : google_cloud_run_v2_service.app[0].uri
  description = "Publicly accessible HTTPS URL of the Cloud Run service (Google-managed TLS)."
}

output "gcp_cloud_run_service_id" {
  value       = var.provider_type != "gcp" ? null : google_cloud_run_v2_service.app[0].id
  description = "Full resource ID of the Cloud Run service."
}
# ── Unified Outputs ───────────────────────────────────────────────────────────
# Provider-agnostic URL output. Only the GCP path yields a value (the Cloud
# Run URI); on the AWS path this is null.
output "service_url" {
  value       = var.provider_type != "gcp" ? null : google_cloud_run_v2_service.app[0].uri
  description = "Publicly accessible service URL. Populated for GCP (Cloud Run native URL). For AWS use the ALB DNS name from the lb module."
}

View File

@@ -0,0 +1,279 @@
################################################################################
# Module: agentidp
# Variables
#
# Accepts all configuration for deploying the AgentIdP container to either
# AWS ECS Fargate (provider = "aws") or GCP Cloud Run (provider = "gcp").
################################################################################
# Selects which half of the module is instantiated. Resources on the other
# path get count = 0.
variable "provider_type" {
  type        = string
  description = "Cloud provider target: 'aws' or 'gcp'."

  validation {
    error_message = "provider_type must be either 'aws' or 'gcp'."
    condition     = contains(["aws", "gcp"], var.provider_type)
  }
}

# Used together with `project` to build resource names and tags/labels.
variable "environment" {
  type        = string
  description = "Deployment environment label (e.g. production, staging)."
}

variable "project" {
  type        = string
  default     = "sentryagent-agentidp"
  description = "Project identifier used in resource tags and names."
}

# Required; no default. Example value: "sentryagent/agentidp:1.2.3"
variable "app_image" {
  type        = string
  description = "Fully-qualified container image reference including registry host and tag."
}

# Used for the container port mapping and health probes on both cloud paths.
variable "app_port" {
  type        = number
  default     = 3000
  description = "Port the AgentIdP container listens on. Must match the PORT env var."
}
################################################################################
# AWS-specific variables (required when provider_type = "aws")
################################################################################
# --- Placement -----------------------------------------------------------------
# These default to empty values so that GCP callers can omit them; nothing in
# this file validates that they are set when provider_type = "aws".

variable "aws_region" {
  type        = string
  default     = ""
  description = "(AWS) AWS region where ECS resources are deployed."
}

variable "aws_vpc_id" {
  type        = string
  default     = ""
  description = "(AWS) VPC ID in which to create the ECS service and security group."
}

variable "aws_subnet_ids" {
  type        = list(string)
  default     = []
  description = "(AWS) List of private subnet IDs for the ECS Fargate tasks."
}

variable "aws_target_group_arn" {
  type        = string
  default     = ""
  description = "(AWS) ARN of the ALB target group to register ECS tasks with."
}

# --- IAM -----------------------------------------------------------------------

variable "aws_execution_role_arn" {
  type        = string
  default     = ""
  description = "(AWS) IAM role ARN that ECS uses to pull images and write logs (ECS task execution role)."
}

variable "aws_task_role_arn" {
  type        = string
  default     = ""
  description = "(AWS) IAM role ARN granted to the running ECS task (allows it to call Secrets Manager, etc.)."
}

# --- Logging -------------------------------------------------------------------

variable "aws_log_group_name" {
  type        = string
  default     = "/ecs/sentryagent-agentidp"
  description = "(AWS) CloudWatch log group name where container logs are sent."
}

# --- Sizing --------------------------------------------------------------------

variable "aws_desired_count" {
  type        = number
  default     = 2
  description = "(AWS) Number of ECS Fargate task instances to run."
}

variable "aws_cpu" {
  type        = number
  default     = 512
  description = "(AWS) ECS task CPU units (256 = 0.25 vCPU)."
}

variable "aws_memory" {
  type        = number
  default     = 1024
  description = "(AWS) ECS task memory in MiB."
}
# Secrets Manager ARNs. ECS resolves each one at task launch and injects the
# value as the environment variable named in the description. The task
# execution role needs secretsmanager:GetSecretValue on every ARN supplied.
# All of these inputs are flagged sensitive.

variable "aws_secret_database_url_arn" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(AWS) ARN of the Secrets Manager secret holding DATABASE_URL."
}

variable "aws_secret_redis_url_arn" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(AWS) ARN of the Secrets Manager secret holding REDIS_URL."
}

variable "aws_secret_jwt_private_key_arn" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(AWS) ARN of the Secrets Manager secret holding JWT_PRIVATE_KEY."
}

variable "aws_secret_jwt_public_key_arn" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(AWS) ARN of the Secrets Manager secret holding JWT_PUBLIC_KEY."
}

# Optional: when left empty, no VAULT_TOKEN secret entry is added to the task.
variable "aws_secret_vault_token_arn" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(AWS) ARN of the Secrets Manager secret holding VAULT_TOKEN. Leave empty to omit Vault integration."
}
# Plain (non-secret) application settings for the AWS path.

# When empty, neither VAULT_ADDR nor VAULT_MOUNT is injected.
variable "aws_vault_addr" {
  type        = string
  default     = ""
  description = "(AWS) HashiCorp Vault address injected as a plain env var (not a secret). Leave empty to disable."
}

variable "aws_vault_mount" {
  type        = string
  default     = "secret"
  description = "(AWS) HashiCorp Vault KV v2 mount path."
}

# NOTE(review): the default "*" is passed straight through as CORS_ORIGIN;
# confirm that a wildcard is intended for production deployments.
variable "aws_cors_origin" {
  type        = string
  default     = "*"
  description = "(AWS) Value for CORS_ORIGIN env var."
}

variable "aws_policy_dir" {
  type        = string
  default     = "/app/policies"
  description = "(AWS) Path inside the container where OPA policy files are located."
}
################################################################################
# GCP-specific variables (required when provider_type = "gcp")
################################################################################
# --- Placement and identity ------------------------------------------------------
# These default to empty strings so that AWS callers can omit them; nothing in
# this file validates that they are set when provider_type = "gcp".

variable "gcp_project_id" {
  type        = string
  default     = ""
  description = "(GCP) GCP project ID where Cloud Run and supporting resources live."
}

variable "gcp_region" {
  type        = string
  default     = ""
  description = "(GCP) GCP region for Cloud Run deployment."
}

variable "gcp_service_account_email" {
  type        = string
  default     = ""
  description = "(GCP) Service account email attached to the Cloud Run service."
}

variable "gcp_vpc_connector_name" {
  type        = string
  default     = ""
  description = "(GCP) Serverless VPC Access connector name for reaching Cloud SQL and Memorystore."
}

# --- Scaling ---------------------------------------------------------------------

variable "gcp_min_instances" {
  type        = number
  default     = 1
  description = "(GCP) Minimum number of Cloud Run instances (set > 0 to avoid cold starts)."
}

variable "gcp_max_instances" {
  type        = number
  default     = 10
  description = "(GCP) Maximum number of Cloud Run instances."
}

# --- Per-instance resource limits (strings) ----------------------------------------

variable "gcp_cpu" {
  type        = string
  default     = "1"
  description = "(GCP) CPU limit for each Cloud Run container instance (e.g. '1', '2')."
}

variable "gcp_memory" {
  type        = string
  default     = "512Mi"
  description = "(GCP) Memory limit for each Cloud Run container instance (e.g. '512Mi', '1Gi')."
}
# Secret Manager secret IDs. The Cloud Run service references each secret and
# exposes its value as the environment variable named in the description.
# All of these inputs are flagged sensitive.

variable "gcp_secret_database_url_id" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(GCP) Secret Manager secret ID for DATABASE_URL."
}

variable "gcp_secret_redis_url_id" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(GCP) Secret Manager secret ID for REDIS_URL."
}

variable "gcp_secret_jwt_private_key_id" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(GCP) Secret Manager secret ID for JWT_PRIVATE_KEY."
}

variable "gcp_secret_jwt_public_key_id" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(GCP) Secret Manager secret ID for JWT_PUBLIC_KEY."
}

# Optional: when left empty, no VAULT_TOKEN env block is generated.
variable "gcp_secret_vault_token_id" {
  type        = string
  default     = ""
  sensitive   = true
  description = "(GCP) Secret Manager secret ID for VAULT_TOKEN. Leave empty to omit Vault integration."
}
# Plain (non-secret) application settings for the GCP path.

# When empty, neither VAULT_ADDR nor VAULT_MOUNT is injected.
variable "gcp_vault_addr" {
  type        = string
  default     = ""
  description = "(GCP) HashiCorp Vault address injected as a plain env var. Leave empty to disable."
}

variable "gcp_vault_mount" {
  type        = string
  default     = "secret"
  description = "(GCP) HashiCorp Vault KV v2 mount path."
}

# NOTE(review): the default "*" is passed straight through as CORS_ORIGIN;
# confirm that a wildcard is intended for production deployments.
variable "gcp_cors_origin" {
  type        = string
  default     = "*"
  description = "(GCP) Value for CORS_ORIGIN env var."
}

variable "gcp_policy_dir" {
  type        = string
  default     = "/app/policies"
  description = "(GCP) Path inside the Cloud Run container where OPA policy files are located."
}

View File

@@ -0,0 +1,183 @@
################################################################################
# Module: lb
# Main — AWS Application Load Balancer
#
# - Internet-facing ALB in public subnets
# - HTTPS listener (443) with ACM certificate, TLS 1.2+ enforced
# - HTTP listener (80) redirects permanently to HTTPS — no plaintext traffic
# - Target group pointing to ECS Fargate tasks on the app port
# - Access logs optionally streamed to S3
################################################################################
# Toolchain constraints for the lb module (AWS only).
terraform {
  required_providers {
    aws = {
      version = ">= 5.40.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.6.0"
}
locals {
  # Tags applied to every resource in this module.
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  # "<project>-<environment>" prefix shared by all resource names below.
  identifier = "${var.project}-${var.environment}"
}
################################################################################
# Security Group — ALB allows inbound 80 + 443 from the internet
################################################################################
# Security group for the ALB.
#   - Ingress: TCP 80 and 443 from var.allowed_ingress_cidrs.
#   - Egress:  TCP on the target group port only (to any address).
resource "aws_security_group" "alb" {
  name   = "${local.identifier}-alb-sg"
  vpc_id = var.vpc_id

  # ASCII only: the EC2 API rejects security group descriptions containing
  # characters outside a-zA-Z0-9 and a small punctuation set (an em dash fails).
  description = "ALB security group - inbound 80/443 from internet, outbound to app"

  ingress {
    description = "HTTP from internet (redirected to HTTPS)"
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = var.allowed_ingress_cidrs
  }

  ingress {
    description = "HTTPS from internet"
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = var.allowed_ingress_cidrs
  }

  egress {
    description = "Forward to ECS app tasks"
    from_port   = var.target_group_port
    to_port     = var.target_group_port
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${local.identifier}-alb-sg"
  })
}
################################################################################
# Application Load Balancer
################################################################################
# Internet-facing Application Load Balancer placed in the supplied subnets and
# protected by the ALB security group defined in this module.
resource "aws_lb" "main" {
  # ALB names are limited to 32 characters. With the default project
  # ("sentryagent-agentidp") and environment "production" the untruncated name
  # would be 35 characters and creation would fail, so the identifier part is
  # capped at 28 characters (28 + "-alb" = 32). Identifiers of 28 characters or
  # fewer produce exactly the same name as before.
  name = "${substr(local.identifier, 0, 28)}-alb"

  internal           = false
  load_balancer_type = "application"
  security_groups    = [aws_security_group.alb.id]
  subnets            = var.subnet_ids

  idle_timeout               = var.idle_timeout
  enable_deletion_protection = var.enable_deletion_protection

  # HTTP/2 is enabled by default on ALB; leave it on for performance.
  enable_http2 = true

  # Drop invalid header fields to harden against request smuggling.
  drop_invalid_header_fields = true

  # Access logging is configured only when a bucket name is supplied.
  dynamic "access_logs" {
    for_each = var.access_logs_bucket != "" ? [1] : []
    content {
      bucket  = var.access_logs_bucket
      prefix  = var.access_logs_prefix
      enabled = true
    }
  }

  tags = merge(local.common_tags, {
    Name = "${local.identifier}-alb"
  })
}
################################################################################
# Target Group — ECS Fargate tasks register here
################################################################################
# Target group that the ECS Fargate tasks register with. Targets are addressed
# by IP, and health is judged by an HTTP 200 on the configured path using the
# traffic port.
resource "aws_lb_target_group" "app" {
  # Target group names are limited to 32 characters. With the default project
  # ("sentryagent-agentidp") and environment "production" the untruncated name
  # would be 34 characters and creation would fail, so the identifier part is
  # capped at 29 characters (29 + "-tg" = 32). Identifiers of 29 characters or
  # fewer produce exactly the same name as before.
  name = "${substr(local.identifier, 0, 29)}-tg"

  port        = var.target_group_port
  protocol    = "HTTP"
  vpc_id      = var.vpc_id
  target_type = "ip" # Required for Fargate (awsvpc network mode)

  deregistration_delay = 30

  health_check {
    enabled             = true
    path                = var.target_group_health_check_path
    port                = "traffic-port"
    protocol            = "HTTP"
    interval            = var.target_group_health_check_interval
    timeout             = var.target_group_health_check_timeout
    healthy_threshold   = var.target_group_healthy_threshold
    unhealthy_threshold = var.target_group_unhealthy_threshold
    matcher             = "200"
  }

  stickiness {
    type    = "lb_cookie"
    enabled = false # AgentIdP is stateless (JWT-based); no sticky sessions needed
  }

  tags = merge(local.common_tags, {
    Name = "${local.identifier}-tg"
  })

  # NOTE(review): create_before_destroy combined with a fixed `name` may fail
  # on replacement, because the new target group would briefly need the same
  # name as the old one - confirm, and consider `name_prefix` if replacements
  # are expected.
  lifecycle {
    create_before_destroy = true
  }
}
################################################################################
# HTTPS Listener (port 443) — primary listener
################################################################################
# Primary listener: terminates TLS on 443 with the supplied ACM certificate
# and SSL policy, then forwards every request to the app target group.
resource "aws_lb_listener" "https" {
  load_balancer_arn = aws_lb.main.arn

  protocol        = "HTTPS"
  port            = 443
  certificate_arn = var.certificate_arn
  ssl_policy      = var.ssl_policy

  default_action {
    target_group_arn = aws_lb_target_group.app.arn
    type             = "forward"
  }

  tags = local.common_tags
}
################################################################################
# HTTP Listener (port 80) — permanent redirect to HTTPS
################################################################################
# Plain-HTTP listener on port 80. It never forwards to the app; every request
# is answered with a 301 redirect to HTTPS on port 443.
resource "aws_lb_listener" "http_redirect" {
  load_balancer_arn = aws_lb.main.arn

  protocol = "HTTP"
  port     = 80

  default_action {
    type = "redirect"

    redirect {
      status_code = "HTTP_301"
      protocol    = "HTTPS"
      port        = "443"
    }
  }

  tags = local.common_tags
}

View File

@@ -0,0 +1,49 @@
################################################################################
# Module: lb
# Outputs
################################################################################
# Attributes of the load balancer itself.

output "alb_dns_name" {
  value       = aws_lb.main.dns_name
  description = "DNS name of the Application Load Balancer. Create a CNAME or alias record in Route 53 pointing your domain here."
}

output "alb_zone_id" {
  value       = aws_lb.main.zone_id
  description = "Hosted zone ID of the ALB. Use with aws_route53_record alias records."
}

output "alb_arn" {
  value       = aws_lb.main.arn
  description = "ARN of the Application Load Balancer."
}

output "alb_arn_suffix" {
  value       = aws_lb.main.arn_suffix
  description = "ARN suffix of the ALB for use in CloudWatch metrics."
}
# Target group attributes.

output "target_group_arn" {
  value       = aws_lb_target_group.app.arn
  description = "ARN of the target group. Pass to the agentidp module as aws_target_group_arn."
}

output "target_group_arn_suffix" {
  value       = aws_lb_target_group.app.arn_suffix
  description = "ARN suffix of the target group for use in CloudWatch metrics."
}

# Listener ARNs.

output "https_listener_arn" {
  value       = aws_lb_listener.https.arn
  description = "ARN of the HTTPS listener."
}

output "http_redirect_listener_arn" {
  value       = aws_lb_listener.http_redirect.arn
  description = "ARN of the HTTP→HTTPS redirect listener."
}

# Security group of the ALB.

output "alb_security_group_id" {
  value       = aws_security_group.alb.id
  description = "Security group ID of the ALB. Add this as an allowed source in the app task security group."
}

View File

@@ -0,0 +1,102 @@
################################################################################
# Module: lb
# Variables — AWS Application Load Balancer
################################################################################
# --- Naming ----------------------------------------------------------------------
# `project` and `environment` are joined as "<project>-<environment>" to name
# and tag every resource in the module.

variable "environment" {
  type        = string
  description = "Deployment environment label (e.g. production, staging)."
}

variable "project" {
  type        = string
  default     = "sentryagent-agentidp"
  description = "Project identifier used in resource names and tags."
}

# --- Networking and TLS (all required) ---------------------------------------------

variable "vpc_id" {
  type        = string
  description = "VPC ID in which to create the ALB and its security group."
}

variable "subnet_ids" {
  type        = list(string)
  description = "List of public subnet IDs for the ALB. Must span at least 2 AZs."
}

variable "certificate_arn" {
  type        = string
  description = "ARN of the ACM certificate to attach to the HTTPS listener (port 443)."
}
# Target group settings. The port is also used for the ALB security group's
# egress rule.

variable "target_group_port" {
  type        = number
  default     = 3000
  description = "Port that ECS task containers listen on. Target group forwards traffic to this port."
}

# Health check tuning for the target group.

variable "target_group_health_check_path" {
  type        = string
  default     = "/health"
  description = "HTTP path used by the ALB target group health check."
}

variable "target_group_health_check_interval" {
  type        = number
  default     = 30
  description = "Interval in seconds between ALB health checks."
}

variable "target_group_health_check_timeout" {
  type        = number
  default     = 5
  description = "Timeout in seconds for each ALB health check request."
}

variable "target_group_healthy_threshold" {
  type        = number
  default     = 2
  description = "Number of consecutive successful health checks before marking a target healthy."
}

variable "target_group_unhealthy_threshold" {
  type        = number
  default     = 3
  description = "Number of consecutive failed health checks before marking a target unhealthy."
}
# --- Load balancer behaviour -------------------------------------------------------

variable "idle_timeout" {
  type        = number
  default     = 60
  description = "ALB idle connection timeout in seconds."
}

variable "enable_deletion_protection" {
  type        = bool
  default     = true
  description = "Prevent the ALB from being deleted via the AWS API."
}

# --- Access logging (disabled while the bucket name is empty) ----------------------

variable "access_logs_bucket" {
  type        = string
  default     = ""
  description = "S3 bucket name for ALB access logs. Leave empty to disable access logging."
}

variable "access_logs_prefix" {
  type        = string
  default     = "alb"
  description = "S3 key prefix for ALB access log files."
}

# --- TLS and exposure --------------------------------------------------------------

variable "ssl_policy" {
  type        = string
  default     = "ELBSecurityPolicy-TLS13-1-2-2021-06"
  description = "SSL negotiation policy for the HTTPS listener. ELBSecurityPolicy-TLS13-1-2-2021-06 enforces TLS 1.2+ and TLS 1.3."
}

# Applied to both the port 80 and port 443 ingress rules of the ALB security
# group.
variable "allowed_ingress_cidrs" {
  type        = list(string)
  default     = ["0.0.0.0/0"]
  description = "CIDR blocks allowed to reach the ALB on port 80 and 443. Default allows public internet."
}

View File

@@ -0,0 +1,180 @@
################################################################################
# Module: rds
# Main — AWS RDS PostgreSQL 14
#
# - Multi-AZ for HA
# - Encryption at rest (AWS-managed KMS key)
# - No public access — VPC-internal only
# - Storage autoscaling up to max_allocated_storage
# - Enhanced monitoring and Performance Insights enabled by default
# - Access restricted to explicitly allowed security groups (app only)
################################################################################
# Toolchain constraints for the rds module (AWS only).
terraform {
  required_providers {
    aws = {
      version = ">= 5.40.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.6.0"
}
locals {
  # Tags applied to every resource in this module.
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  # "<project>-<environment>" prefix shared by all resource names below.
  identifier = "${var.project}-${var.environment}"
}
################################################################################
# Security Group — only the app SGs may connect on 5432
################################################################################
# Security group for the RDS instance.
#
# No ingress is declared inline; the PostgreSQL (5432) ingress rules are
# attached by the standalone aws_security_group_rule resource in this module,
# one per allowed source security group, to avoid circular dependencies.
resource "aws_security_group" "rds" {
  name   = "${local.identifier}-rds-sg"
  vpc_id = var.vpc_id

  # ASCII only: the EC2 API rejects security group descriptions containing
  # characters outside a-zA-Z0-9 and a small punctuation set (an em dash fails).
  description = "Controls inbound access to RDS PostgreSQL - allow only app SG on 5432"

  egress {
    description = "All outbound (RDS initiates no outbound connections; this satisfies AWS requirement)"
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${local.identifier}-rds-sg"
  })
}
# One ingress rule per entry in var.allowed_security_group_ids: members of
# that security group may open TCP 5432 connections to the RDS security group.
resource "aws_security_group_rule" "rds_ingress_from_app" {
  for_each = toset(var.allowed_security_group_ids)

  security_group_id        = aws_security_group.rds.id
  source_security_group_id = each.value

  type        = "ingress"
  protocol    = "tcp"
  from_port   = 5432
  to_port     = 5432
  description = "PostgreSQL from app security group"
}
################################################################################
# DB Subnet Group — must cover at least 2 AZs for Multi-AZ
################################################################################
# Subnet group that places the RDS instance in the caller-supplied subnets.
resource "aws_db_subnet_group" "main" {
  subnet_ids  = var.subnet_ids
  name        = format("%s-db-subnet-group", local.identifier)
  description = "Private subnets for AgentIdP RDS instance"

  tags = merge(
    local.common_tags,
    { Name = format("%s-db-subnet-group", local.identifier) },
  )
}
################################################################################
# DB Parameter Group — enforce SSL connections
################################################################################
# Custom parameter group for the RDS instance. Every parameter below is
# applied with apply_method = "immediate".
resource "aws_db_parameter_group" "main" {
  name        = "${local.identifier}-pg14-params"
  family      = var.parameter_group_family
  description = "AgentIdP custom parameter group — enforces SSL"

  # Parameter name => value. rds.force_ssl enforces SSL connections; the
  # log_* entries turn on connection, disconnection and slow-statement logging.
  dynamic "parameter" {
    for_each = {
      "rds.force_ssl"              = "1"
      "log_connections"            = "1"
      "log_disconnections"         = "1"
      "log_min_duration_statement" = "1000"
    }

    content {
      name         = parameter.key
      value        = parameter.value
      apply_method = "immediate"
    }
  }

  tags = local.common_tags
}
################################################################################
# RDS Instance
################################################################################
# The AgentIdP PostgreSQL 14 instance.
#
# Encrypted at rest, reachable only through the module's security group, and
# never publicly accessible. Sizing, HA, backup, maintenance and observability
# settings come from module variables.
resource "aws_db_instance" "main" {
  identifier = local.identifier

  # Engine — only the major version is pinned; minor versions follow
  # auto_minor_version_upgrade below.
  engine         = "postgres"
  engine_version = "14"
  instance_class = var.instance_class

  # Storage
  storage_type          = "gp3"
  allocated_storage     = var.allocated_storage
  max_allocated_storage = var.max_allocated_storage
  storage_encrypted     = true
  # kms_key_id is omitted — defaults to the AWS-managed RDS KMS key.
  # For customer-managed key, set kms_key_id to your CMK ARN.

  # Database
  db_name  = var.db_name
  username = var.db_username
  password = var.db_password

  # Network — VPC-internal only, no public endpoint
  db_subnet_group_name   = aws_db_subnet_group.main.name
  vpc_security_group_ids = [aws_security_group.rds.id]
  publicly_accessible    = false
  multi_az               = var.multi_az
  port                   = 5432

  # Parameter group
  parameter_group_name = aws_db_parameter_group.main.name

  # Backups — a final snapshot name is only supplied when one will be taken.
  backup_retention_period   = var.backup_retention_days
  backup_window             = var.backup_window
  delete_automated_backups  = false
  copy_tags_to_snapshot     = true
  skip_final_snapshot       = var.skip_final_snapshot
  final_snapshot_identifier = var.skip_final_snapshot ? null : "${local.identifier}-final-snapshot"

  # Maintenance — changes wait for the maintenance window.
  maintenance_window         = var.maintenance_window
  auto_minor_version_upgrade = true
  apply_immediately          = false

  # Observability — retention and role are only sent when the corresponding
  # feature is enabled.
  enabled_cloudwatch_logs_exports       = ["postgresql", "upgrade"]
  performance_insights_enabled          = var.performance_insights_enabled
  performance_insights_retention_period = var.performance_insights_enabled ? var.performance_insights_retention_period : null
  monitoring_interval                   = var.monitoring_interval
  monitoring_role_arn                   = var.monitoring_interval > 0 ? var.monitoring_role_arn : null

  # Protection
  deletion_protection = var.deletion_protection

  tags = merge(local.common_tags, {
    Name = local.identifier
  })

  lifecycle {
    # Enhanced Monitoring needs an IAM role. Without this check the module
    # defaults (interval 60, empty role ARN) would send an empty ARN to the
    # RDS API and fail only at apply time.
    precondition {
      condition     = var.monitoring_interval == 0 || (var.monitoring_role_arn != null && var.monitoring_role_arn != "")
      error_message = "monitoring_role_arn must be set when monitoring_interval > 0. Supply an IAM role ARN for RDS Enhanced Monitoring or set monitoring_interval = 0."
    }
  }
}

View File

@@ -0,0 +1,44 @@
################################################################################
# Module: rds
# Outputs
################################################################################
# Connection details read from the created RDS instance.

# Hostname only (the instance's `address` attribute).
output "endpoint" {
  value       = aws_db_instance.main.address
  description = "RDS instance endpoint hostname (without port). Use to construct DATABASE_URL."
}

# Listener port of the instance.
output "port" {
  value       = aws_db_instance.main.port
  description = "Port the RDS instance listens on (always 5432)."
}

# Initial database name.
output "db_name" {
  value       = aws_db_instance.main.db_name
  description = "Name of the database created on the RDS instance."
}

# Master user name.
output "db_username" {
  value       = aws_db_instance.main.username
  description = "Master username for the RDS instance."
}
# Identifiers of the resources this module creates.

# The instance's `identifier` attribute.
output "instance_id" {
  value       = aws_db_instance.main.identifier
  description = "RDS instance identifier."
}

# Full ARN of the instance.
output "instance_arn" {
  value       = aws_db_instance.main.arn
  description = "ARN of the RDS instance."
}

# ID of the module-managed RDS security group.
output "security_group_id" {
  value       = aws_security_group.rds.id
  description = "Security group ID attached to the RDS instance. Use to add further ingress rules if needed."
}

# Name of the DB subnet group.
output "db_subnet_group_name" {
  value       = aws_db_subnet_group.main.name
  description = "Name of the DB subnet group."
}

View File

@@ -0,0 +1,133 @@
################################################################################
# Module: rds
# Variables — AWS RDS PostgreSQL 14
################################################################################
# Naming inputs: both values are joined into the module's resource-name prefix
# and copied into the common tags.

# Required — no default.
variable "environment" {
  type        = string
  description = "Deployment environment label (e.g. production, staging)."
}

variable "project" {
  type        = string
  default     = "sentryagent-agentidp"
  description = "Project identifier used in resource names and tags."
}
# Network placement and access inputs.

# VPC that hosts the RDS security group.
variable "vpc_id" {
  type        = string
  description = "VPC ID in which to create the RDS subnet group and security group."
}

# Subnets handed to the DB subnet group.
variable "subnet_ids" {
  type        = list(string)
  description = "List of private subnet IDs for the RDS DB subnet group. Must span at least 2 AZs for Multi-AZ."
}

# Each listed group receives an ingress rule on 5432; the empty default
# creates no ingress rules.
variable "allowed_security_group_ids" {
  type        = list(string)
  default     = []
  description = "List of security group IDs (e.g. ECS app SG) permitted to connect to RDS on port 5432."
}
# Database identity and master credentials.

variable "db_name" {
  type        = string
  default     = "sentryagent_idp"
  description = "Name of the initial PostgreSQL database to create."
}

variable "db_username" {
  type        = string
  default     = "sentryagent"
  description = "Master username for the RDS instance."
}

# Required and marked sensitive so Terraform redacts it from plan output.
variable "db_password" {
  type        = string
  sensitive   = true
  description = "Master password for the RDS instance. Store this in Secrets Manager; do not hardcode."
}
# Instance sizing, storage and availability.

variable "instance_class" {
  type        = string
  default     = "db.t3.medium"
  description = "RDS instance class."
}

# Starting volume size.
variable "allocated_storage" {
  type        = number
  default     = 50
  description = "Initial storage allocated in GiB."
}

# Autoscaling ceiling for the volume.
variable "max_allocated_storage" {
  type        = number
  default     = 500
  description = "Upper bound for RDS storage autoscaling in GiB. Set to 0 to disable autoscaling."
}

variable "multi_az" {
  type        = bool
  default     = true
  description = "Enable Multi-AZ deployment for high availability."
}
# Automated backup retention, passed to the instance's backup_retention_period.
# RDS accepts 0-35 days; the validation surfaces an out-of-range value at plan
# time instead of as an API error during apply.
variable "backup_retention_days" {
  description = "Number of days to retain automated backups. Valid range is 0-35; 0 disables automated backups."
  type        = number
  default     = 7

  validation {
    condition     = var.backup_retention_days >= 0 && var.backup_retention_days <= 35
    error_message = "backup_retention_days must be between 0 and 35."
  }
}
# Scheduling windows (UTC). The defaults do not overlap: backups run daily
# 03:00-04:00, maintenance runs Sundays 05:00-06:00.

variable "backup_window" {
  type        = string
  default     = "03:00-04:00"
  description = "Preferred daily backup window in UTC (hh24:mi-hh24:mi)."
}

variable "maintenance_window" {
  type        = string
  default     = "sun:05:00-sun:06:00"
  description = "Preferred weekly maintenance window (ddd:hh24:mi-ddd:hh24:mi in UTC)."
}
# Safety and observability toggles. The defaults are the protective choices:
# deletion protection on, final snapshot taken, Performance Insights on.

variable "deletion_protection" {
  type        = bool
  default     = true
  description = "Enable deletion protection. Set to false only when decommissioning."
}

# When false, the instance is given a final snapshot identifier on destroy.
variable "skip_final_snapshot" {
  type        = bool
  default     = false
  description = "Whether to skip the final DB snapshot on destroy. Should be false in production."
}

variable "performance_insights_enabled" {
  type        = bool
  default     = true
  description = "Enable RDS Performance Insights."
}
# Performance Insights retention in days. Only sent to RDS when
# performance_insights_enabled is true.
#
# RDS accepts 7, 731, or a whole number of months expressed as month * 31
# (31..713). The validation rejects other values at plan time. Note that it
# is evaluated even when Performance Insights is disabled.
variable "performance_insights_retention_period" {
  description = "Performance Insights data retention in days. Valid values: 7 (free tier), 731 (2 years), or a multiple of 31 between 31 and 713."
  type        = number
  default     = 7

  validation {
    condition = (
      var.performance_insights_retention_period == 7 ||
      var.performance_insights_retention_period == 731 ||
      (
        var.performance_insights_retention_period % 31 == 0 &&
        var.performance_insights_retention_period >= 31 &&
        var.performance_insights_retention_period <= 713
      )
    )
    error_message = "performance_insights_retention_period must be 7, 731, or a multiple of 31 between 31 and 713."
  }
}
# Enhanced Monitoring sampling interval. 0 turns the feature off; any
# non-zero value also requires monitoring_role_arn to be supplied.
# The validation enforces the enumerated values the description lists.
variable "monitoring_interval" {
  description = "Enhanced monitoring interval in seconds (0 to disable, valid: 1, 5, 10, 15, 30, 60)."
  type        = number
  default     = 60

  validation {
    condition     = contains([0, 1, 5, 10, 15, 30, 60], var.monitoring_interval)
    error_message = "monitoring_interval must be one of 0, 1, 5, 10, 15, 30 or 60."
  }
}
# Only passed to the instance when monitoring_interval > 0; the empty default
# means "no role supplied".
variable "monitoring_role_arn" {
  type        = string
  default     = ""
  description = "IAM role ARN for RDS Enhanced Monitoring. Required when monitoring_interval > 0."
}

# Family used by the module's custom DB parameter group.
variable "parameter_group_family" {
  type        = string
  default     = "postgres14"
  description = "DB parameter group family."
}

View File

@@ -0,0 +1,176 @@
################################################################################
# Module: redis
# Main — AWS ElastiCache Redis 7
#
# - Single shard (cluster mode disabled): one primary + one replica
# - Encryption at rest and in transit (TLS)
# - AUTH token required when transit encryption is enabled
# - VPC-internal only — no public access
# - Access restricted to explicitly allowed security groups (app only)
# - Slow log + engine log delivery to CloudWatch
################################################################################
# Toolchain constraints for the redis module: Terraform CLI 1.6.0 or newer and
# the HashiCorp AWS provider 5.40.0 or newer.
terraform {
  required_providers {
    aws = {
      version = ">= 5.40.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.6.0"
}
# Values shared by every resource in this module:
#   identifier  - "<project>-<environment>" prefix used in resource names
#   common_tags - baseline tag set merged into each resource's tags
locals {
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  identifier = join("-", [var.project, var.environment])
}
################################################################################
# CloudWatch Log Group for Redis logs
################################################################################
# Destination log group for Redis log delivery. Declared only when
# log_delivery_enabled is true; log events are retained for 30 days.
resource "aws_cloudwatch_log_group" "redis" {
  count = var.log_delivery_enabled ? 1 : 0

  name              = var.log_group_name
  retention_in_days = 30

  tags = local.common_tags
}
################################################################################
# Security Group — only the app SGs may connect on 6379
################################################################################
# Security group attached to the ElastiCache replication group.
#
# The description is deliberately ASCII-only: EC2 rejects security group
# descriptions containing characters outside a-z, A-Z, 0-9, spaces and
# ._-:/()#,@[]+=&;{}!$* so a typographic dash would fail at create time.
# Ingress rules are attached by aws_security_group_rule.redis_ingress_from_app.
resource "aws_security_group" "redis" {
  name        = "${local.identifier}-redis-sg"
  description = "Controls inbound access to ElastiCache Redis - allow only app SG on 6379"
  vpc_id      = var.vpc_id

  egress {
    description = "All outbound"
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${local.identifier}-redis-sg"
  })
}
# One ingress rule per allowed application security group, opening TCP 6379
# on the Redis security group.
#
# count (not for_each) is used on purpose: the caller typically passes the ID
# of a security group created in the same apply, which is unknown at plan
# time. for_each keys must be known during planning and would fail with
# "Invalid for_each argument"; the list *length* is known even when its
# elements are not.
resource "aws_security_group_rule" "redis_ingress_from_app" {
  count = length(var.allowed_security_group_ids)

  type                     = "ingress"
  description              = "Redis from app security group"
  from_port                = 6379
  to_port                  = 6379
  protocol                 = "tcp"
  source_security_group_id = var.allowed_security_group_ids[count.index]
  security_group_id        = aws_security_group.redis.id
}
################################################################################
# ElastiCache Subnet Group
################################################################################
# Subnet group that places the cache nodes in the caller-supplied subnets.
resource "aws_elasticache_subnet_group" "main" {
  subnet_ids  = var.subnet_ids
  name        = format("%s-redis-subnet-group", local.identifier)
  description = "Private subnets for AgentIdP ElastiCache Redis"

  tags = local.common_tags
}
################################################################################
# ElastiCache Parameter Group — Redis 7.x defaults are fine; custom group
# allows future tuning without recreating the replication group.
################################################################################
# Custom Redis 7 parameter group for the replication group.
resource "aws_elasticache_parameter_group" "main" {
  name        = "${local.identifier}-redis7-params"
  family      = "redis7"
  description = "AgentIdP Redis 7 parameter group"

  # Both lazyfree-* settings are switched to "yes". Per the Redis
  # documentation these make eviction and key expiry free memory
  # asynchronously; they do not disable any commands.
  dynamic "parameter" {
    for_each = {
      "lazyfree-lazy-eviction" = "yes"
      "lazyfree-lazy-expire"   = "yes"
    }

    content {
      name  = parameter.key
      value = parameter.value
    }
  }

  tags = local.common_tags
}
################################################################################
# ElastiCache Replication Group (cluster mode disabled)
#
# Cluster mode disabled gives a single-shard setup:
# - 1 primary node
# - num_cache_clusters - 1 replica nodes
# This matches the application usage: token revocation (SET/GET/DEL),
# rate limiting (INCR/EXPIRE), and monthly counters (INCR) — no sharding needed.
################################################################################
# Single-shard Redis replication group (one primary plus
# num_cache_clusters - 1 replicas) placed in the module's subnet group and
# reachable only through the module's security group.
resource "aws_elasticache_replication_group" "main" {
  replication_group_id = local.identifier
  description          = "AgentIdP Redis 7 — token revocation, rate limiting, counters"

  # Engine
  engine               = "redis"
  engine_version       = var.engine_version
  node_type            = var.node_type
  parameter_group_name = aws_elasticache_parameter_group.main.name
  port                 = 6379

  # Topology — single shard, primary + replica
  num_cache_clusters         = var.num_cache_clusters
  automatic_failover_enabled = var.automatic_failover_enabled
  multi_az_enabled           = var.multi_az_enabled

  # Network — VPC-internal, no public endpoints
  subnet_group_name  = aws_elasticache_subnet_group.main.name
  security_group_ids = [aws_security_group.redis.id]

  # Security — the AUTH token is only sent when transit encryption is on and
  # a non-empty token was supplied; otherwise it is left unset.
  at_rest_encryption_enabled = var.at_rest_encryption_enabled
  transit_encryption_enabled = var.transit_encryption_enabled
  auth_token                 = var.transit_encryption_enabled && var.auth_token != "" ? var.auth_token : null

  # Maintenance and snapshots
  maintenance_window       = var.maintenance_window
  snapshot_retention_limit = var.snapshot_retention_limit
  snapshot_window          = var.snapshot_window
  apply_immediately        = var.apply_immediately

  # Log delivery to CloudWatch — one configuration per log type, both in JSON
  # and both written to the module's log group. Nothing is emitted when
  # log_delivery_enabled is false.
  dynamic "log_delivery_configuration" {
    for_each = var.log_delivery_enabled ? [
      { log_type = "slow-log", log_format = "json" },
      { log_type = "engine-log", log_format = "json" }
    ] : []

    content {
      destination      = var.log_delivery_enabled ? aws_cloudwatch_log_group.redis[0].name : ""
      destination_type = "cloudwatch-logs"
      log_format       = log_delivery_configuration.value.log_format
      log_type         = log_delivery_configuration.value.log_type
    }
  }

  tags = merge(local.common_tags, {
    Name = local.identifier
  })

  lifecycle {
    # Surface invalid topology combinations at plan time instead of as
    # ElastiCache API errors during apply.
    precondition {
      condition     = !var.automatic_failover_enabled || var.num_cache_clusters >= 2
      error_message = "automatic_failover_enabled = true requires num_cache_clusters >= 2."
    }

    precondition {
      condition     = !var.multi_az_enabled || var.automatic_failover_enabled
      error_message = "multi_az_enabled = true requires automatic_failover_enabled = true."
    }
  }
}

View File

@@ -0,0 +1,34 @@
################################################################################
# Module: redis
# Outputs
################################################################################
# Connection details read from the replication group.

# Primary (write) endpoint address.
output "primary_endpoint" {
  value       = aws_elasticache_replication_group.main.primary_endpoint_address
  description = "Primary endpoint hostname for write operations. Use to construct REDIS_URL."
}

# Reader endpoint address.
output "reader_endpoint" {
  value       = aws_elasticache_replication_group.main.reader_endpoint_address
  description = "Reader endpoint for read operations (load-balanced across replicas)."
}

# Listener port of the replication group.
output "port" {
  value       = aws_elasticache_replication_group.main.port
  description = "Port the Redis replication group listens on (always 6379)."
}
# Identifiers of the resources this module creates.

output "replication_group_id" {
  value       = aws_elasticache_replication_group.main.replication_group_id
  description = "ID of the ElastiCache replication group."
}

# ID of the module-managed Redis security group.
output "security_group_id" {
  value       = aws_security_group.redis.id
  description = "Security group ID attached to the replication group. Use to add further ingress rules."
}
# URL of the primary endpoint in "<scheme>://<host>:<port>" form. The scheme
# is "rediss" when transit encryption is enabled and "redis" otherwise; no
# credentials are embedded.
output "redis_url" {
  description = "Constructed REDIS_URL using the primary endpoint. Includes rediss:// (TLS) scheme when transit encryption is enabled."
  value       = "${var.transit_encryption_enabled ? "rediss" : "redis"}://${aws_elasticache_replication_group.main.primary_endpoint_address}:${aws_elasticache_replication_group.main.port}"
}

View File

@@ -0,0 +1,116 @@
################################################################################
# Module: redis
# Variables — AWS ElastiCache Redis 7
################################################################################
# Naming inputs: both values are joined into the module's resource-name prefix
# and copied into the common tags.

# Required — no default.
variable "environment" {
  type        = string
  description = "Deployment environment label (e.g. production, staging)."
}

variable "project" {
  type        = string
  default     = "sentryagent-agentidp"
  description = "Project identifier used in resource names and tags."
}
# Network placement and access inputs.

# VPC that hosts the Redis security group.
variable "vpc_id" {
  type        = string
  description = "VPC ID in which to create the ElastiCache subnet group and security group."
}

# Subnets handed to the ElastiCache subnet group.
variable "subnet_ids" {
  type        = list(string)
  description = "List of private subnet IDs for the ElastiCache subnet group. Span at least 2 AZs."
}

# Each listed group receives an ingress rule on 6379; the empty default
# creates no ingress rules.
variable "allowed_security_group_ids" {
  type        = list(string)
  default     = []
  description = "List of security group IDs (e.g. ECS app SG) permitted to connect to Redis on port 6379."
}
# Node sizing and engine version for the replication group.

variable "node_type" {
  type        = string
  default     = "cache.t3.medium"
  description = "ElastiCache node instance type."
}

variable "engine_version" {
  type        = string
  default     = "7.1"
  description = "Redis engine version. Use 7.x for Redis 7."
}
# Total node count of the single shard; the default of 2 yields one primary
# and one replica.
variable "num_cache_clusters" {
  type        = number
  default     = 2
  description = "Total number of cache clusters in the replication group (1 primary + N replicas). Minimum 2 for HA."
}
# Automatic promotion of a replica when the primary fails.
# The dependency runs from failover to node count (and from Multi-AZ to
# failover), not the other way round: replicas may exist without failover,
# but failover cannot work without at least one replica.
variable "automatic_failover_enabled" {
  description = "Enable automatic failover. Requires num_cache_clusters >= 2 and must be true when multi_az_enabled = true."
  type        = bool
  default     = true
}
# Availability and encryption toggles; all three default to enabled.

variable "multi_az_enabled" {
  type        = bool
  default     = true
  description = "Enable Multi-AZ for the replication group."
}

variable "at_rest_encryption_enabled" {
  type        = bool
  default     = true
  description = "Encrypt data at rest."
}

# Also selects the rediss:// scheme in the redis_url output and gates whether
# the AUTH token is passed to the replication group.
variable "transit_encryption_enabled" {
  type        = bool
  default     = true
  description = "Enable TLS for data in transit."
}
# Redis AUTH token. The empty default means "no token": the replication group
# then leaves auth_token unset.
#
# nullable = false maps an explicit null to the "" default so the length
# check below never sees null. The validation enforces ElastiCache's 16-128
# character limit at plan time; the error message never echoes the value.
variable "auth_token" {
  description = "AUTH token (password) for Redis AUTH command. Required when transit_encryption_enabled = true. Minimum 16 characters, maximum 128."
  type        = string
  sensitive   = true
  default     = ""
  nullable    = false

  validation {
    condition     = length(var.auth_token) == 0 || (length(var.auth_token) >= 16 && length(var.auth_token) <= 128)
    error_message = "auth_token must be empty or between 16 and 128 characters long."
  }
}
# Scheduling windows (UTC) and snapshot retention. The defaults do not
# overlap: snapshots run daily 04:00-05:00, maintenance runs Sundays
# 06:00-07:00.

variable "maintenance_window" {
  type        = string
  default     = "sun:06:00-sun:07:00"
  description = "Preferred weekly maintenance window (ddd:hh24:mi-ddd:hh24:mi in UTC)."
}

variable "snapshot_retention_limit" {
  type        = number
  default     = 7
  description = "Number of days to retain automatic Redis snapshots. 0 disables snapshots."
}

variable "snapshot_window" {
  type        = string
  default     = "04:00-05:00"
  description = "Daily time range for automatic snapshots (hh24:mi-hh24:mi in UTC). Must not overlap maintenance_window."
}
# Change-rollout and logging toggles.

# Defaults to deferring changes to the maintenance window.
variable "apply_immediately" {
  type        = bool
  default     = false
  description = "Apply changes immediately. Set to false to wait for the next maintenance window in production."
}

# Controls both the CloudWatch log group and the replication group's log
# delivery configurations.
variable "log_delivery_enabled" {
  type        = bool
  default     = true
  description = "Enable delivery of Redis slow logs and engine logs to CloudWatch."
}
# Name of the CloudWatch log group that receives Redis logs.
# The module declares this log group itself whenever log_delivery_enabled is
# true, so a group of the same name that already exists outside Terraform
# conflicts on create. The default is not environment-specific; override it
# when deploying more than one environment into the same account and region.
variable "log_group_name" {
  description = "CloudWatch log group name for Redis logs. The module creates this log group when log_delivery_enabled = true, so it must not already exist."
  type        = string
  default     = "/elasticache/sentryagent-agentidp/redis"
}