feat(phase-2): workstream 8 — Multi-Region Terraform Deployment

AWS environment:
- VPC (3-AZ, public + private subnets, NAT gateways, VPC endpoints for ECR/SM/CW)
- ECS Fargate service (sentryagent/agentidp) — secrets from Secrets Manager
- RDS PostgreSQL 14 (Multi-AZ, encrypted, VPC-internal, storage autoscaling)
- ElastiCache Redis 7 (primary + replica, at-rest + in-transit encryption)
- ALB with HTTPS/443, HTTP→HTTPS redirect, ACM certificate
- Route 53 alias record

GCP environment:
- VPC + private services access + Serverless VPC connector
- Cloud Run service — secrets from Secret Manager
- Cloud SQL PostgreSQL 14 (private IP, no public endpoint)
- Cloud Memorystore Redis 7 (VPC-internal, AUTH enabled)

Shared:
- 4 reusable modules: agentidp (dual AWS/GCP), rds, redis, lb
- No hardcoded secrets; all sensitive vars marked sensitive=true
- terraform.tfvars.example for both environments
- docs/devops/deployment.md — AWS + GCP step-by-step walkthrough, rollback procedures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
SentryAgent.ai Developer
2026-03-29 06:25:14 +00:00
parent a504964e5f
commit 6913d62648
22 changed files with 4138 additions and 8 deletions

View File

@@ -0,0 +1,640 @@
################################################################################
# Environment: aws
# Main — SentryAgent.ai AgentIdP on AWS
#
# Architecture:
# Internet → Route 53 → ALB (public subnets, HTTPS/443) →
# ECS Fargate tasks (private subnets) →
# RDS PostgreSQL 14 (private subnets, Multi-AZ) +
# ElastiCache Redis 7 (private subnets, primary + replica)
#
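# All secrets are stored in AWS Secrets Manager — ECS tasks pull them at launch time.
# NOTE: values supplied through Terraform variables (DB password, Redis auth token,
# JWT keys, Vault token) are still recorded in Terraform state, so the state
# backend must be encrypted and access-restricted.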
################################################################################
terraform {
  # Oldest Terraform CLI release this configuration accepts.
  required_version = ">= 1.6.0"

  # Both providers are constrained by a lower bound only.
  required_providers {
    random = {
      source  = "hashicorp/random"
      version = ">= 3.6.0"
    }

    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.40.0"
    }
  }

  # Remote state — configure your backend here. No backend is declared in this
  # block as written; the snippet below is only a template.
  # Example using S3 + DynamoDB state locking:
  #
  # backend "s3" {
  #   bucket         = "sentryagent-terraform-state"
  #   key            = "agentidp/aws/production/terraform.tfstate"
  #   region         = "us-east-1"
  #   encrypt        = true
  #   dynamodb_table = "sentryagent-terraform-locks"
  # }
}
# AWS provider for this environment. The tags under default_tags are applied by
# the provider to every taggable resource it manages.
provider "aws" {
  region = var.region

  default_tags {
    tags = {
      managed_by  = "terraform"
      project     = var.project
      environment = var.environment
    }
  }
}
################################################################################
# Data sources
################################################################################
# Account / caller identity of the credentials Terraform runs with.
data "aws_caller_identity" "current" {
}

# Region the AWS provider is currently configured for.
data "aws_region" "current" {
}
################################################################################
# VPC
################################################################################
# The environment's single VPC. DNS support and DNS hostnames are both switched
# on; the interface VPC endpoints in this file enable private DNS.
resource "aws_vpc" "main" {
  cidr_block = var.vpc_cidr

  enable_dns_hostnames = true
  enable_dns_support   = true

  tags = {
    Name = "${var.project}-${var.environment}-vpc"
  }
}
# Internet gateway attached to the VPC; target of the public route table's
# default route.
resource "aws_internet_gateway" "main" {
  vpc_id = aws_vpc.main.id

  tags = {
    Name = "${var.project}-${var.environment}-igw"
  }
}
################################################################################
# Subnets
################################################################################
# One public subnet per entry in var.availability_zones. The CIDR for each
# subnet is taken from var.public_subnet_cidrs at the same index, so that list
# must have at least as many entries as var.availability_zones.
resource "aws_subnet" "public" {
  count = length(var.availability_zones)

  vpc_id            = aws_vpc.main.id
  availability_zone = var.availability_zones[count.index]
  cidr_block        = var.public_subnet_cidrs[count.index]

  # Nothing launched here is given a public IP automatically.
  map_public_ip_on_launch = false

  tags = {
    tier = "public"
    Name = "${var.project}-${var.environment}-public-${var.availability_zones[count.index]}"
  }
}
# One private subnet per entry in var.availability_zones, with CIDRs read from
# var.private_subnet_cidrs at the matching index.
resource "aws_subnet" "private" {
  count = length(var.availability_zones)

  vpc_id            = aws_vpc.main.id
  availability_zone = var.availability_zones[count.index]
  cidr_block        = var.private_subnet_cidrs[count.index]

  tags = {
    tier = "private"
    Name = "${var.project}-${var.environment}-private-${var.availability_zones[count.index]}"
  }
}
################################################################################
# NAT Gateways — one per AZ for HA outbound from private subnets
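# ECS tasks need outbound internet for destinations not covered by the VPC
# endpoints defined later in this file (ECR, S3, Secrets Manager and CloudWatch
# Logs are reached through those endpoints).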
################################################################################
# Elastic IPs for the NAT gateways — one per availability zone.
resource "aws_eip" "nat" {
  count = length(var.availability_zones)

  domain = "vpc"

  # Explicit ordering: allocate only once the internet gateway exists.
  depends_on = [aws_internet_gateway.main]

  tags = {
    Name = "${var.project}-${var.environment}-nat-eip-${var.availability_zones[count.index]}"
  }
}
# One NAT gateway per availability zone, placed in the public subnet at the same
# index and bound to the Elastic IP at the same index.
resource "aws_nat_gateway" "main" {
  count = length(var.availability_zones)

  subnet_id     = aws_subnet.public[count.index].id
  allocation_id = aws_eip.nat[count.index].id

  # Explicit ordering: create only once the internet gateway exists.
  depends_on = [aws_internet_gateway.main]

  tags = {
    Name = "${var.project}-${var.environment}-nat-${var.availability_zones[count.index]}"
  }
}
################################################################################
# Route Tables
################################################################################
# Single route table shared by all public subnets: default route goes to the
# internet gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    gateway_id = aws_internet_gateway.main.id
    cidr_block = "0.0.0.0/0"
  }

  tags = {
    Name = "${var.project}-${var.environment}-public-rt"
  }
}
# Attach every public subnet to the shared public route table.
resource "aws_route_table_association" "public" {
  count = length(aws_subnet.public)

  route_table_id = aws_route_table.public.id
  subnet_id      = aws_subnet.public[count.index].id
}
# One private route table per availability zone. Each table's default route
# points at the NAT gateway with the same index.
resource "aws_route_table" "private" {
  count = length(var.availability_zones)

  vpc_id = aws_vpc.main.id

  route {
    nat_gateway_id = aws_nat_gateway.main[count.index].id
    cidr_block     = "0.0.0.0/0"
  }

  tags = {
    Name = "${var.project}-${var.environment}-private-rt-${var.availability_zones[count.index]}"
  }
}
# Pair each private subnet with the private route table at the same index.
resource "aws_route_table_association" "private" {
  count = length(aws_subnet.private)

  route_table_id = aws_route_table.private[count.index].id
  subnet_id      = aws_subnet.private[count.index].id
}
################################################################################
# VPC Endpoints — allow ECS tasks to reach AWS services without NAT
################################################################################
# Security group attached to the interface endpoints' network interfaces.
#
# When an interface endpoint is created without security_group_ids, AWS
# associates the VPC's default security group, whose default rules only admit
# traffic from members of that same group. Because private DNS is enabled on
# these endpoints, the AWS service hostnames resolve to the endpoint interfaces
# inside the VPC, so workloads that use a different security group would be
# unable to connect. This group admits HTTPS from anywhere inside the VPC CIDR.
resource "aws_security_group" "vpc_endpoints" {
  name_prefix = "${var.project}-${var.environment}-vpce-"
  description = "HTTPS from inside the VPC to interface VPC endpoints"
  vpc_id      = aws_vpc.main.id

  ingress {
    description = "HTTPS from VPC"
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = [aws_vpc.main.cidr_block]
  }

  tags = {
    Name = "${var.project}-${var.environment}-vpce-sg"
  }

  # name_prefix + create_before_destroy lets the group be replaced without
  # first detaching it from the endpoints.
  lifecycle {
    create_before_destroy = true
  }
}

# Interface endpoint: Secrets Manager.
resource "aws_vpc_endpoint" "secretsmanager" {
  vpc_id              = aws_vpc.main.id
  service_name        = "com.amazonaws.${var.region}.secretsmanager"
  vpc_endpoint_type   = "Interface"
  subnet_ids          = aws_subnet.private[*].id
  security_group_ids  = [aws_security_group.vpc_endpoints.id]
  private_dns_enabled = true

  tags = {
    Name = "${var.project}-${var.environment}-secretsmanager-endpoint"
  }
}

# Interface endpoint: ECR API.
resource "aws_vpc_endpoint" "ecr_api" {
  vpc_id              = aws_vpc.main.id
  service_name        = "com.amazonaws.${var.region}.ecr.api"
  vpc_endpoint_type   = "Interface"
  subnet_ids          = aws_subnet.private[*].id
  security_group_ids  = [aws_security_group.vpc_endpoints.id]
  private_dns_enabled = true

  tags = {
    Name = "${var.project}-${var.environment}-ecr-api-endpoint"
  }
}

# Interface endpoint: ECR Docker registry.
resource "aws_vpc_endpoint" "ecr_dkr" {
  vpc_id              = aws_vpc.main.id
  service_name        = "com.amazonaws.${var.region}.ecr.dkr"
  vpc_endpoint_type   = "Interface"
  subnet_ids          = aws_subnet.private[*].id
  security_group_ids  = [aws_security_group.vpc_endpoints.id]
  private_dns_enabled = true

  tags = {
    Name = "${var.project}-${var.environment}-ecr-dkr-endpoint"
  }
}

# Gateway endpoint: S3. Gateway endpoints are attached to route tables rather
# than subnets and take no security group.
resource "aws_vpc_endpoint" "s3" {
  vpc_id            = aws_vpc.main.id
  service_name      = "com.amazonaws.${var.region}.s3"
  vpc_endpoint_type = "Gateway"
  route_table_ids   = aws_route_table.private[*].id

  tags = {
    Name = "${var.project}-${var.environment}-s3-endpoint"
  }
}

# Interface endpoint: CloudWatch Logs.
resource "aws_vpc_endpoint" "cloudwatch_logs" {
  vpc_id              = aws_vpc.main.id
  service_name        = "com.amazonaws.${var.region}.logs"
  vpc_endpoint_type   = "Interface"
  subnet_ids          = aws_subnet.private[*].id
  security_group_ids  = [aws_security_group.vpc_endpoints.id]
  private_dns_enabled = true

  tags = {
    Name = "${var.project}-${var.environment}-logs-endpoint"
  }
}
################################################################################
# IAM — ECS Task Execution Role
# Allows ECS to pull images from ECR, write logs, and fetch secrets.
################################################################################
# Trust policy that lets the ECS tasks service principal assume a role.
data "aws_iam_policy_document" "ecs_task_execution_assume" {
  statement {
    principals {
      identifiers = ["ecs-tasks.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}
# ECS task execution role, trusted by ecs-tasks.amazonaws.com.
resource "aws_iam_role" "ecs_task_execution" {
  name               = "${var.project}-${var.environment}-ecs-execution-role"
  assume_role_policy = data.aws_iam_policy_document.ecs_task_execution_assume.json

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Attach the AWS-managed ECS task execution policy to the execution role.
resource "aws_iam_role_policy_attachment" "ecs_task_execution_managed" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecs_task_execution.name
}
# Allow the execution role to fetch the specific secrets it needs
# Read access to exactly the Secrets Manager secrets this file creates for the
# application: database URL, Redis URL, both JWT keys, and — only when it
# exists — the Vault token.
data "aws_iam_policy_document" "ecs_task_execution_secrets" {
  statement {
    sid    = "GetAppSecrets"
    effect = "Allow"

    actions = [
      "secretsmanager:GetSecretValue",
      "secretsmanager:DescribeSecret"
    ]

    resources = concat(
      [
        aws_secretsmanager_secret.database_url.arn,
        aws_secretsmanager_secret.redis_url.arn,
        aws_secretsmanager_secret.jwt_private_key.arn,
        aws_secretsmanager_secret.jwt_public_key.arn,
      ],
      # The Vault token secret is declared with `count`, so the splat yields
      # either an empty list or a single ARN. Reading it from the resource
      # itself avoids branching on the secret value and indexing [0].
      aws_secretsmanager_secret.vault_token[*].arn
    )
  }
}
# Inline policy on the execution role carrying the secret-read statement.
resource "aws_iam_role_policy" "ecs_task_execution_secrets" {
  role   = aws_iam_role.ecs_task_execution.id
  name   = "${var.project}-${var.environment}-secrets-policy"
  policy = data.aws_iam_policy_document.ecs_task_execution_secrets.json
}
################################################################################
# IAM — ECS Task Role
# Permissions granted to the running application container.
################################################################################
# Role for the running application container. It reuses the execution role's
# trust policy document, i.e. it is assumable by ecs-tasks.amazonaws.com.
resource "aws_iam_role" "ecs_task" {
  name               = "${var.project}-${var.environment}-ecs-task-role"
  assume_role_policy = data.aws_iam_policy_document.ecs_task_execution_assume.json

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# ECS task role policy — extend as needed for other AWS service calls.
# Permissions for the task role. Currently a single statement allowing
# cloudwatch:PutMetricData on all resources.
data "aws_iam_policy_document" "ecs_task" {
  statement {
    sid       = "AllowCloudWatchMetrics"
    effect    = "Allow"
    resources = ["*"]

    actions = [
      "cloudwatch:PutMetricData"
    ]
  }
}
# Inline policy binding the task permissions document to the task role.
resource "aws_iam_role_policy" "ecs_task" {
  role   = aws_iam_role.ecs_task.id
  name   = "${var.project}-${var.environment}-task-policy"
  policy = data.aws_iam_policy_document.ecs_task.json
}
################################################################################
# IAM — RDS Enhanced Monitoring Role
################################################################################
# Trust policy that lets the RDS monitoring service principal assume a role.
data "aws_iam_policy_document" "rds_monitoring_assume" {
  statement {
    principals {
      identifiers = ["monitoring.rds.amazonaws.com"]
      type        = "Service"
    }

    actions = ["sts:AssumeRole"]
  }
}
# Role handed to the RDS module as monitoring_role_arn.
resource "aws_iam_role" "rds_monitoring" {
  name               = "${var.project}-${var.environment}-rds-monitoring-role"
  assume_role_policy = data.aws_iam_policy_document.rds_monitoring_assume.json

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Attach the AWS-managed enhanced-monitoring policy to the RDS monitoring role.
resource "aws_iam_role_policy_attachment" "rds_monitoring" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonRDSEnhancedMonitoringRole"
  role       = aws_iam_role.rds_monitoring.name
}
################################################################################
# AWS Secrets Manager — store all sensitive values
################################################################################
# Secret container for the application's DATABASE_URL. The value itself is
# written by the matching aws_secretsmanager_secret_version resource.
resource "aws_secretsmanager_secret" "database_url" {
  name        = "/${var.project}/${var.environment}/database-url"
  description = "PostgreSQL DATABASE_URL for AgentIdP"

  # Days a deleted secret stays recoverable.
  recovery_window_in_days = 7

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Value of the DATABASE_URL secret, assembled from the RDS module outputs.
resource "aws_secretsmanager_secret_version" "database_url" {
  secret_id = aws_secretsmanager_secret.database_url.id

  # The password comes from var.db_password, so it never appears in plaintext
  # in any .tf file. It is, however, recorded in Terraform state as part of
  # secret_string, so the state backend must be encrypted and access-controlled.
  #
  # The password is percent-encoded before being placed in the userinfo part of
  # the URL: characters such as '#', '?', '%' or ':' would otherwise change how
  # the URL is parsed. Passwords made only of unreserved characters are left
  # unchanged by urlencode().
  secret_string = "postgresql://${var.project}:${urlencode(var.db_password)}@${module.rds.endpoint}:${module.rds.port}/${module.rds.db_name}?sslmode=require"

  depends_on = [module.rds]
}
# Secret container for the application's REDIS_URL.
resource "aws_secretsmanager_secret" "redis_url" {
  name        = "/${var.project}/${var.environment}/redis-url"
  description = "Redis REDIS_URL for AgentIdP"

  # Days a deleted secret stays recoverable.
  recovery_window_in_days = 7

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Value of the REDIS_URL secret, assembled from the Redis module outputs.
resource "aws_secretsmanager_secret_version" "redis_url" {
  secret_id = aws_secretsmanager_secret.redis_url.id

  # ElastiCache Redis with TLS uses the rediss:// scheme and requires an AUTH token.
  # The token is percent-encoded because it sits in the URL's userinfo part:
  # an unescaped '#', '&', '<' or '>' would corrupt the URL. Tokens made only
  # of unreserved characters are left unchanged by urlencode().
  # Note: secret_string is also recorded in Terraform state.
  secret_string = "rediss://:${urlencode(var.redis_auth_token)}@${module.redis.primary_endpoint}:${module.redis.port}"

  depends_on = [module.redis]
}
# Secret container for the JWT signing key.
resource "aws_secretsmanager_secret" "jwt_private_key" {
  name        = "/${var.project}/${var.environment}/jwt-private-key"
  description = "RSA-2048 private key for signing AgentIdP JWTs"

  # Days a deleted secret stays recoverable.
  recovery_window_in_days = 7

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Stores var.jwt_private_key verbatim as the secret's value.
resource "aws_secretsmanager_secret_version" "jwt_private_key" {
  secret_string = var.jwt_private_key
  secret_id     = aws_secretsmanager_secret.jwt_private_key.id
}
# Secret container for the JWT verification key.
resource "aws_secretsmanager_secret" "jwt_public_key" {
  name        = "/${var.project}/${var.environment}/jwt-public-key"
  description = "RSA-2048 public key for verifying AgentIdP JWTs"

  # Days a deleted secret stays recoverable.
  recovery_window_in_days = 7

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Stores var.jwt_public_key verbatim as the secret's value.
resource "aws_secretsmanager_secret_version" "jwt_public_key" {
  secret_string = var.jwt_public_key
  secret_id     = aws_secretsmanager_secret.jwt_public_key.id
}
# Optional secret container for a Vault token: created only when
# var.vault_token is a non-empty string.
resource "aws_secretsmanager_secret" "vault_token" {
  count = var.vault_token != "" ? 1 : 0

  name        = "/${var.project}/${var.environment}/vault-token"
  description = "HashiCorp Vault token for AgentIdP"

  # Days a deleted secret stays recoverable.
  recovery_window_in_days = 7

  tags = {
    project     = var.project
    environment = var.environment
  }
}
# Value for the optional Vault token secret; exists under the same condition as
# the secret it belongs to.
resource "aws_secretsmanager_secret_version" "vault_token" {
  count = var.vault_token != "" ? 1 : 0

  secret_string = var.vault_token
  secret_id     = aws_secretsmanager_secret.vault_token[0].id
}
################################################################################
# Module: Load Balancer
################################################################################
# Load balancer module, placed in the public subnets. Its target group port is
# set to 3000, the same value passed to the agentidp module as app_port.
module "lb" {
  source = "../../modules/lb"

  project     = var.project
  environment = var.environment

  vpc_id     = aws_vpc.main.id
  subnet_ids = aws_subnet.public[*].id

  certificate_arn   = var.certificate_arn
  target_group_port = 3000

  access_logs_bucket         = var.alb_access_logs_bucket
  enable_deletion_protection = true
}
################################################################################
# Module: RDS PostgreSQL
################################################################################
# RDS module, placed in the private subnets.
module "rds" {
  source = "../../modules/rds"

  project     = var.project
  environment = var.environment

  vpc_id     = aws_vpc.main.id
  subnet_ids = aws_subnet.private[*].id

  # The app SG is created by the agentidp module; we wire it after both modules
  # are instantiated using a separate security group rule (see below).
  allowed_security_group_ids = []

  # Database identity. The master username is the project name, which is also
  # the username embedded in the DATABASE_URL secret.
  db_name     = "sentryagent_idp"
  db_username = var.project
  db_password = var.db_password

  # Sizing and availability.
  instance_class        = var.rds_instance_class
  allocated_storage     = 50
  max_allocated_storage = 500
  multi_az              = true

  # Backup and deletion behaviour.
  backup_retention_days = var.rds_backup_retention_days
  deletion_protection   = var.rds_deletion_protection
  skip_final_snapshot   = var.rds_skip_final_snapshot

  # Monitoring.
  monitoring_role_arn          = aws_iam_role.rds_monitoring.arn
  monitoring_interval          = 60
  performance_insights_enabled = true
}
################################################################################
# Module: Redis
################################################################################
# Redis module, placed in the private subnets.
module "redis" {
  source = "../../modules/redis"

  project     = var.project
  environment = var.environment

  vpc_id     = aws_vpc.main.id
  subnet_ids = aws_subnet.private[*].id

  # Same pattern as RDS — app SG wired after agentidp module creates it.
  allowed_security_group_ids = []

  # Topology: two cache clusters with automatic failover and Multi-AZ enabled.
  node_type                  = var.redis_node_type
  num_cache_clusters         = 2
  automatic_failover_enabled = true
  multi_az_enabled           = true

  # Encryption and authentication.
  at_rest_encryption_enabled = true
  transit_encryption_enabled = true
  auth_token                 = var.redis_auth_token

  snapshot_retention_limit = 7
}
################################################################################
# Module: AgentIdP (ECS Fargate)
################################################################################
# AgentIdP application module, instantiated in its AWS mode and placed in the
# private subnets. Secrets are handed over by ARN only; the values live in
# Secrets Manager.
module "agentidp" {
  source = "../../modules/agentidp"

  provider_type = "aws"
  environment   = var.environment
  project       = var.project

  # Container image and listening port (3000 matches the LB target group port).
  app_image = "sentryagent/agentidp:${var.app_image_tag}"
  app_port  = 3000

  # Placement and load-balancer attachment.
  aws_region           = var.region
  aws_vpc_id           = aws_vpc.main.id
  aws_subnet_ids       = aws_subnet.private[*].id
  aws_target_group_arn = module.lb.target_group_arn

  # IAM and logging.
  aws_execution_role_arn = aws_iam_role.ecs_task_execution.arn
  aws_task_role_arn      = aws_iam_role.ecs_task.arn
  aws_log_group_name     = "/ecs/${var.project}-${var.environment}"

  # Task sizing.
  aws_desired_count = var.ecs_desired_count
  aws_cpu           = 512
  aws_memory        = 1024

  # Application settings.
  aws_cors_origin = var.cors_origin
  aws_policy_dir  = "/app/policies"
  aws_vault_addr  = var.vault_addr
  aws_vault_mount = var.vault_mount

  # Secret references. The Vault token ARN is an empty string when no Vault
  # token was supplied.
  aws_secret_database_url_arn    = aws_secretsmanager_secret.database_url.arn
  aws_secret_redis_url_arn       = aws_secretsmanager_secret.redis_url.arn
  aws_secret_jwt_private_key_arn = aws_secretsmanager_secret.jwt_private_key.arn
  aws_secret_jwt_public_key_arn  = aws_secretsmanager_secret.jwt_public_key.arn
  aws_secret_vault_token_arn     = var.vault_token != "" ? aws_secretsmanager_secret.vault_token[0].arn : ""

  # The module only receives secret ARNs, which exist before any value has been
  # written. Depending on the secret *versions* makes sure every secret has a
  # value before the module's resources are created. The Vault token version is
  # included as well; when its count is 0 the dependency is simply empty.
  depends_on = [
    aws_secretsmanager_secret_version.database_url,
    aws_secretsmanager_secret_version.redis_url,
    aws_secretsmanager_secret_version.jwt_private_key,
    aws_secretsmanager_secret_version.jwt_public_key,
    aws_secretsmanager_secret_version.vault_token,
  ]
}
################################################################################
# Cross-module security group wiring
#
# The app SG (from agentidp module) must be allowed into RDS and Redis.
# These rules are created after both modules are fully instantiated to avoid
# circular references in the module dependency graph.
################################################################################
# Ingress on the RDS security group from the application's security group.
# The port is read from the RDS module output — the same value used to build
# the DATABASE_URL secret — so the rule and the connection string cannot drift.
resource "aws_security_group_rule" "rds_from_app" {
  type        = "ingress"
  description = "PostgreSQL from ECS app tasks"
  protocol    = "tcp"
  from_port   = module.rds.port
  to_port     = module.rds.port

  source_security_group_id = module.agentidp.aws_app_security_group_id
  security_group_id        = module.rds.security_group_id
}
# Ingress on the Redis security group from the application's security group.
# The port is read from the Redis module output — the same value used to build
# the REDIS_URL secret — so the rule and the connection string cannot drift.
resource "aws_security_group_rule" "redis_from_app" {
  type        = "ingress"
  description = "Redis from ECS app tasks"
  protocol    = "tcp"
  from_port   = module.redis.port
  to_port     = module.redis.port

  source_security_group_id = module.agentidp.aws_app_security_group_id
  security_group_id        = module.redis.security_group_id
}
# Allow the ALB to reach ECS tasks on the app port
# Ingress on the application's security group from the load balancer's security
# group, on TCP 3000 (the app port / LB target group port used in this file).
resource "aws_security_group_rule" "app_from_alb" {
  description = "App port from ALB"
  type        = "ingress"
  protocol    = "tcp"
  to_port     = 3000
  from_port   = 3000

  security_group_id        = module.agentidp.aws_app_security_group_id
  source_security_group_id = module.lb.alb_security_group_id
}
################################################################################
# Route 53 — alias record pointing the domain to the ALB
################################################################################
# Looks up the public hosted zone that will hold the application's record.
#
# The zone name is derived from var.domain_name by dropping its left-most label
# ("idp.example.com" → "example.com"). A two-label name such as "example.com"
# is treated as the zone apex itself; stripping a label from it would leave
# only the TLD, which is never the hosted zone. Names with three or more labels
# behave exactly as before.
data "aws_route53_zone" "main" {
  name = (
    length(split(".", var.domain_name)) > 2
    ? join(".", slice(split(".", var.domain_name), 1, length(split(".", var.domain_name))))
    : var.domain_name
  )
  private_zone = false
}
# Alias A record for var.domain_name pointing at the load balancer created by
# the lb module.
resource "aws_route53_record" "app" {
  name    = var.domain_name
  type    = "A"
  zone_id = data.aws_route53_zone.main.zone_id

  alias {
    zone_id                = module.lb.alb_zone_id
    name                   = module.lb.alb_dns_name
    evaluate_target_health = true
  }
}