feat(phase-2): workstream 8 — Multi-Region Terraform Deployment
AWS environment:
- VPC (3-AZ, public + private subnets, NAT gateways, VPC endpoints for ECR/SM/CW)
- ECS Fargate service (sentryagent/agentidp) — secrets from Secrets Manager
- RDS PostgreSQL 14 (Multi-AZ, encrypted, VPC-internal, storage autoscaling)
- ElastiCache Redis 7 (primary + replica, at-rest + in-transit encryption)
- ALB with HTTPS/443, HTTP→HTTPS redirect, ACM certificate
- Route 53 alias record

GCP environment:
- VPC + private services access + Serverless VPC connector
- Cloud Run service — secrets from Secret Manager
- Cloud SQL PostgreSQL 14 (private IP, no public endpoint)
- Cloud Memorystore Redis 7 (VPC-internal, AUTH enabled)

Shared:
- 4 reusable modules: agentidp (dual AWS/GCP), rds, redis, lb
- No hardcoded secrets; all sensitive vars marked sensitive=true
- terraform.tfvars.example for both environments
- docs/devops/deployment.md — AWS + GCP step-by-step walkthrough, rollback procedures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
426
terraform/modules/agentidp/main.tf
Normal file
426
terraform/modules/agentidp/main.tf
Normal file
@@ -0,0 +1,426 @@
|
||||
################################################################################
|
||||
# Module: agentidp
|
||||
# Main — ECS Fargate (AWS) or Cloud Run (GCP)
|
||||
#
|
||||
# Deploys the sentryagent/agentidp container.
|
||||
# All sensitive environment variables are injected from AWS Secrets Manager
|
||||
# (AWS path) or GCP Secret Manager (GCP path) — no plaintext secrets here.
|
||||
################################################################################
|
||||
|
||||
# Toolchain constraints for this module.
# Both cloud providers are declared because the module contains an AWS path
# and a GCP path; which one creates resources is decided by var.provider_type.
terraform {
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = ">= 5.20.0"
    }

    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.40.0"
    }
  }

  # Minimum Terraform CLI version accepted by this module.
  required_version = ">= 1.6.0"
}
|
||||
|
||||
################################################################################
|
||||
# Locals
|
||||
################################################################################
|
||||
|
||||
locals {
  # Tags shared by the AWS resources declared in this module.
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  # Optional Vault wiring (AWS path).
  # Plain Vault settings are emitted only when a Vault address is configured;
  # with an empty address the list is empty and no Vault env vars are added.
  aws_vault_env_plain = var.aws_vault_addr == "" ? [] : [
    { name = "VAULT_ADDR", value = var.aws_vault_addr },
    { name = "VAULT_MOUNT", value = var.aws_vault_mount },
  ]

  # The Vault token is injected as an ECS secret, and only when a secret ARN
  # for it has been supplied.
  aws_vault_secret_env = var.aws_secret_vault_token_arn == "" ? [] : [
    { name = "VAULT_TOKEN", valueFrom = var.aws_secret_vault_token_arn },
  ]

  # Optional Vault wiring (GCP path), keyed by env var name so it can be
  # merged into the Cloud Run plain-env map.
  gcp_vault_env_plain = var.gcp_vault_addr == "" ? {} : {
    VAULT_ADDR  = var.gcp_vault_addr
    VAULT_MOUNT = var.gcp_vault_mount
  }
}
|
||||
|
||||
################################################################################
|
||||
# ── AWS PATH ──────────────────────────────────────────────────────────────────
|
||||
################################################################################
|
||||
|
||||
# Security group for the AgentIdP ECS tasks.
#
# Ingress: intentionally NOT declared inline. The ALB security group is not
# available inside this module; the root environment module attaches the
# "app_port from ALB" rule to this group with a standalone ingress rule
# resource (this avoids a circular dependency between the two groups).
# A previous inline ingress block had no source at all (empty cidr_blocks,
# self = false): it allowed nothing, and declaring ingress inline makes
# Terraform manage the group's ingress set here, which conflicts with rules
# attached from outside.
#
# Egress: all outbound is allowed (needed for Secrets Manager and ECR API
# calls over HTTPS).
resource "aws_security_group" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  name   = "${var.project}-${var.environment}-app-sg"
  vpc_id = var.aws_vpc_id

  # ASCII only: the EC2 API rejects security group descriptions containing
  # characters outside its allowed ASCII set (an em dash was used before).
  description = "Security group for AgentIdP ECS tasks - inbound from ALB only"

  egress {
    description = "All outbound"
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${var.project}-${var.environment}-app-sg"
  })
}
|
||||
|
||||
# ECS cluster hosting the AgentIdP service (AWS path only).
# Named "<project>-<environment>"; Container Insights is switched on.
resource "aws_ecs_cluster" "main" {
  count = var.provider_type == "aws" ? 1 : 0

  name = "${var.project}-${var.environment}"
  tags = local.common_tags

  setting {
    name  = "containerInsights"
    value = "enabled"
  }
}
|
||||
|
||||
# Capacity providers attached to the ECS cluster (AWS path only).
# Both FARGATE and FARGATE_SPOT are registered; the default strategy places
# everything on FARGATE (base 1, weight 1) and gives FARGATE_SPOT no share.
resource "aws_ecs_cluster_capacity_providers" "main" {
  count = var.provider_type == "aws" ? 1 : 0

  cluster_name = aws_ecs_cluster.main[0].name

  capacity_providers = [
    "FARGATE",
    "FARGATE_SPOT",
  ]

  default_capacity_provider_strategy {
    capacity_provider = "FARGATE"
    base              = 1
    weight            = 1
  }
}
|
||||
|
||||
# CloudWatch Logs group for the application (AWS path only).
# The name is supplied by the caller; log events are kept for 30 days.
resource "aws_cloudwatch_log_group" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  name              = var.aws_log_group_name
  retention_in_days = 30
  tags              = local.common_tags
}
|
||||
|
||||
# ECS task definition for the AgentIdP container (AWS path only).
#
# Fargate / awsvpc task with a single essential container named "agentidp".
# CPU and memory come from var.aws_cpu / var.aws_memory (converted to strings).
# Non-sensitive settings are passed as plain environment variables; sensitive
# ones are referenced by Secrets Manager ARN so no secret value appears here.
resource "aws_ecs_task_definition" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  family                   = "${var.project}-${var.environment}"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]
  cpu                      = tostring(var.aws_cpu)
  memory                   = tostring(var.aws_memory)
  execution_role_arn       = var.aws_execution_role_arn
  task_role_arn            = var.aws_task_role_arn

  container_definitions = jsonencode([
    {
      name      = "agentidp"
      image     = var.app_image
      essential = true

      portMappings = [
        {
          containerPort = var.app_port
          protocol      = "tcp"
        }
      ]

      # Plain (non-sensitive) environment variables, followed by the optional
      # Vault address/mount entries (empty list when Vault is not configured).
      environment = concat(
        [
          { name = "PORT", value = tostring(var.app_port) },
          { name = "NODE_ENV", value = "production" },
          { name = "CORS_ORIGIN", value = var.aws_cors_origin },
          { name = "POLICY_DIR", value = var.aws_policy_dir }
        ],
        local.aws_vault_env_plain
      )

      # Sensitive values fetched from Secrets Manager at task launch.
      # Each entry is injected as the named environment variable; the optional
      # Vault token entry is appended when its secret ARN is set.
      secrets = concat(
        [
          {
            name      = "DATABASE_URL"
            valueFrom = var.aws_secret_database_url_arn
          },
          {
            name      = "REDIS_URL"
            valueFrom = var.aws_secret_redis_url_arn
          },
          {
            name      = "JWT_PRIVATE_KEY"
            valueFrom = var.aws_secret_jwt_private_key_arn
          },
          {
            name      = "JWT_PUBLIC_KEY"
            valueFrom = var.aws_secret_jwt_public_key_arn
          }
        ],
        local.aws_vault_secret_env
      )

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          # Reference the log group resource (not the raw variable) so that
          # Terraform creates the group before this task definition and the
          # service that uses it. The rendered value is the same name.
          "awslogs-group"         = aws_cloudwatch_log_group.app[0].name
          "awslogs-region"        = var.aws_region
          "awslogs-stream-prefix" = "agentidp"
        }
      }

      # Container-level health check against the app's /health endpoint.
      # Relies on wget being present in the image.
      healthCheck = {
        command     = ["CMD-SHELL", "wget -qO- http://localhost:${var.app_port}/health || exit 1"]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }

      readonlyRootFilesystem = false
      user                   = "node"
    }
  ])

  tags = local.common_tags
}
|
||||
|
||||
# ECS service that keeps the AgentIdP tasks running (AWS path only).
#
# Tasks run on Fargate in the caller-supplied subnets without public IPs and
# are registered with the caller-supplied target group on var.app_port.
resource "aws_ecs_service" "app" {
  count = var.provider_type == "aws" ? 1 : 0

  name    = "${var.project}-${var.environment}"
  cluster = aws_ecs_cluster.main[0].id
  tags    = local.common_tags

  # The capacity provider association must exist before the service is created.
  depends_on = [aws_ecs_cluster_capacity_providers.main]

  # NOTE(review): launch_type is set explicitly; confirm that bypassing the
  # cluster's default capacity provider strategy is intended.
  launch_type     = "FARGATE"
  task_definition = aws_ecs_task_definition.app[0].arn
  desired_count   = var.aws_desired_count

  # Rolling deployment: never drop below the desired count (100%), and allow
  # up to double the desired count (200%) while new tasks come up.
  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 100

  load_balancer {
    container_name   = "agentidp"
    container_port   = var.app_port
    target_group_arn = var.aws_target_group_arn
  }

  network_configuration {
    assign_public_ip = false
    security_groups  = [aws_security_group.app[0].id]
    subnets          = var.aws_subnet_ids
  }

  # Deployments are managed externally (the CI/CD pipeline updates the image
  # tag), so Terraform ignores drift in the task definition. Drift in
  # desired_count is ignored as well.
  lifecycle {
    ignore_changes = [
      task_definition,
      desired_count,
    ]
  }
}
|
||||
|
||||
################################################################################
|
||||
# ── GCP PATH ──────────────────────────────────────────────────────────────────
|
||||
################################################################################
|
||||
|
||||
# Cloud Run service running the AgentIdP container (GCP path only).
#
# Plain settings are passed as environment variables; sensitive ones are read
# from Secret Manager (always the "latest" version), so no secret value
# appears in this configuration.
resource "google_cloud_run_v2_service" "app" {
  count = var.provider_type == "gcp" ? 1 : 0

  project  = var.gcp_project_id
  location = var.gcp_region
  name     = "${var.project}-${var.environment}"

  # Accept requests from all sources, including the public internet.
  # Cloud Run provides Google-managed TLS on the default *.run.app domain
  # and on any custom domains mapped via Cloud Run domain mappings.
  ingress = "INGRESS_TRAFFIC_ALL"

  labels = {
    managed_by  = "terraform"
    environment = var.environment
    # Hyphens in the project name are replaced with underscores.
    project = replace(var.project, "-", "_")
  }

  template {
    service_account = var.gcp_service_account_email

    # Only traffic to private ranges goes through the VPC connector; that is
    # how the container reaches Cloud SQL (private IP) and Memorystore.
    vpc_access {
      connector = var.gcp_vpc_connector_name
      egress    = "PRIVATE_RANGES_ONLY"
    }

    scaling {
      max_instance_count = var.gcp_max_instances
      min_instance_count = var.gcp_min_instances
    }

    containers {
      image = var.app_image

      ports {
        container_port = var.app_port
      }

      resources {
        startup_cpu_boost = true
        cpu_idle          = false

        limits = {
          memory = var.gcp_memory
          cpu    = var.gcp_cpu
        }
      }

      # Plain environment variables, plus the optional Vault address/mount
      # entries (an empty map when Vault is not configured).
      dynamic "env" {
        for_each = merge(
          {
            PORT        = tostring(var.app_port)
            NODE_ENV    = "production"
            CORS_ORIGIN = var.gcp_cors_origin
            POLICY_DIR  = var.gcp_policy_dir
          },
          local.gcp_vault_env_plain
        )

        content {
          name  = env.key
          value = env.value
        }
      }

      # Required secrets from Secret Manager. A list (not a map) is iterated
      # so the env entries are emitted in exactly this order.
      dynamic "env" {
        for_each = [
          { name = "DATABASE_URL", secret = var.gcp_secret_database_url_id },
          { name = "REDIS_URL", secret = var.gcp_secret_redis_url_id },
          { name = "JWT_PRIVATE_KEY", secret = var.gcp_secret_jwt_private_key_id },
          { name = "JWT_PUBLIC_KEY", secret = var.gcp_secret_jwt_public_key_id },
        ]

        content {
          name = env.value.name

          value_source {
            secret_key_ref {
              secret  = env.value.secret
              version = "latest"
            }
          }
        }
      }

      # VAULT_TOKEN from Secret Manager, only when a secret id was supplied.
      dynamic "env" {
        for_each = var.gcp_secret_vault_token_id != "" ? [1] : []

        content {
          name = "VAULT_TOKEN"

          value_source {
            secret_key_ref {
              secret  = var.gcp_secret_vault_token_id
              version = "latest"
            }
          }
        }
      }

      # Startup: first check after 10s, then every 5s, up to 12 failures.
      startup_probe {
        initial_delay_seconds = 10
        period_seconds        = 5
        timeout_seconds       = 3
        failure_threshold     = 12

        http_get {
          path = "/health"
          port = var.app_port
        }
      }

      # Liveness: first check after 30s, then every 15s, up to 3 failures.
      liveness_probe {
        initial_delay_seconds = 30
        period_seconds        = 15
        timeout_seconds       = 5
        failure_threshold     = 3

        http_get {
          path = "/health"
          port = var.app_port
        }
      }
    }
  }
}
|
||||
|
||||
# Grants roles/run.invoker to allUsers, i.e. unauthenticated (public internet)
# callers may invoke the Cloud Run service. Client authentication for AgentIdP
# is handled by the application layer (JWT Bearer tokens), not by Cloud Run's
# built-in IAM auth.
resource "google_cloud_run_v2_service_iam_member" "public_invoker" {
  count = var.provider_type == "gcp" ? 1 : 0

  name     = google_cloud_run_v2_service.app[0].name
  location = var.gcp_region
  project  = var.gcp_project_id

  member = "allUsers"
  role   = "roles/run.invoker"
}
|
||||
Reference in New Issue
Block a user