AWS environment: - VPC (3-AZ, public + private subnets, NAT gateways, VPC endpoints for ECR/SM/CW) - ECS Fargate service (sentryagent/agentidp) — secrets from Secrets Manager - RDS PostgreSQL 14 (Multi-AZ, encrypted, VPC-internal, storage autoscaling) - ElastiCache Redis 7 (primary + replica, at-rest + in-transit encryption) - ALB with HTTPS/443, HTTP→HTTPS redirect, ACM certificate - Route 53 alias record GCP environment: - VPC + private services access + Serverless VPC connector - Cloud Run service — secrets from Secret Manager - Cloud SQL PostgreSQL 14 (private IP, no public endpoint) - Cloud Memorystore Redis 7 (VPC-internal, AUTH enabled) Shared: - 4 reusable modules: agentidp (dual AWS/GCP), rds, redis, lb - No hardcoded secrets; all sensitive vars marked sensitive=true - terraform.tfvars.example for both environments - docs/devops/deployment.md — AWS + GCP step-by-step walkthrough, rollback procedures Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
427 lines
12 KiB
HCL
427 lines
12 KiB
HCL
################################################################################
|
|
# Module: agentidp
|
|
# Main — ECS Fargate (AWS) or Cloud Run (GCP)
|
|
#
|
|
# Deploys the sentryagent/agentidp container.
|
|
# All sensitive environment variables are injected from AWS Secrets Manager
|
|
# (AWS path) or GCP Secret Manager (GCP path) — no plaintext secrets here.
|
|
################################################################################
|
|
|
|
terraform {
  # Both providers are declared because this module carries an AWS path and a
  # GCP path; resources on each path are switched on via var.provider_type.
  required_providers {
    google = {
      version = ">= 5.20.0"
      source  = "hashicorp/google"
    }

    aws = {
      version = ">= 5.40.0"
      source  = "hashicorp/aws"
    }
  }

  # Minimum Terraform CLI version accepted by this module.
  required_version = ">= 1.6.0"
}
|
|
|
|
################################################################################
|
|
# Locals
|
|
################################################################################
|
|
|
|
locals {
  # Tag set shared by the AWS resources declared in this module.
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  # Optional Vault settings, AWS flavour (ECS container-definition entries).
  # VAULT_ADDR / VAULT_MOUNT are emitted only when aws_vault_addr is non-empty.
  aws_vault_env_plain = var.aws_vault_addr == "" ? [] : [
    { name = "VAULT_ADDR", value = var.aws_vault_addr },
    { name = "VAULT_MOUNT", value = var.aws_vault_mount },
  ]

  # VAULT_TOKEN is keyed on its own Secrets Manager ARN: it is emitted whenever
  # aws_secret_vault_token_arn is non-empty, regardless of aws_vault_addr.
  aws_vault_secret_env = var.aws_secret_vault_token_arn == "" ? [] : [
    { name = "VAULT_TOKEN", valueFrom = var.aws_secret_vault_token_arn },
  ]

  # Optional Vault settings, GCP flavour (name => value map consumed by the
  # Cloud Run env blocks). Empty when gcp_vault_addr is empty.
  gcp_vault_env_plain = var.gcp_vault_addr == "" ? {} : {
    VAULT_ADDR  = var.gcp_vault_addr
    VAULT_MOUNT = var.gcp_vault_mount
  }
}
|
|
|
|
################################################################################
|
|
# ── AWS PATH ──────────────────────────────────────────────────────────────────
|
|
################################################################################
|
|
|
|
# Security group: allow inbound traffic only from the ALB on app_port,
|
|
# allow all outbound (needed for Secrets Manager and ECR API calls over HTTPS).
|
|
resource "aws_security_group" "app" {
  # Created only on the AWS path.
  count = var.provider_type == "aws" ? 1 : 0

  name   = "${var.project}-${var.environment}-app-sg"
  vpc_id = var.aws_vpc_id

  # EC2 accepts only a restricted ASCII character set in security group
  # descriptions, so keep this string plain ASCII (no em dashes or other
  # typographic punctuation) or CreateSecurityGroup rejects the request.
  description = "Security group for AgentIdP ECS tasks - inbound from ALB only"

  # Intentionally NO inline ingress block.
  # The ALB -> task rule on var.app_port is expected to be attached by the
  # calling environment module as a standalone security-group rule resource
  # (this avoids a circular dependency between the ALB SG and this SG).
  # An inline ingress block with no source (empty cidr_blocks, self = false)
  # authorises nothing, and declaring ingress inline makes Terraform manage
  # the ingress rule set here, which conflicts with a standalone rule.
  # NOTE(review): confirm the environment module really adds that rule;
  # without it the tasks accept no inbound traffic.

  # Unrestricted outbound: the tasks call Secrets Manager and ECR over HTTPS.
  egress {
    description = "All outbound"
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = merge(local.common_tags, {
    Name = "${var.project}-${var.environment}-app-sg"
  })
}
|
|
|
|
# ECS Cluster
|
|
resource "aws_ecs_cluster" "main" {
  # Created only on the AWS path.
  count = var.provider_type == "aws" ? 1 : 0

  # Cluster is named "<project>-<environment>".
  name = join("-", [var.project, var.environment])
  tags = local.common_tags

  # Enable CloudWatch Container Insights for this cluster.
  setting {
    value = "enabled"
    name  = "containerInsights"
  }
}
|
|
|
|
# ECS Cluster Capacity Providers — use FARGATE and FARGATE_SPOT
|
|
resource "aws_ecs_cluster_capacity_providers" "main" {
  # Created only on the AWS path, alongside the cluster it configures.
  count = var.provider_type == "aws" ? 1 : 0

  cluster_name = aws_ecs_cluster.main[0].name

  # Both Fargate flavours are associated with the cluster, but only on-demand
  # FARGATE appears in the default strategy below.
  capacity_providers = ["FARGATE", "FARGATE_SPOT"]

  default_capacity_provider_strategy {
    base              = 1
    weight            = 1
    capacity_provider = "FARGATE"
  }
}
|
|
|
|
# CloudWatch Log Group
|
|
resource "aws_cloudwatch_log_group" "app" {
  # Created only on the AWS path.
  count = var.provider_type == "aws" ? 1 : 0

  # The group name is supplied by the caller; log events are kept for 30 days.
  retention_in_days = 30
  name              = var.aws_log_group_name

  tags = local.common_tags
}
|
|
|
|
# ECS Task Definition
|
|
resource "aws_ecs_task_definition" "app" {
  # Created only on the AWS path.
  count = var.provider_type == "aws" ? 1 : 0

  family                   = "${var.project}-${var.environment}"
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]

  # cpu/memory are passed to the provider as strings.
  cpu    = tostring(var.aws_cpu)
  memory = tostring(var.aws_memory)

  # Execution role: used by ECS itself (image pull, secret fetch, log writes).
  # Task role: assumed by the application container at runtime.
  execution_role_arn = var.aws_execution_role_arn
  task_role_arn      = var.aws_task_role_arn

  # Single-container task. The container name "agentidp" is referenced by the
  # ECS service's load_balancer block and must stay in sync with it.
  container_definitions = jsonencode([
    {
      name      = "agentidp"
      image     = var.app_image
      essential = true

      portMappings = [
        {
          containerPort = var.app_port
          protocol      = "tcp"
        }
      ]

      # Plain (non-sensitive) environment variables, plus the optional Vault
      # address/mount pair when configured.
      environment = concat(
        [
          { name = "PORT", value = tostring(var.app_port) },
          { name = "NODE_ENV", value = "production" },
          { name = "CORS_ORIGIN", value = var.aws_cors_origin },
          { name = "POLICY_DIR", value = var.aws_policy_dir }
        ],
        local.aws_vault_env_plain
      )

      # Sensitive values are resolved from Secrets Manager at task launch and
      # injected as the named environment variables; only ARNs appear here.
      secrets = concat(
        [
          {
            name      = "DATABASE_URL"
            valueFrom = var.aws_secret_database_url_arn
          },
          {
            name      = "REDIS_URL"
            valueFrom = var.aws_secret_redis_url_arn
          },
          {
            name      = "JWT_PRIVATE_KEY"
            valueFrom = var.aws_secret_jwt_private_key_arn
          },
          {
            name      = "JWT_PUBLIC_KEY"
            valueFrom = var.aws_secret_jwt_public_key_arn
          }
        ],
        local.aws_vault_secret_env
      )

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          # Reference the managed log group resource (not the raw variable) so
          # Terraform orders log-group creation before this task definition.
          # The resolved value is the same name, so the rendered JSON does not
          # change.
          "awslogs-group"         = aws_cloudwatch_log_group.app[0].name
          "awslogs-region"        = var.aws_region
          "awslogs-stream-prefix" = "agentidp"
        }
      }

      # Container-level health check against the app's /health endpoint.
      # NOTE(review): relies on `wget` being present in the image - confirm.
      healthCheck = {
        command     = ["CMD-SHELL", "wget -qO- http://localhost:${var.app_port}/health || exit 1"]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }

      readonlyRootFilesystem = false
      user                   = "node"
    }
  ])

  tags = local.common_tags
}
|
|
|
|
# ECS Service
|
|
resource "aws_ecs_service" "app" {
  # Created only on the AWS path.
  count = var.provider_type == "aws" ? 1 : 0

  name    = join("-", [var.project, var.environment])
  cluster = aws_ecs_cluster.main[0].id
  tags    = local.common_tags

  launch_type     = "FARGATE"
  desired_count   = var.aws_desired_count
  task_definition = aws_ecs_task_definition.app[0].arn

  # Rolling deployment bounds: never drop below the desired task count, and
  # allow up to double the desired count while new tasks come up.
  deployment_maximum_percent         = 200
  deployment_minimum_healthy_percent = 100

  # Tasks run in the caller-supplied subnets without public IPs, behind the
  # module's own application security group.
  network_configuration {
    assign_public_ip = false
    security_groups  = [aws_security_group.app[0].id]
    subnets          = var.aws_subnet_ids
  }

  # Register the "agentidp" container's app port with the caller's target group.
  load_balancer {
    container_name   = "agentidp"
    container_port   = var.app_port
    target_group_arn = var.aws_target_group_arn
  }

  # Terraform sets these two attributes at creation and then leaves them alone:
  # task_definition moves when the CI/CD pipeline rolls out a new image tag,
  # and desired_count is likewise excluded from drift detection.
  lifecycle {
    ignore_changes = [task_definition, desired_count]
  }

  # Wait for the cluster's capacity-provider configuration before creating
  # the service.
  depends_on = [aws_ecs_cluster_capacity_providers.main]
}
|
|
|
|
################################################################################
|
|
# ── GCP PATH ──────────────────────────────────────────────────────────────────
|
|
################################################################################
|
|
|
|
# Cloud Run Service
|
|
resource "google_cloud_run_v2_service" "app" {
  # Created only on the GCP path.
  count = var.provider_type == "gcp" ? 1 : 0

  project  = var.gcp_project_id
  location = var.gcp_region
  name     = "${var.project}-${var.environment}"

  # INGRESS_TRAFFIC_ALL places no network-level restriction on callers:
  # requests are accepted from any source, including the public internet.
  ingress = "INGRESS_TRAFFIC_ALL"

  # Same keys as local.common_tags, with hyphens in the project value replaced
  # by underscores.
  labels = merge(local.common_tags, {
    project = replace(var.project, "-", "_")
  })

  template {
    service_account = var.gcp_service_account_email

    scaling {
      max_instance_count = var.gcp_max_instances
      min_instance_count = var.gcp_min_instances
    }

    # Outbound traffic to private ranges goes through the VPC connector so the
    # container can reach Cloud SQL (private IP) and Memorystore.
    vpc_access {
      egress    = "PRIVATE_RANGES_ONLY"
      connector = var.gcp_vpc_connector_name
    }

    containers {
      image = var.app_image

      ports {
        container_port = var.app_port
      }

      resources {
        startup_cpu_boost = true
        cpu_idle          = false
        limits = {
          memory = var.gcp_memory
          cpu    = var.gcp_cpu
        }
      }

      # Non-sensitive settings: the fixed set below plus the optional Vault
      # address/mount pair, one env block per map entry.
      dynamic "env" {
        for_each = merge(
          {
            CORS_ORIGIN = var.gcp_cors_origin
            NODE_ENV    = "production"
            POLICY_DIR  = var.gcp_policy_dir
            PORT        = tostring(var.app_port)
          },
          local.gcp_vault_env_plain
        )
        content {
          name  = env.key
          value = env.value
        }
      }

      # Sensitive settings: each env var below is sourced from the "latest"
      # version of a Secret Manager secret; only secret IDs appear here.

      # Database connection string.
      env {
        name = "DATABASE_URL"
        value_source {
          secret_key_ref {
            version = "latest"
            secret  = var.gcp_secret_database_url_id
          }
        }
      }

      # Redis connection string.
      env {
        name = "REDIS_URL"
        value_source {
          secret_key_ref {
            version = "latest"
            secret  = var.gcp_secret_redis_url_id
          }
        }
      }

      # JWT signing key.
      env {
        name = "JWT_PRIVATE_KEY"
        value_source {
          secret_key_ref {
            version = "latest"
            secret  = var.gcp_secret_jwt_private_key_id
          }
        }
      }

      # JWT verification key.
      env {
        name = "JWT_PUBLIC_KEY"
        value_source {
          secret_key_ref {
            version = "latest"
            secret  = var.gcp_secret_jwt_public_key_id
          }
        }
      }

      # Vault token: emitted only when a secret ID for it has been supplied.
      dynamic "env" {
        for_each = var.gcp_secret_vault_token_id != "" ? [1] : []
        content {
          name = "VAULT_TOKEN"
          value_source {
            secret_key_ref {
              version = "latest"
              secret  = var.gcp_secret_vault_token_id
            }
          }
        }
      }

      # Startup gate: GET /health every 5s after a 10s delay, up to 12 failures.
      startup_probe {
        initial_delay_seconds = 10
        period_seconds        = 5
        timeout_seconds       = 3
        failure_threshold     = 12
        http_get {
          port = var.app_port
          path = "/health"
        }
      }

      # Ongoing liveness: GET /health every 15s after a 30s delay, 3 failures.
      liveness_probe {
        initial_delay_seconds = 30
        period_seconds        = 15
        timeout_seconds       = 5
        failure_threshold     = 3
        http_get {
          port = var.app_port
          path = "/health"
        }
      }
    }
  }
}
|
|
|
|
# Allow unauthenticated (public internet) invocations of the Cloud Run service.
|
|
# Authentication for AgentIdP clients is handled by the application layer
|
|
# (JWT Bearer tokens), not by Cloud Run's built-in IAM auth.
|
|
resource "google_cloud_run_v2_service_iam_member" "public_invoker" {
  # Created only on the GCP path, together with the Cloud Run service.
  count = var.provider_type == "gcp" ? 1 : 0

  # Grant the invoker role to "allUsers", i.e. unauthenticated callers.
  member = "allUsers"
  role   = "roles/run.invoker"

  # Target: the Cloud Run service declared in this module.
  name     = google_cloud_run_v2_service.app[0].name
  location = var.gcp_region
  project  = var.gcp_project_id
}
|