feat(phase-2): workstream 8 — Multi-Region Terraform Deployment

AWS environment:
- VPC (3-AZ, public + private subnets, NAT gateways, VPC endpoints for ECR/SM/CW)
- ECS Fargate service (sentryagent/agentidp) — secrets from Secrets Manager
- RDS PostgreSQL 14 (Multi-AZ, encrypted, VPC-internal, storage autoscaling)
- ElastiCache Redis 7 (primary + replica, at-rest + in-transit encryption)
- ALB with HTTPS/443, HTTP→HTTPS redirect, ACM certificate
- Route 53 alias record

GCP environment:
- VPC + private services access + Serverless VPC connector
- Cloud Run service — secrets from Secret Manager
- Cloud SQL PostgreSQL 14 (private IP, no public endpoint)
- Cloud Memorystore Redis 7 (VPC-internal, AUTH enabled)

Shared:
- 4 reusable modules: agentidp (dual AWS/GCP), rds, redis, lb
- No hardcoded secrets; all sensitive vars marked sensitive=true
- terraform.tfvars.example for both environments
- docs/devops/deployment.md — AWS + GCP step-by-step walkthrough, rollback procedures

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
SentryAgent.ai Developer
2026-03-29 06:25:14 +00:00
parent a504964e5f
commit 6913d62648
22 changed files with 4138 additions and 8 deletions

View File

@@ -0,0 +1,176 @@
################################################################################
# Module: redis
# Main — AWS ElastiCache Redis 7
#
# - Single shard (cluster mode disabled): one primary + one replica
# - Encryption at rest and in transit (TLS)
# - AUTH token is applied only when transit encryption is enabled and a token is supplied
# - VPC-internal only — no public access
# - Access restricted to explicitly allowed security groups (app only)
# - Slow log + engine log delivery to CloudWatch
################################################################################
# Toolchain constraints for this module: minimum Terraform CLI version and the
# AWS provider it is written against.
terraform {
  required_providers {
    aws = {
      version = ">= 5.40.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.6.0"
}
# Values shared by every resource in the module:
#   identifier  - "<project>-<environment>", used as the naming prefix
#   common_tags - baseline tag map merged into each resource's tags
locals {
  common_tags = {
    managed_by  = "terraform"
    project     = var.project
    environment = var.environment
  }

  identifier = join("-", [var.project, var.environment])
}
################################################################################
# CloudWatch Log Group for Redis logs
################################################################################
# Log group that receives the Redis slow-log and engine-log streams.
# Created only when log delivery is enabled (count = 0 otherwise), so every
# reference to it must be indexed with [0] and guarded by the same flag.
resource "aws_cloudwatch_log_group" "redis" {
  count = var.log_delivery_enabled ? 1 : 0

  name = var.log_group_name
  tags = local.common_tags

  # Retention is fixed at 30 days.
  retention_in_days = 30
}
################################################################################
# Security Group — only the app SGs may connect on 6379
################################################################################
# Security group attached to the Redis replication group.
# No ingress is declared here; inbound 6379 is granted per allowed security
# group by the standalone aws_security_group_rule resource in this module.
#
# NOTE(review): the AWS provider documentation advises against combining
# in-line rules (the egress block below) with aws_security_group_rule
# resources on the same group. Confirm no rule drift is observed before
# adding further standalone rules.
resource "aws_security_group" "redis" {
  vpc_id      = var.vpc_id
  name        = "${local.identifier}-redis-sg"
  description = "Controls inbound access to ElastiCache Redis — allow only app SG on 6379"

  tags = merge(local.common_tags, {
    Name = "${local.identifier}-redis-sg"
  })

  # Unrestricted outbound: all protocols, all ports, any IPv4 destination.
  egress {
    description = "All outbound"
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# One ingress rule per entry in var.allowed_security_group_ids, each opening
# TCP 6379 on the Redis security group to that source group.
#
# The instances are keyed by the security group ID itself, so every ID must be
# known at plan time; IDs that are only "known after apply" make Terraform
# reject the for_each expression.
resource "aws_security_group_rule" "redis_ingress_from_app" {
  for_each = toset(var.allowed_security_group_ids)

  security_group_id        = aws_security_group.redis.id
  source_security_group_id = each.value

  type        = "ingress"
  protocol    = "tcp"
  from_port   = 6379
  to_port     = 6379
  description = "Redis from app security group"
}
################################################################################
# ElastiCache Subnet Group
################################################################################
# Subnet group that tells ElastiCache which subnets it may place cache nodes in.
# The subnet IDs are supplied by the caller through var.subnet_ids.
resource "aws_elasticache_subnet_group" "main" {
  name        = "${local.identifier}-redis-subnet-group"
  subnet_ids  = var.subnet_ids
  description = "Private subnets for AgentIdP ElastiCache Redis"

  tags = local.common_tags
}
################################################################################
# ElastiCache Parameter Group — Redis 7.x defaults are fine; custom group
# allows future tuning without recreating the replication group.
################################################################################
# Custom parameter group for the redis7 family, referenced by the replication
# group so engine parameters can be tuned here.
resource "aws_elasticache_parameter_group" "main" {
  name        = "${local.identifier}-redis7-params"
  family      = "redis7"
  description = "AgentIdP Redis 7 parameter group"

  # Lazy freeing: reclaim the memory of evicted and expired keys asynchronously
  # instead of blocking the Redis main thread while large values are freed.
  # These settings do not disable or rename any Redis command.
  parameter {
    name  = "lazyfree-lazy-eviction"
    value = "yes"
  }

  parameter {
    name  = "lazyfree-lazy-expire"
    value = "yes"
  }

  tags = local.common_tags
}
################################################################################
# ElastiCache Replication Group (cluster mode disabled)
#
# Cluster mode disabled gives a single-shard setup:
# - 1 primary node
# - num_cache_clusters - 1 replica nodes
# This matches the application usage: token revocation (SET/GET/DEL),
# rate limiting (INCR/EXPIRE), and monthly counters (INCR) — no sharding needed.
################################################################################
# The Redis replication group itself: a single shard whose node count,
# failover behaviour, encryption, maintenance and logging are all driven by
# module variables. Networking is pinned to this module's subnet group and
# security group.
resource "aws_elasticache_replication_group" "main" {
  replication_group_id = local.identifier
  description          = "AgentIdP Redis 7 — token revocation, rate limiting, counters"

  tags = merge(local.common_tags, {
    Name = local.identifier
  })

  # --- Engine ---------------------------------------------------------------
  engine               = "redis"
  engine_version       = var.engine_version
  node_type            = var.node_type
  port                 = 6379
  parameter_group_name = aws_elasticache_parameter_group.main.name

  # --- Topology: total node count (primary + replicas) and failover ----------
  num_cache_clusters         = var.num_cache_clusters
  multi_az_enabled           = var.multi_az_enabled
  automatic_failover_enabled = var.automatic_failover_enabled

  # --- Network --------------------------------------------------------------
  security_group_ids = [aws_security_group.redis.id]
  subnet_group_name  = aws_elasticache_subnet_group.main.name

  # --- Security -------------------------------------------------------------
  transit_encryption_enabled = var.transit_encryption_enabled
  at_rest_encryption_enabled = var.at_rest_encryption_enabled

  # The token is passed through only when TLS is on AND a non-empty token was
  # supplied; in every other case the argument is left unset (null).
  auth_token = var.transit_encryption_enabled && var.auth_token != "" ? var.auth_token : null

  # --- Maintenance and snapshots --------------------------------------------
  apply_immediately        = var.apply_immediately
  maintenance_window       = var.maintenance_window
  snapshot_window          = var.snapshot_window
  snapshot_retention_limit = var.snapshot_retention_limit

  # --- Log delivery to CloudWatch -------------------------------------------
  # Emits one block per log type, both in JSON format. The list is empty when
  # log delivery is disabled, so the content body (and its [0] index into the
  # counted log group) is only evaluated when that log group exists.
  dynamic "log_delivery_configuration" {
    for_each = var.log_delivery_enabled ? ["slow-log", "engine-log"] : []

    content {
      destination_type = "cloudwatch-logs"
      destination      = aws_cloudwatch_log_group.redis[0].name
      log_type         = log_delivery_configuration.value
      log_format       = "json"
    }
  }
}

View File

@@ -0,0 +1,34 @@
################################################################################
# Module: redis
# Outputs
################################################################################
# Hostname of the replication group's primary (write) endpoint.
output "primary_endpoint" {
  value       = aws_elasticache_replication_group.main.primary_endpoint_address
  description = "Primary endpoint hostname for write operations. Use to construct REDIS_URL."
}
# Hostname of the replication group's reader endpoint.
output "reader_endpoint" {
  value       = aws_elasticache_replication_group.main.reader_endpoint_address
  description = "Reader endpoint for read operations (load-balanced across replicas)."
}
# Listening port, read back from the replication group resource.
output "port" {
  value       = aws_elasticache_replication_group.main.port
  description = "Port the Redis replication group listens on (always 6379)."
}
# Identifier of the replication group created by this module.
output "replication_group_id" {
  value       = aws_elasticache_replication_group.main.replication_group_id
  description = "ID of the ElastiCache replication group."
}
# ID of the security group this module creates for Redis, exposed so callers
# can attach additional rules to it.
output "security_group_id" {
  value       = aws_security_group.redis.id
  description = "Security group ID attached to the replication group. Use to add further ingress rules."
}
# Connection URL built from the primary endpoint and port. Only the scheme
# varies: "rediss" when transit encryption is enabled, "redis" otherwise.
# The AUTH token is not part of this value.
output "redis_url" {
  value       = "${var.transit_encryption_enabled ? "rediss" : "redis"}://${aws_elasticache_replication_group.main.primary_endpoint_address}:${aws_elasticache_replication_group.main.port}"
  description = "Constructed REDIS_URL using the primary endpoint. Includes rediss:// (TLS) scheme when transit encryption is enabled."
}

View File

@@ -0,0 +1,116 @@
################################################################################
# Module: redis
# Variables — AWS ElastiCache Redis 7
################################################################################
# Required. Combined with var.project to form resource names, and used as the
# "environment" tag.
variable "environment" {
  type        = string
  description = "Deployment environment label (e.g. production, staging)."
}
# Optional. First half of the "<project>-<environment>" naming prefix and the
# value of the "project" tag.
variable "project" {
  type        = string
  default     = "sentryagent-agentidp"
  description = "Project identifier used in resource names and tags."
}
# Required. VPC that will contain the Redis security group.
variable "vpc_id" {
  type        = string
  description = "VPC ID in which to create the ElastiCache subnet group and security group."
}
# Required. Subnets handed to the ElastiCache subnet group.
variable "subnet_ids" {
  type        = list(string)
  description = "List of private subnet IDs for the ElastiCache subnet group. Span at least 2 AZs."
}
# Optional. Each ID listed here gets its own ingress rule on the Redis security
# group. With the default empty list no ingress rule is created at all.
variable "allowed_security_group_ids" {
  type        = list(string)
  default     = []
  description = "List of security group IDs (e.g. ECS app SG) permitted to connect to Redis on port 6379."
}
# Optional. Instance class used for every node in the replication group.
variable "node_type" {
  type        = string
  default     = "cache.t3.medium"
  description = "ElastiCache node instance type."
}
# Optional. Passed straight to the replication group. The module's parameter
# group is fixed to the "redis7" family, so this should stay on a 7.x release.
variable "engine_version" {
  type        = string
  default     = "7.1"
  description = "Redis engine version. Use 7.x for Redis 7."
}
# Optional. Total node count for the single-shard replication group
# (1 primary + N replicas).
#
# Validation rejects out-of-range or fractional values at plan time instead of
# letting the ElastiCache API reject them during apply. The "at least 2 when
# failover / Multi-AZ is on" rule spans several variables and is therefore not
# checked here (cross-variable validation needs a newer Terraform than the
# module's >= 1.6.0 floor).
variable "num_cache_clusters" {
  description = "Total number of cache clusters in the replication group (1 primary + N replicas). Minimum 2 for HA (required when automatic_failover_enabled or multi_az_enabled is true). Valid range 1-6."
  type        = number
  default     = 2

  validation {
    # A null value is passed through untouched so the provider default applies.
    condition = var.num_cache_clusters == null ? true : (
      var.num_cache_clusters >= 1 &&
      var.num_cache_clusters <= 6 &&
      floor(var.num_cache_clusters) == var.num_cache_clusters
    )
    error_message = "num_cache_clusters must be a whole number between 1 and 6 (1 primary plus up to 5 replicas)."
  }
}
# Optional. Controls automatic promotion of a replica when the primary fails.
# The dependency runs from this flag to the node count: enabling failover
# requires at least two nodes, and Multi-AZ in turn requires failover.
variable "automatic_failover_enabled" {
  description = "Enable automatic failover to a replica when the primary fails. Requires num_cache_clusters >= 2, and must be true when multi_az_enabled = true."
  type        = bool
  default     = true
}
# Optional. Forwarded unchanged to the replication group's multi_az_enabled.
variable "multi_az_enabled" {
  type        = bool
  default     = true
  description = "Enable Multi-AZ for the replication group."
}
# Optional. Forwarded unchanged to the replication group; on by default.
variable "at_rest_encryption_enabled" {
  type        = bool
  default     = true
  description = "Encrypt data at rest."
}
# Optional. Besides enabling TLS on the replication group, this flag also
# decides whether the AUTH token is applied and whether the redis_url output
# uses the rediss:// scheme.
variable "transit_encryption_enabled" {
  type        = bool
  default     = true
  description = "Enable TLS for data in transit."
}
# Optional, sensitive. Password clients must present with the Redis AUTH command.
#
# The replication group only receives the token when transit encryption is
# enabled AND the value is non-empty; an empty string (the default) means the
# cluster is created without AUTH. The validation enforces the ElastiCache
# length limits at plan time so a short token fails fast instead of during
# apply. The error message is static and never echoes the token.
variable "auth_token" {
  description = "AUTH token (password) for Redis AUTH command. Only applied when transit_encryption_enabled = true; leave empty to create the cluster without AUTH. Must be 16-128 characters when set."
  type        = string
  sensitive   = true
  default     = ""

  validation {
    # null and "" both mean "no AUTH" and are accepted as-is.
    condition = var.auth_token == null ? true : (
      length(var.auth_token) == 0 ||
      (length(var.auth_token) >= 16 && length(var.auth_token) <= 128)
    )
    error_message = "auth_token must be empty (AUTH disabled) or between 16 and 128 characters long."
  }
}
# Optional. Weekly slot in which maintenance may be applied.
variable "maintenance_window" {
  type        = string
  default     = "sun:06:00-sun:07:00"
  description = "Preferred weekly maintenance window (ddd:hh24:mi-ddd:hh24:mi in UTC)."
}
# Optional. Days of automatic snapshots to keep; 0 switches snapshots off.
# Validation catches negative, fractional or over-limit values at plan time
# rather than at the ElastiCache API during apply.
variable "snapshot_retention_limit" {
  description = "Number of days to retain automatic Redis snapshots. 0 disables snapshots. Maximum 35."
  type        = number
  default     = 7

  validation {
    # A null value is passed through untouched so the provider default applies.
    condition = var.snapshot_retention_limit == null ? true : (
      var.snapshot_retention_limit >= 0 &&
      var.snapshot_retention_limit <= 35 &&
      floor(var.snapshot_retention_limit) == var.snapshot_retention_limit
    )
    error_message = "snapshot_retention_limit must be a whole number of days between 0 and 35."
  }
}
# Optional. Daily slot in which the automatic snapshot is taken.
variable "snapshot_window" {
  type        = string
  default     = "04:00-05:00"
  description = "Daily time range for automatic snapshots (hh24:mi-hh24:mi in UTC). Must not overlap maintenance_window."
}
# Optional. Off by default, so modifications are deferred rather than applied
# at once.
variable "apply_immediately" {
  type        = bool
  default     = false
  description = "Apply changes immediately. Set to false to wait for the next maintenance window in production."
}
# Optional. Master switch for logging: gates both the CloudWatch log group and
# the log_delivery_configuration blocks on the replication group.
variable "log_delivery_enabled" {
  type        = bool
  default     = true
  description = "Enable delivery of Redis slow logs and engine logs to CloudWatch."
}
# Optional. Name of the CloudWatch log group this module creates and manages
# whenever log delivery is enabled. The module does not adopt a pre-existing
# group, and the default name does not include the environment, so it has to
# be overridden when more than one environment shares an account and region.
variable "log_group_name" {
  description = "CloudWatch log group name for Redis logs. The module creates and manages this log group when log_delivery_enabled = true, so the name must not already exist and must be unique per AWS account and region (override the default when deploying multiple environments)."
  type        = string
  default     = "/elasticache/sentryagent-agentidp/redis"
}