Decision Log of Building the Platform -1-

Hermaeus Mora ·

Terraform, Argo CD, Ansible을 사용하여 IaC(Infrastructure as Code)를 구현한다. 이를 통해 모노 레포 자체가 SSoT(Single Source of Truth)로 기능하게 하여, 데이터 중복을 최소화하고 데이터 무결성을 유지한다.

AWS 초기 구축 시 멀티 어카운트를 구성한다. 이를 통해 보안 침해 발생 시 Blast Radius를 감소시키고, 계정별 비용 가시성을 높일 수 있다.

또한 AWS Organizations에서 제공하는 OU(Organizational Unit)를 사용하면 각 계정은 자신이 속한 OU의 SCP(Service Control Policy)를 상속받을 수 있다.

위 내용을 종합하여 Terraform 구조를 다음과 같이 설계한다. 부트스트랩(기반 리소스 배포), 매니지먼트(OU 배포)를 제외한 각 계정은 기본적으로 리전(IATA 공항 코드, 예: icn, nrt)-레이어 구조를 가진다.

tf
├── accounts
│   ├── _bootstrap
│   ├── _management
│   ├── infrastructure
│   │   ├── network
│   │   ├── platform
│   │   │   ├── _global
│   │   │   │   └── route53
│   │   │   └── icn
│   │   │       ├── argocd
│   │   │       ├── eks
│   │   │       ├── obs
│   │   │       └── network
│   │   └── shared_services
│   │       ├── icn
│   │       │   └── ecr
│   │       └── nrt
│   │           └── ecr
│   ├── security
│   │   └── log_archive
│   └── workloads
│       ├── workloads_nonprod
│       │   ├── app_nonprod
│       │   │   ├── _global
│       │   │   │   └── frontend
│       │   │   └── icn
│       │   │       ├── argocd
│       │   │       ├── nks
│       │   │       ├── network
│       │   │       ├── keycloak
│       │   │       └── vault
│       │   └── ml_nonprod
│       │       └── nrt
│       │           ├── eks
│       │           ├── hyperpod
│       │           ├── inference
│       │           └── network
│       └── workloads_prod
│           ├── app_prod
│           │   ├── _global
│           │   │   └── frontend
│           │   └── icn
│           │       ├── argocd
│           │       ├── nks
│           │       ├── network
│           │       ├── keycloak
│           │       └── vault
│           └── ml_prod
│               └── nrt
│                   ├── eks
│                   ├── hyperpod
│                   ├── inference
│                   └── network
└── modules
    ├── account_bootstrap
    ├── argocd_remote_cluster
    ├── ecr
    ├── frontend
    ├── gitlab_oidc_provider
    ├── image_cdn
    └── inference_env

AWS 자격 증명을 위해 Idc(IAM Identity Center)를 활성화한다. 이를 통해 멀티 어카운트 접근을 단순화하고 정적 액세스 키 대신 STS(Security Token Service)를 통해 임시 자격 증명을 부여한다.

매니지먼트 계정으로 Idc를 활성화한 후 initial admin을 수동으로 생성하고 패스워드 및 MFA를 설정한다. 다음과 같이 $HOME/.aws/config를 작성한 후 aws sso login --profile test-mgmt 를 실행하면 test 세션의 모든 프로파일을 활성화할 수 있다.

# Shared SSO session definition; profiles opt in via `sso_session = test`.
[sso-session test]
sso_start_url = https://ssoins-xxxxxxxxxxxxxxxx.portal.ap-northeast-2.app.aws
sso_region = ap-northeast-2
sso_registration_scopes = sso:account:access

# Management-account profile that uses the "test" SSO session.
[profile test-mgmt]
sso_session = test
sso_account_id = xxxxxxxxxxxx
sso_role_name = AdministratorAccess
output = json
...

매니지먼트 계정에 수동으로 test-mgmt-tf 버킷 생성 후 테라폼으로 OU를 배포한다.

terraform {
  # `use_lockfile` (S3-native state locking) is only understood by
  # Terraform 1.10 and newer, so reject older CLIs up front instead of
  # failing during backend initialisation.
  required_version = ">= 1.10.0"

  # Remote state for the management account. The bucket itself is created
  # manually before the first apply.
  backend "s3" {
    bucket       = "test-mgmt-tf"
    key          = "terraform.tfstate"
    profile      = "test-mgmt"
    region       = "ap-northeast-2"
    encrypt      = true
    use_lockfile = true
  }

  required_providers {
    aws = { source = "hashicorp/aws" }
  }
}
 
# Default AWS provider: authenticates through the `test-mgmt` CLI profile.
provider "aws" {
  region  = var.aws_region
  profile = "test-mgmt"
}
 
# Reads the current organization; its first root ID is used as the parent of the first-level OUs.
data "aws_organizations_organization" "current" {}
 
# First-level OUs, created directly beneath the organization root.
# Instances are keyed by OU name.
resource "aws_organizations_organizational_unit" "dep_1" {
  for_each = { for unit in var.ous_dep_1 : unit.name => unit }

  parent_id = data.aws_organizations_organization.current.roots[0].id
  name      = each.key
}
 
# Second-level OUs. Each entry's `parent` is the key (name) of the
# first-level OU it is nested under.
resource "aws_organizations_organizational_unit" "dep_2" {
  for_each = { for unit in var.ous_dep_2 : unit.name => unit }

  parent_id = aws_organizations_organizational_unit.dep_1[each.value.parent].id
  name      = each.key
}
 
locals {
  # Name-indexed lookup across both OU levels. If a second-level OU shares a
  # name with a first-level one, merge() keeps the later (second-level) entry.
  all_ous = merge(aws_organizations_organizational_unit.dep_1, aws_organizations_organizational_unit.dep_2)
}
 
# Member accounts, keyed by account name and placed in the OU named by `ou`.
resource "aws_organizations_account" "this" {
  for_each = { for acct in var.accounts : acct.name => acct }

  name      = each.key
  email     = each.value.email
  role_name = "OrganizationAccountAccessRole"
  parent_id = local.all_ous[each.value.ou].id

  # Destroying the resource must not close the underlying AWS account.
  close_on_deletion = false

  lifecycle {
    # Later edits to the e-mail in configuration are not pushed to the account.
    ignore_changes = [email]
    # Guard against accidental removal of an account from state/config.
    prevent_destroy = true
  }
}

조직 전체에 SCP를 적용한다. 단, MSP 계정은 루트에 직접 정책을 연결할 수 없으므로 루트 대신 1단계 OU 각각에 연결한다. 해당 SCP는 CloudTrail 변조, 조직 탈퇴, 루트 사용자 접근 및 허가되지 않은 리전 접근을 차단한다.

locals {
  # Statements of the baseline SCP, declared apart from the resource so the
  # policy resource itself stays short.
  scp_baseline_statements = [
    # Block tampering with CloudTrail trails and their selectors.
    {
      Sid    = "DenyDeletingCloudTrail"
      Effect = "Deny"
      Action = [
        "cloudtrail:DeleteTrail",
        "cloudtrail:StopLogging",
        "cloudtrail:UpdateTrail",
        "cloudtrail:PutEventSelectors",
        "cloudtrail:PutInsightSelectors",
      ]
      Resource = "*"
    },
    # Block member accounts from leaving the organization.
    {
      Sid    = "DenyLeavingOrganization"
      Effect = "Deny"
      Action = [
        "organizations:LeaveOrganization",
      ]
      Resource = "*"
    },
    # Block every action performed by an account's root user.
    {
      Sid      = "DenyRootUser"
      Effect   = "Deny"
      Action   = "*"
      Resource = "*"
      Condition = {
        StringLike = {
          "aws:PrincipalArn" = "arn:aws:iam::*:root"
        }
      }
    },
  ]
}

# Baseline guardrail SCP built from the statements above.
resource "aws_organizations_policy" "scp_baseline" {
  type = "SERVICE_CONTROL_POLICY"
  name = "ScpBaseline"

  content = jsonencode({
    Version   = "2012-10-17"
    Statement = local.scp_baseline_statements
  })
}
 
# Attach the baseline SCP to every first-level OU.
# MSP accounts cannot attach policies directly to the organization root.
resource "aws_organizations_policy_attachment" "scp_baseline" {
  for_each = { for ou_name, ou in aws_organizations_organizational_unit.dep_1 : ou_name => ou.id }

  target_id = each.value
  policy_id = aws_organizations_policy.scp_baseline.id
}
 
# Region-restriction SCP: denies every action that is NOT listed in NotAction
# whenever the request targets a region outside var.governed_regions.
resource "aws_organizations_policy" "scp_region_deny" {
  name = "ScpRegionDeny"
  type = "SERVICE_CONTROL_POLICY"

  content = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Sid    = "RegionDeny"
        Effect = "Deny"
        # Actions exempt from the region restriction (everything else is denied
        # outside the governed regions).
        NotAction = [
          "a4b:*",
          "access-analyzer:*",
          "account:*",
          "acm:*",
          "activate:*",
          "artifact:*",
          "aws-marketplace-management:*",
          "aws-marketplace:*",
          "aws-portal:*",
          "billing:*",
          "billingconductor:*",
          "budgets:*",
          "ce:*",
          "chatbot:*",
          "chime:*",
          "cloudfront:*",
          "cloudtrail:LookupEvents",
          "compute-optimizer:*",
          "config:*",
          "consoleapp:*",
          "consolidatedbilling:*",
          "cur:*",
          "datapipeline:GetAccountLimits",
          "devicefarm:*",
          "directconnect:*",
          "ec2:DescribeRegions",
          "ec2:DescribeTransitGateways",
          "ec2:DescribeVpnGateways",
          "ecr-public:*",
          "fms:*",
          "freetier:*",
          "globalaccelerator:*",
          "health:*",
          "iam:*",
          "importexport:*",
          "invoicing:*",
          "iq:*",
          "kms:*",
          "license-manager:ListReceivedLicenses",
          "lightsail:Get*",
          "mobileanalytics:*",
          "networkmanager:*",
          "notifications-contacts:*",
          "notifications:*",
          "organizations:*",
          "payments:*",
          "pricing:*",
          "quicksight:DescribeAccountSubscription",
          "resource-explorer-2:*",
          "route53-recovery-cluster:*",
          "route53-recovery-control-config:*",
          "route53-recovery-readiness:*",
          "route53:*",
          "route53domains:*",
          "s3:CreateMultiRegionAccessPoint",
          "s3:DeleteMultiRegionAccessPoint",
          "s3:DescribeMultiRegionAccessPointOperation",
          "s3:GetAccountPublicAccessBlock",
          "s3:GetBucketLocation",
          "s3:GetBucketPolicyStatus",
          "s3:GetBucketPublicAccessBlock",
          "s3:GetMultiRegionAccessPoint",
          "s3:GetMultiRegionAccessPointPolicy",
          "s3:GetMultiRegionAccessPointPolicyStatus",
          "s3:GetStorageLensConfiguration",
          "s3:GetStorageLensDashboard",
          "s3:ListAllMyBuckets",
          "s3:ListMultiRegionAccessPoints",
          "s3:ListStorageLensConfigurations",
          "s3:PutAccountPublicAccessBlock",
          "s3:PutMultiRegionAccessPointPolicy",
          "savingsplans:*",
          "shield:*",
          "sso:*",
          "sts:*",
          "support:*",
          "supportapp:*",
          "supportplans:*",
          "sustainability:*",
          "tag:GetResources",
          "tax:*",
          "trustedadvisor:*",
          "vendor-insights:ListEntitledSecurityProfiles",
          "waf-regional:*",
          "waf:*",
          "wafv2:*",
        ]
        Resource = "*"
        Condition = {
          # The deny applies only when the requested region is not a governed one.
          StringNotEquals = {
            "aws:RequestedRegion" = var.governed_regions
          }
          # Principals under the aws-service-role path are excluded from the deny.
          ArnNotLike = {
            "aws:PrincipalArn" = ["arn:aws:iam::*:role/aws-service-role/*"]
          }
        }
      }
    ]
  })
}
 
# Attach the region-deny SCP to every first-level OU.
resource "aws_organizations_policy_attachment" "scp_region_deny" {
  for_each = { for ou_name, ou in aws_organizations_organizational_unit.dep_1 : ou_name => ou.id }

  target_id = each.value
  policy_id = aws_organizations_policy.scp_region_deny.id
}

또한 아래와 같이 Permission Set을 생성한다.

# Discovers the IAM Identity Center instance(s); the first ARN and identity store ID are used below.
data "aws_ssoadmin_instances" "this" {}
 
locals {
  # First (index 0) Identity Center instance ARN and identity store ID.
  sso_instance_arn  = tolist(data.aws_ssoadmin_instances.this.arns)[0]
  identity_store_id = tolist(data.aws_ssoadmin_instances.this.identity_store_ids)[0]

  # Group id -> permission set key; used to index aws_ssoadmin_permission_set.this.
  group_to_ps = { for group in var.groups : group.id => group.permission_set }

  # Permission set definitions, keyed by a short name.
  # `inline_policy` is null when the set has no inline policy.
  permission_sets = {
    admin = {
      name                = "AdministratorAccess"
      session_duration    = "PT12H"
      managed_policy_arns = ["arn:aws:iam::aws:policy/AdministratorAccess"]
      inline_policy       = null
    }
    dev = {
      name                = "DevAccess"
      session_duration    = "PT12H"
      managed_policy_arns = []
      inline_policy = jsonencode({
        Version = "2012-10-17"
        Statement = [
          ...
        ]
      })
    }
  }

  # One entry per (account, group) pair, keyed "<account name>-<group>".
  # The list of per-account maps is expanded into merge() with `...`.
  account_group_assignments = merge([
    for account in var.accounts : {
      for group in account.groups :
      "${account.name}-${group}" => {
        account_id = aws_organizations_account.this[account.name].id
        group      = group
      }
    }
  ]...)

  # One entry per (permission set, managed policy ARN) pair,
  # keyed "<permission set key>-<policy ARN>".
  managed_policy_attachments = merge([
    for ps_key, ps in local.permission_sets : {
      for arn in ps.managed_policy_arns :
      "${ps_key}-${arn}" => {
        permission_set = ps_key
        policy_arn     = arn
      }
    }
  ]...)

  # One entry per (user, group) pair, keyed "<user id>-<group>".
  user_group_memberships = merge([
    for user in var.users : {
      for group in user.groups : "${user.id}-${group}" => {
        user_id = user.id
        group    = group
      }
    }
  ]...)
}
 
# One Identity Store group per entry in var.groups; the group id is also
# used as the display name.
resource "aws_identitystore_group" "this" {
  for_each = { for grp in var.groups : grp.id => grp }

  display_name      = each.key
  identity_store_id = local.identity_store_id
}
 
# Look up the initial administrator that was created manually.
data "aws_identitystore_user" "initial_admin" {
  identity_store_id = local.identity_store_id

  # Match the user by its UserName attribute ("root").
  alternate_identifier {
    unique_attribute {
      attribute_path  = "UserName"
      attribute_value = "root"
    }
  }
}
 
# Identity Store users, keyed by user id. The id serves as both the login
# name and the display name.
resource "aws_identitystore_user" "this" {
  for_each = { for person in var.users : person.id => person }

  identity_store_id = local.identity_store_id
  user_name         = each.key
  display_name      = each.key

  emails {
    primary = true
    value   = each.value.email
  }

  name {
    family_name = each.value.family_name
    given_name  = each.value.given_name
  }
}
 
# Add the manually created initial administrator to the "admin" group.
# Requires var.groups to contain a group whose id is "admin".
resource "aws_identitystore_group_membership" "initial_admin" {
  identity_store_id = local.identity_store_id

  member_id = data.aws_identitystore_user.initial_admin.user_id
  group_id  = aws_identitystore_group.this["admin"].group_id
}
 
# Memberships for Terraform-managed users: one instance per (user, group) pair.
resource "aws_identitystore_group_membership" "this" {
  for_each = local.user_group_memberships

  identity_store_id = local.identity_store_id

  member_id = aws_identitystore_user.this[each.value.user_id].user_id
  group_id  = aws_identitystore_group.this[each.value.group].group_id
}
 
# One permission set per entry in local.permission_sets.
resource "aws_ssoadmin_permission_set" "this" {
  for_each = local.permission_sets

  instance_arn     = local.sso_instance_arn
  name             = each.value.name
  session_duration = each.value.session_duration
}
 
# Attaches each AWS managed policy to its permission set; one instance per
# (permission set, policy ARN) pair from local.managed_policy_attachments.
resource "aws_ssoadmin_managed_policy_attachment" "this" {
  for_each = local.managed_policy_attachments

  # Explicit ordering recommended by the provider documentation: with this
  # dependency the attachment is destroyed before the account assignment is
  # removed, so teardown does not fail on a permission set that is still
  # assigned.
  depends_on = [aws_ssoadmin_account_assignment.this]

  instance_arn       = local.sso_instance_arn
  permission_set_arn = aws_ssoadmin_permission_set.this[each.value.permission_set].arn
  managed_policy_arn = each.value.policy_arn
}
 
# Inline policies, created only for permission sets that define one
# (entries whose inline_policy is null are skipped).
resource "aws_ssoadmin_permission_set_inline_policy" "this" {
  for_each = {
    for ps_key, ps in local.permission_sets :
    ps_key => ps.inline_policy
    if ps.inline_policy != null
  }

  instance_arn       = local.sso_instance_arn
  permission_set_arn = aws_ssoadmin_permission_set.this[each.key].arn
  inline_policy      = each.value
}
 
# Grants each group access to each account it is listed under, using the
# permission set mapped to that group in local.group_to_ps.
resource "aws_ssoadmin_account_assignment" "this" {
  for_each = local.account_group_assignments

  instance_arn = local.sso_instance_arn

  # Who: the Identity Store group.
  principal_type = "GROUP"
  principal_id   = aws_identitystore_group.this[each.value.group].group_id

  # Where: the member account.
  target_type = "AWS_ACCOUNT"
  target_id   = each.value.account_id

  # What: the permission set assigned to the group.
  permission_set_arn = aws_ssoadmin_permission_set.this[local.group_to_ps[each.value.group]].arn
}

계정을 생성한 뒤 해당 계정에 테라폼을 배포하기 위해 부트스트랩을 진행한다. 우선 tfstate를 저장할 버킷을 배포하기 위한 account_bootstrap 모듈을 작성한다.

# Provider requirements of the account_bootstrap module. The caller supplies
# the AWS provider configuration via the module's `providers` argument.
terraform {
  required_providers {
    aws = {
      source                = "hashicorp/aws"
      # NOTE(review): configuration_aliases usually lists aliased names
      # (aws.<alias>); listing the bare default `aws` looks redundant — confirm.
      configuration_aliases = [aws]
    }
  }
}
 
# KMS key used to encrypt the Terraform state bucket.
resource "aws_kms_key" "tfstate" {
  enable_key_rotation     = true
  deletion_window_in_days = 30
}
 
# Friendly alias for the state-encryption key.
resource "aws_kms_alias" "tfstate" {
  target_key_id = aws_kms_key.tfstate.id
  name          = "alias/tfstate"
}
 
# Terraform state bucket for the account, named "<bucket_prefix>-tf".
resource "aws_s3_bucket" "tfstate" {
  bucket = "${var.bucket_prefix}-tf"

  # The bucket holds state; refuse any plan that would destroy it.
  lifecycle {
    prevent_destroy = true
  }
}
 
# Keep previous versions of every state object.
resource "aws_s3_bucket_versioning" "tfstate" {
  bucket = aws_s3_bucket.tfstate.id

  versioning_configuration {
    status = "Enabled"
  }
}
 
# Default encryption: SSE-KMS with the tfstate key, with S3 Bucket Keys enabled.
resource "aws_s3_bucket_server_side_encryption_configuration" "tfstate" {
  bucket = aws_s3_bucket.tfstate.id

  rule {
    bucket_key_enabled = true

    apply_server_side_encryption_by_default {
      kms_master_key_id = aws_kms_key.tfstate.arn
      sse_algorithm     = "aws:kms"
    }
  }
}
 
# Turn on all four public-access-block settings for the state bucket.
resource "aws_s3_bucket_public_access_block" "tfstate" {
  bucket = aws_s3_bucket.tfstate.id

  block_public_acls       = true
  ignore_public_acls      = true
  block_public_policy     = true
  restrict_public_buckets = true
}
 
# Housekeeping for the state bucket.
resource "aws_s3_bucket_lifecycle_configuration" "tfstate" {
  bucket = aws_s3_bucket.tfstate.id

  rule {
    status = "Enabled"
    id     = "expire-old-versions"

    # Empty filter: the rule applies to every object in the bucket.
    filter {}

    # Clean up multipart uploads that were never completed.
    abort_incomplete_multipart_upload {
      days_after_initiation = 7
    }

    # Drop noncurrent object versions after 90 days.
    noncurrent_version_expiration {
      noncurrent_days = 90
    }
  }
}
 
# Prefix of the state bucket name. The bucket is created as "<bucket_prefix>-tf",
# so the prefix must itself be usable inside an S3 bucket name.
variable "bucket_prefix" {
  description = "Prefix for the Terraform state bucket; the bucket is named \"<bucket_prefix>-tf\"."
  type        = string

  # Catch invalid names at plan time instead of as an S3 API error during
  # apply. 60 characters leaves room for the "-tf" suffix within S3's
  # 63-character bucket-name limit.
  validation {
    condition     = can(regex("^[a-z0-9][a-z0-9.-]{0,59}$", var.bucket_prefix))
    error_message = "The bucket_prefix value must be 1-60 characters of lowercase letters, digits, dots or hyphens, and must start with a letter or digit."
  }
}

이후 해당 모듈로 각 계정에 부트스트랩을 진행한다.

terraform {
  # `use_lockfile` (S3-native state locking) is only understood by
  # Terraform 1.10 and newer, so reject older CLIs up front instead of
  # failing during backend initialisation.
  required_version = ">= 1.10.0"

  # Bootstrap state lives in the management account's state bucket under
  # its own key.
  backend "s3" {
    bucket       = "test-mgmt-tf"
    key          = "bootstrap/terraform.tfstate"
    profile      = "test-mgmt"
    region       = "ap-northeast-2"
    encrypt      = true
    use_lockfile = true
  }

  required_providers {
    aws = { source = "hashicorp/aws" }
  }
}
 
# Default provider: the management account via the `test-mgmt` CLI profile.
provider "aws" {
  region  = var.aws_region
  profile = "test-mgmt"
}
 
# Aliased provider for one member account: starts from the management
# profile and assumes OrganizationAccountAccessRole in the target account.
provider "aws" {
  alias   = "xxxx"
  region  = var.aws_region
  profile = "test-mgmt"

  assume_role {
    role_arn = "arn:aws:iam::${var.account_ids["xxxx"]}:role/OrganizationAccountAccessRole"
  }
}
 
# Bootstraps the member account by instantiating account_bootstrap with the
# aliased provider for that account.
module "bootstrap_xxxx" {
  source = "../../modules/account_bootstrap"

  providers = {
    aws = aws.xxxx
  }

  bucket_prefix = "xxxx"
}
...