Infrastructure as Code (IaC)

IaC là gì?

Infrastructure as Code (IaC) là practice quản lý và provision computing infrastructure thông qua machine-readable definition files, thay vì cấu hình thủ công trên physical hardware hoặc qua các interactive configuration tools.

Lợi ích của IaC

  1. Consistency: Đảm bảo environments giống nhau
  2. Version Control: Track changes trong infrastructure
  3. Automation: Tự động hóa deployment và scaling
  4. Cost Optimization: Optimize resource usage

Terraform Example

# main.tf
# Minimal example: the AWS provider is fixed to the us-west-2 region.
provider "aws" {
  region = "us-west-2"
}

# VPC with the 10.0.0.0/16 address range, tagged "main-vpc".
resource "aws_vpc" "main" {
  tags = {
    Name = "main-vpc"
  }

  cidr_block = "10.0.0.0/16"
}

# ECS Cluster named "app-cluster".
resource "aws_ecs_cluster" "app" {
  name = "app-cluster"
}

# ECS Task Definition
# Single-container Fargate task: 512 CPU units and 1024 MiB of memory, running
# the "myapp:latest" image in a container named "app" that exposes port 8080.
resource "aws_ecs_task_definition" "app" {
  family                   = "app"
  requires_compatibilities = ["FARGATE"]
  # Fargate only accepts the awsvpc network mode; without it the task
  # definition is rejected when it is registered.
  network_mode             = "awsvpc"
  cpu                      = 512
  memory                   = 1024

  # ECS expects the container list as a JSON document, hence jsonencode().
  container_definitions = jsonencode([
    {
      name  = "app"
      image = "myapp:latest"
      portMappings = [
        {
          containerPort = 8080
        }
      ]
    }
  ])
}

Ansible Example

# playbook.yml
# Installs Java 17, copies the application JAR and starts the "app" service
# on every host in the "servers" group.
- name: Deploy Java Application
  hosts: servers
  # Package installation, writing to /opt and managing systemd units all
  # require root privileges.
  become: true
  tasks:
    - name: Install Java
      apt:
        name: openjdk-17-jdk
        state: present
        # Refresh the package index so the install does not fail on a stale cache.
        update_cache: true

    - name: Copy application
      copy:
        src: app.jar
        dest: /opt/app.jar

    # NOTE(review): this play does not install a systemd unit named "app";
    # it is assumed to exist on the target hosts already.
    - name: Start application
      systemd:
        name: app
        state: started

Best Practices

  1. Version Control: Store IaC code in git
  2. Environment Separation: Different configs cho dev/prod
  3. State Management: Use remote state storage
  4. Security: Encrypt sensitive data

Terraform cho Java Applications

Basic Terraform Configuration

# main.tf
# Version constraints: Terraform 1.0 or newer, AWS provider in the 5.x series.
terraform {
  required_providers {
    aws = {
      version = "~> 5.0"
      source  = "hashicorp/aws"
    }
  }

  required_version = ">= 1.0"
}

# The provider region comes from var.aws_region.
provider "aws" {
  region = var.aws_region
}

# Variables
# Region used by the AWS provider.
variable "aws_region" {
  type        = string
  default     = "us-west-2"
  description = "AWS region"
}

# Environment label; other resources compare it against "production".
variable "environment" {
  type        = string
  default     = "production"
  description = "Environment name"
}

# Application name; used as a prefix in resource names and tags.
variable "app_name" {
  type        = string
  default     = "user-service"
  description = "Application name"
}

# VPC Configuration
# 10.0.0.0/16 network with DNS resolution and DNS hostnames switched on.
resource "aws_vpc" "main" {
  cidr_block = "10.0.0.0/16"

  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-vpc"
  }
}

# Internet Gateway
# Attached to the VPC above; the public route table sends 0.0.0.0/0 through it.
resource "aws_internet_gateway" "main" {
  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-igw"
  }

  vpc_id = aws_vpc.main.id
}

# Public Subnets
# Two /24 subnets (10.0.1.0/24 and 10.0.2.0/24), one per availability zone.
# Instances launched here receive a public IP automatically.
resource "aws_subnet" "public" {
  count = 2

  vpc_id            = aws_vpc.main.id
  availability_zone = data.aws_availability_zones.available.names[count.index]

  # Carves the (count.index + 1)-th /24 out of the VPC's /16.
  cidr_block              = cidrsubnet(aws_vpc.main.cidr_block, 8, count.index + 1)
  map_public_ip_on_launch = true

  tags = {
    Type        = "Public"
    Environment = var.environment
    Name        = "${var.app_name}-public-subnet-${count.index + 1}"
  }
}

# Private Subnets
# Two /24 subnets (10.0.10.0/24 and 10.0.11.0/24) in the same availability
# zones as the public ones; no public IPs are assigned here.
resource "aws_subnet" "private" {
  count = 2

  vpc_id            = aws_vpc.main.id
  availability_zone = data.aws_availability_zones.available.names[count.index]

  # Carves the (count.index + 10)-th /24 out of the VPC's /16.
  cidr_block = cidrsubnet(aws_vpc.main.cidr_block, 8, count.index + 10)

  tags = {
    Type        = "Private"
    Environment = var.environment
    Name        = "${var.app_name}-private-subnet-${count.index + 1}"
  }
}

# Route Tables
# Public route table: everything outside the VPC goes through the internet
# gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.main.id
  }

  tags = {
    Name        = "${var.app_name}-public-rt"
    Environment = var.environment
  }
}

# Attach every public subnet to the public route table.
resource "aws_route_table_association" "public" {
  count = length(aws_subnet.public)

  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}

# NAT for the private subnets.
# The ECS service runs its tasks in the private subnets without public IPs.
# Those tasks still need outbound access to pull the image from ECR, read
# secrets and ship logs, so the private subnets get a default route through a
# NAT gateway placed in the first public subnet.
# A single NAT gateway keeps cost down; use one per availability zone if the
# workload must survive the loss of an AZ.
resource "aws_eip" "nat" {
  domain = "vpc"

  # The address can only be used once the VPC has an internet gateway.
  depends_on = [aws_internet_gateway.main]

  tags = {
    Name        = "${var.app_name}-nat-eip"
    Environment = var.environment
  }
}

resource "aws_nat_gateway" "main" {
  allocation_id = aws_eip.nat.id
  subnet_id     = aws_subnet.public[0].id

  depends_on = [aws_internet_gateway.main]

  tags = {
    Name        = "${var.app_name}-nat"
    Environment = var.environment
  }
}

# Private route table: outbound traffic leaves through the NAT gateway.
resource "aws_route_table" "private" {
  vpc_id = aws_vpc.main.id

  route {
    cidr_block     = "0.0.0.0/0"
    nat_gateway_id = aws_nat_gateway.main.id
  }

  tags = {
    Name        = "${var.app_name}-private-rt"
    Environment = var.environment
  }
}

# Attach every private subnet to the private route table.
resource "aws_route_table_association" "private" {
  count = length(aws_subnet.private)

  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private.id
}

# Data sources
# Availability zones in the "available" state; the subnet resources index into
# the returned `names` list to choose an AZ.
data "aws_availability_zones" "available" {
  state = "available"
}

ECS Cluster cho Java Application

# ecs.tf
# ECS Cluster
# Named "<app_name>-cluster", with the containerInsights setting enabled.
resource "aws_ecs_cluster" "main" {
  name = "${var.app_name}-cluster"

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-cluster"
  }

  setting {
    value = "enabled"
    name  = "containerInsights"
  }
}

# ECS Task Definition
# Fargate task (awsvpc networking, 512 CPU units, 1024 MiB of memory) with a
# single container named after var.app_name. The image is the :latest tag of
# the ECR repository defined in this configuration.
resource "aws_ecs_task_definition" "app" {
  family                   = var.app_name
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]
  cpu                      = 512
  memory                   = 1024
  # Execution role: used by ECS itself (image pull, secrets, logs).
  execution_role_arn       = aws_iam_role.ecs_execution_role.arn
  # Task role: the credentials the application code runs with.
  task_role_arn            = aws_iam_role.ecs_task_role.arn

  container_definitions = jsonencode([
    {
      name  = var.app_name
      image = "${aws_ecr_repository.app.repository_url}:latest"

      portMappings = [
        {
          containerPort = 8080
          protocol      = "tcp"
        }
      ]

      # Plain (non-secret) configuration passed as environment variables.
      environment = [
        {
          name  = "SPRING_PROFILES_ACTIVE"
          value = var.environment
        },
        {
          name  = "DB_HOST"
          value = aws_rds_cluster.database.endpoint
        }
      ]

      # The cluster sets manage_master_user_password = true, so RDS generates
      # the master password and keeps it in its own Secrets Manager secret as
      # a JSON document. The ":password::" suffix selects the "password" key
      # of that document.
      secrets = [
        {
          name      = "DB_PASSWORD"
          valueFrom = "${aws_rds_cluster.database.master_user_secret[0].secret_arn}:password::"
        }
      ]

      logConfiguration = {
        logDriver = "awslogs"
        options = {
          awslogs-group         = aws_cloudwatch_log_group.app.name
          awslogs-region        = var.aws_region
          awslogs-stream-prefix = "ecs"
        }
      }

      # Container-level health check against the Spring Boot actuator endpoint.
      # NOTE(review): this requires `curl` to be present in the image.
      healthCheck = {
        command = [
          "CMD-SHELL",
          "curl -f http://localhost:8080/actuator/health || exit 1"
        ]
        interval    = 30
        timeout     = 5
        retries     = 3
        startPeriod = 60
      }
    }
  ])

  # Make sure the execution role may read the secret before any task that
  # uses this definition is launched.
  depends_on = [aws_iam_role_policy.ecs_execution_db_master_secret]

  tags = {
    Name        = "${var.app_name}-task"
    Environment = var.environment
  }
}

# Allows the ECS execution role to read the RDS-managed master user secret
# that the task definition injects as DB_PASSWORD.
resource "aws_iam_role_policy" "ecs_execution_db_master_secret" {
  name = "${var.app_name}-ecs-execution-db-master-secret"
  role = aws_iam_role.ecs_execution_role.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Action = [
          "secretsmanager:GetSecretValue"
        ]
        Resource = [
          aws_rds_cluster.database.master_user_secret[0].secret_arn
        ]
      }
    ]
  })
}

# ECS Service
# Runs the task definition on Fargate inside the private subnets and registers
# the container's port 8080 with the load balancer target group.
resource "aws_ecs_service" "app" {
  name        = var.app_name
  cluster     = aws_ecs_cluster.main.id
  launch_type = "FARGATE"

  task_definition = aws_ecs_task_definition.app.arn
  # Three tasks when environment is "production", a single task otherwise.
  desired_count   = var.environment == "production" ? 3 : 1

  load_balancer {
    container_name   = var.app_name
    container_port   = 8080
    target_group_arn = aws_lb_target_group.app.arn
  }

  network_configuration {
    assign_public_ip = false
    security_groups  = [aws_security_group.ecs_tasks.id]
    subnets          = aws_subnet.private[*].id
  }

  # Explicitly wait for the HTTP listener before creating the service.
  depends_on = [aws_lb_listener.app]

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-service"
  }
}

# ECR Repository
# Image repository named after the application. Tags are mutable (an existing
# tag can be overwritten) and images are scanned when pushed.
resource "aws_ecr_repository" "app" {
  name = var.app_name

  image_scanning_configuration {
    scan_on_push = true
  }

  image_tag_mutability = "MUTABLE"

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-ecr"
  }
}

RDS Database Configuration

# database.tf
# RDS Subnet Group
# Groups all private subnets; the Aurora cluster references it by name.
resource "aws_db_subnet_group" "database" {
  subnet_ids = aws_subnet.private[*].id
  name       = "${var.app_name}-db-subnet-group"

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-db-subnet-group"
  }
}

# RDS Aurora Cluster
# Aurora MySQL cluster in the private subnets. Production keeps 7 days of
# backups, takes a final snapshot on deletion and is protected against
# accidental deletion; other environments keep 1 day and skip the snapshot.
resource "aws_rds_cluster" "database" {
  cluster_identifier = "${var.app_name}-db-cluster"
  engine             = "aurora-mysql"
  engine_version     = "8.0.mysql_aurora.3.02.0"
  # Database names may contain only letters and digits, while app_name is
  # typically hyphenated (default "user-service"), so strip everything else.
  database_name      = replace(var.app_name, "/[^0-9A-Za-z]/", "")
  master_username    = "admin"
  # RDS generates the master password and stores it in Secrets Manager.
  manage_master_user_password = true

  db_subnet_group_name   = aws_db_subnet_group.database.name
  vpc_security_group_ids = [aws_security_group.database.id]

  backup_retention_period = var.environment == "production" ? 7 : 1
  preferred_backup_window = "03:00-04:00"

  skip_final_snapshot = var.environment != "production"
  # Needed whenever skip_final_snapshot is false (production); without a
  # snapshot name the cluster cannot be destroyed.
  final_snapshot_identifier = "${var.app_name}-db-final-snapshot"
  deletion_protection       = var.environment == "production"

  tags = {
    Name        = "${var.app_name}-db-cluster"
    Environment = var.environment
  }
}

# RDS Aurora Instances
# Two db.r6g.large instances in production, one db.t3.medium elsewhere.
# Performance Insights and 60-second Enhanced Monitoring are production-only.

# Role RDS assumes to publish Enhanced Monitoring metrics. It is only created
# in production because monitoring_interval is 0 (disabled) elsewhere.
resource "aws_iam_role" "rds_enhanced_monitoring" {
  count = var.environment == "production" ? 1 : 0

  name = "${var.app_name}-rds-monitoring-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "monitoring.rds.amazonaws.com"
        }
      }
    ]
  })

  tags = {
    Name        = "${var.app_name}-rds-monitoring-role"
    Environment = var.environment
  }
}

resource "aws_iam_role_policy_attachment" "rds_enhanced_monitoring" {
  count = var.environment == "production" ? 1 : 0

  role       = aws_iam_role.rds_enhanced_monitoring[count.index].name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonRDSEnhancedMonitoringRole"
}

resource "aws_rds_cluster_instance" "database" {
  count = var.environment == "production" ? 2 : 1

  identifier         = "${var.app_name}-db-instance-${count.index}"
  cluster_identifier = aws_rds_cluster.database.id
  instance_class     = var.environment == "production" ? "db.r6g.large" : "db.t3.medium"
  # Engine and version are inherited from the cluster so they cannot drift.
  engine             = aws_rds_cluster.database.engine
  engine_version     = aws_rds_cluster.database.engine_version

  performance_insights_enabled = var.environment == "production"
  monitoring_interval          = var.environment == "production" ? 60 : 0
  # A non-zero monitoring_interval requires a monitoring role; one() yields
  # null outside production, where the role is not created.
  monitoring_role_arn          = one(aws_iam_role.rds_enhanced_monitoring[*].arn)

  # The role must carry its policy before RDS starts using it.
  depends_on = [aws_iam_role_policy_attachment.rds_enhanced_monitoring]

  tags = {
    Name        = "${var.app_name}-db-instance-${count.index}"
    Environment = var.environment
  }
}

# Secrets Manager for DB Password
# Declares the secret itself, with a 7-day recovery window after deletion.
# NOTE(review): no secret value/version is defined in this snippet; it is
# presumably populated elsewhere - confirm.
resource "aws_secretsmanager_secret" "db_password" {
  name        = "${var.app_name}-db-password"
  description = "Database password for ${var.app_name}"

  recovery_window_in_days = 7

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-db-password"
  }
}

Load Balancer Configuration

# load_balancer.tf
# Application Load Balancer
# Internet-facing ALB placed in the public subnets. Deletion protection is
# switched on only when environment is "production".
resource "aws_lb" "app" {
  name               = "${var.app_name}-alb"
  load_balancer_type = "application"
  internal           = false

  subnets         = aws_subnet.public[*].id
  security_groups = [aws_security_group.alb.id]

  enable_deletion_protection = var.environment == "production"

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-alb"
  }
}

# Target Group
# Targets are registered by IP address and receive HTTP traffic on port 8080.
resource "aws_lb_target_group" "app" {
  name        = "${var.app_name}-tg"
  vpc_id      = aws_vpc.main.id
  target_type = "ip"
  protocol    = "HTTP"
  port        = 8080

  # Probe /actuator/health on the traffic port every 30 s with a 5 s timeout;
  # two consecutive results flip a target's state in either direction.
  health_check {
    enabled             = true
    protocol            = "HTTP"
    port                = "traffic-port"
    path                = "/actuator/health"
    matcher             = "200"
    interval            = 30
    timeout             = 5
    healthy_threshold   = 2
    unhealthy_threshold = 2
  }

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-tg"
  }
}

# ALB Listener
# Plain HTTP listener on port 80 that forwards every request to the
# application target group.
resource "aws_lb_listener" "app" {
  load_balancer_arn = aws_lb.app.arn
  protocol          = "HTTP"
  port              = "80"

  default_action {
    target_group_arn = aws_lb_target_group.app.arn
    type             = "forward"
  }

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-listener"
  }
}

# HTTPS Listener (if SSL certificate available)
# Created only when var.ssl_certificate_arn is non-empty; terminates TLS on
# port 443 and forwards to the same target group as the HTTP listener.
resource "aws_lb_listener" "app_https" {
  count = var.ssl_certificate_arn != "" ? 1 : 0

  load_balancer_arn = aws_lb.app.arn
  port              = "443"
  protocol          = "HTTPS"
  # Current AWS-recommended policy (TLS 1.3 with TLS 1.2 fallback); it
  # replaces the older 2017 TLS 1.2 policy.
  ssl_policy        = "ELBSecurityPolicy-TLS13-1-2-2021-06"
  certificate_arn   = var.ssl_certificate_arn

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.app.arn
  }

  tags = {
    Name        = "${var.app_name}-https-listener"
    Environment = var.environment
  }
}

# Certificate for the HTTPS listener; the empty default disables that listener.
variable "ssl_certificate_arn" {
  type        = string
  default     = ""
  description = "SSL certificate ARN for HTTPS"
}

Security Groups

# security_groups.tf
# ALB Security Group
# Accepts HTTP (80) and HTTPS (443) from any IPv4 address and allows all
# outbound traffic.
resource "aws_security_group" "alb" {
  vpc_id      = aws_vpc.main.id
  name        = "${var.app_name}-alb-sg"
  description = "Security group for ALB"

  # HTTP
  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  # HTTPS
  ingress {
    protocol    = "tcp"
    from_port   = 443
    to_port     = 443
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Protocol "-1" with ports 0-0 means all protocols and ports.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-alb-sg"
  }
}

# ECS Tasks Security Group
# Port 8080 is reachable only from the ALB security group; all outbound
# traffic is allowed.
resource "aws_security_group" "ecs_tasks" {
  vpc_id      = aws_vpc.main.id
  name        = "${var.app_name}-ecs-tasks-sg"
  description = "Security group for ECS tasks"

  # The source is a security group rather than a CIDR range.
  ingress {
    protocol        = "tcp"
    from_port       = 8080
    to_port         = 8080
    security_groups = [aws_security_group.alb.id]
  }

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-ecs-tasks-sg"
  }
}

# Database Security Group
# MySQL port 3306 is reachable only from the ECS tasks security group; all
# outbound traffic is allowed.
resource "aws_security_group" "database" {
  vpc_id      = aws_vpc.main.id
  name        = "${var.app_name}-db-sg"
  description = "Security group for RDS database"

  ingress {
    protocol        = "tcp"
    from_port       = 3306
    to_port         = 3306
    security_groups = [aws_security_group.ecs_tasks.id]
  }

  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-db-sg"
  }
}

IAM Roles

# iam.tf
# ECS Execution Role
# Role that the ecs-tasks.amazonaws.com service principal may assume; the task
# definition references it as execution_role_arn.
resource "aws_iam_role" "ecs_execution_role" {
  name = "${var.app_name}-ecs-execution-role"

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-ecs-execution-role"
  }

  # Trust policy (jsonencode emits object keys in sorted order).
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}

# Attaches the AWS-managed task execution policy to the role above.
resource "aws_iam_role_policy_attachment" "ecs_execution_role_policy" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecs_execution_role.name
}

# Additional policy for Secrets Manager access
# Inline policy on the execution role: permits GetSecretValue on the
# db_password secret and nothing else.
resource "aws_iam_role_policy" "ecs_execution_secrets" {
  role = aws_iam_role.ecs_execution_role.id
  name = "${var.app_name}-ecs-execution-secrets"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Resource = [
          aws_secretsmanager_secret.db_password.arn
        ]
        Action = [
          "secretsmanager:GetSecretValue"
        ]
      }
    ]
  })
}

# ECS Task Role
# Role that the ecs-tasks.amazonaws.com service principal may assume; the task
# definition references it as task_role_arn.
resource "aws_iam_role" "ecs_task_role" {
  name = "${var.app_name}-ecs-task-role"

  tags = {
    Environment = var.environment
    Name        = "${var.app_name}-ecs-task-role"
  }

  # Trust policy (jsonencode emits object keys in sorted order).
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "ecs-tasks.amazonaws.com"
        }
        Action = "sts:AssumeRole"
      }
    ]
  })
}

# Task role policy for application-specific permissions
# Inline policy on the task role: read and write objects in the app_storage
# bucket (object-level ARN "<bucket-arn>/*").
# NOTE(review): aws_s3_bucket.app_storage is not defined in this snippet; it is
# assumed to be declared elsewhere in the configuration.
resource "aws_iam_role_policy" "ecs_task_policy" {
  role = aws_iam_role.ecs_task_role.id
  name = "${var.app_name}-ecs-task-policy"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Resource = [
          "${aws_s3_bucket.app_storage.arn}/*"
        ]
        Action = [
          "s3:GetObject",
          "s3:PutObject"
        ]
      }
    ]
  })
}

Outputs

# outputs.tf
# Values exposed after `terraform apply`.

# Public DNS name of the ALB.
output "load_balancer_dns" {
  value       = aws_lb.app.dns_name
  description = "DNS name of the load balancer"
}

# Marked sensitive, so Terraform redacts it in plan/apply output.
output "database_endpoint" {
  value       = aws_rds_cluster.database.endpoint
  sensitive   = true
  description = "RDS cluster endpoint"
}

# URL of the ECR repository (the task definition pulls its image from here).
output "ecr_repository_url" {
  value       = aws_ecr_repository.app.repository_url
  description = "ECR repository URL"
}

output "ecs_cluster_name" {
  value       = aws_ecs_cluster.main.name
  description = "ECS cluster name"
}

output "vpc_id" {
  value       = aws_vpc.main.id
  description = "VPC ID"
}

Terraform Commands

# Initialize Terraform (downloads providers, configures the backend)
terraform init

# Check the configuration for syntax and internal consistency
terraform validate

# Plan the deployment and save the plan to a file
terraform plan -var="environment=production" -out=tfplan

# Apply exactly the plan that was reviewed (variables are stored in the plan)
terraform apply tfplan

# Show current state
terraform show

# List resources
terraform state list

# Destroy infrastructure
terraform destroy -var="environment=production"

Ansible Configuration Management

Ansible Playbook cho Java Application

# playbook.yml
# Provisions a Debian/Ubuntu host (apt-based) for the Java service: installs
# Java 17, creates a dedicated system user and directory layout, downloads the
# JAR, renders the Spring configuration and systemd unit, then starts the
# service.
# The configuration template also reads database_host, database_name,
# database_username and database_password, which are not set in this play and
# have to come from inventory, group_vars or a vault.
---
- name: Deploy Java Application
  hosts: web_servers
  become: yes
  vars:
    app_name: user-service
    # Override with `-e version=<x.y.z>`; falls back to "latest".
    app_version: "{{ version | default('latest') }}"
    # The Debian/Ubuntu openjdk-17-jdk package installs into an
    # architecture-suffixed directory (-amd64 / -arm64), not the unsuffixed
    # path used on RHEL-family systems.
    java_home: "/usr/lib/jvm/java-17-openjdk-{{ 'arm64' if ansible_architecture == 'aarch64' else 'amd64' }}"
    app_user: appuser
    app_port: 8080

  tasks:
    - name: Update system packages
      apt:
        update_cache: yes
        upgrade: dist

    - name: Install Java 17
      apt:
        name: openjdk-17-jdk
        state: present

    # System account without a login shell; its home is the application root.
    - name: Create application user
      user:
        name: "{{ app_user }}"
        system: yes
        shell: /bin/false
        home: /opt/{{ app_name }}
        create_home: yes

    - name: Create application directories
      file:
        path: "{{ item }}"
        state: directory
        owner: "{{ app_user }}"
        group: "{{ app_user }}"
        mode: '0755'
      loop:
        - /opt/{{ app_name }}
        - /opt/{{ app_name }}/bin
        - /opt/{{ app_name }}/config
        - /opt/{{ app_name }}/logs

    - name: Download application JAR
      get_url:
        url: "https://artifacts.company.com/{{ app_name }}/{{ app_version }}/{{ app_name }}.jar"
        dest: /opt/{{ app_name }}/bin/{{ app_name }}.jar
        # The destination name does not include the version, so without
        # force an existing JAR would never be replaced by a newer build.
        force: yes
        owner: "{{ app_user }}"
        group: "{{ app_user }}"
        mode: '0644'
      notify: restart application

    # 0640: the file contains the database password.
    - name: Copy application configuration
      template:
        src: application.yml.j2
        dest: /opt/{{ app_name }}/config/application.yml
        owner: "{{ app_user }}"
        group: "{{ app_user }}"
        mode: '0640'
      notify: restart application

    - name: Copy systemd service file
      template:
        src: user-service.service.j2
        dest: /etc/systemd/system/{{ app_name }}.service
        mode: '0644'
      notify:
        - reload systemd
        - restart application

    - name: Start and enable application service
      systemd:
        name: "{{ app_name }}"
        state: started
        enabled: yes
        daemon_reload: yes

  # Handlers run once at the end of the play, in the order listed here, and
  # only when a task that notified them reported a change.
  handlers:
    - name: reload systemd
      systemd:
        daemon_reload: yes

    - name: restart application
      systemd:
        name: "{{ app_name }}"
        state: restarted

Application Configuration Template

# templates/application.yml.j2
# Spring Boot configuration rendered by Ansible. Variables without a default
# (database_host, database_name, database_username, database_password) must be
# supplied by the play or inventory.
server:
  port: {{ app_port }}

spring:
  profiles:
    active: {{ spring_profile | default('production') }}

  datasource:
    url: jdbc:mysql://{{ database_host }}:3306/{{ database_name }}
    # to_json renders the credentials as double-quoted, escaped strings, so
    # characters such as '#', ':' or quotes cannot break the YAML or be
    # truncated as a comment.
    username: {{ database_username | to_json }}
    password: {{ database_password | to_json }}
    driver-class-name: com.mysql.cj.jdbc.Driver

  jpa:
    hibernate:
      # validate: check the schema against the entities, never modify it.
      ddl-auto: validate
    show-sql: false
    properties:
      hibernate:
        dialect: org.hibernate.dialect.MySQL8Dialect

  redis:
    host: {{ redis_host | default('localhost') }}
    port: {{ redis_port | default(6379) }}

logging:
  level:
    com.company: INFO
    org.springframework: WARN
  file:
    name: /opt/{{ app_name }}/logs/application.log

management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus
  endpoint:
    health:
      show-details: always

Systemd Service Template

# templates/user-service.service.j2
# systemd unit for the Java application, rendered by Ansible.
[Unit]
Description={{ app_name | title }} Java Application
After=network.target

[Service]
Type=simple
# Run as the dedicated application account, not root.
User={{ app_user }}
Group={{ app_user }}
WorkingDirectory=/opt/{{ app_name }}

Environment=JAVA_HOME={{ java_home }}
Environment=SPRING_PROFILES_ACTIVE={{ spring_profile | default('production') }}

ExecStart={{ java_home }}/bin/java \
  -Xms512m \
  -Xmx1024m \
  -XX:+UseG1GC \
  -XX:+UseContainerSupport \
  -Dspring.config.location=classpath:/application.yml,/opt/{{ app_name }}/config/application.yml \
  -jar /opt/{{ app_name }}/bin/{{ app_name }}.jar

ExecStop=/bin/kill -TERM $MAINPID
# A JVM terminated by SIGTERM exits with status 143 (128 + 15). Count that as
# a clean shutdown so a normal stop does not leave the unit in "failed" state.
SuccessExitStatus=143
Restart=always
RestartSec=10

StandardOutput=journal
StandardError=journal
SyslogIdentifier={{ app_name }}

[Install]
WantedBy=multi-user.target

Best Practices cho IaC

1. Version Control

  • Store all IaC code in version control
  • Use branching strategies for different environments
  • Tag releases for tracking

2. Environment Separation

  • Separate configurations cho dev, staging, production
  • Use variables và parameters
  • Environment-specific tfvars files

3. State Management

  • Use remote state storage (S3, Azure Storage)
  • Enable state locking
  • Regular state backups

4. Security

  • Use IAM roles thay vì access keys
  • Encrypt sensitive data
  • Regular security audits

5. Documentation

  • Comment complex configurations
  • Maintain README files
  • Document architecture decisions

6. Testing

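  • Validate syntax before applying (ví dụ: terraform validate, ansible-playbook --syntax-check)
  • Test in lower environments first
  • Use policy as code tools (ví dụ: Open Policy Agent, HashiCorp Sentinel)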

Infrastructure as Code enables consistent, repeatable, và scalable infrastructure management cho Java applications trong cloud environments.