# Terraform configuration to create an Amazon EMR cluster.

# AWS provider configuration. Every resource and data source in this file is
# created in / read from the region set here.
provider "aws" {
  region = "us-east-1"
}

# --- 1. IAM Roles ---
# EMR Service Role
resource "aws_iam_role" "emr_service_role" {
  name = "MyTerraformEMRServiceRole"

  # Trust policy: only the EMR service principal may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "elasticmapreduce.amazonaws.com" }
    }]
  })
}

# Attach the AWS-managed v2 service policy to the EMR service role.
resource "aws_iam_role_policy_attachment" "emr_service_role_policy" {
  role       = aws_iam_role.emr_service_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEMRServicePolicy_v2"
}

# AmazonEMRServicePolicy_v2 only grants iam:PassRole for the default-named
# instance role (EMR_EC2_DefaultRole). Because this configuration uses a
# custom-named EC2 role, the service role needs an explicit, narrowly scoped
# PassRole permission for it; otherwise cluster creation is rejected.
# NOTE(review): aws_emr_cluster.main does not list this resource in its
# depends_on; consider adding it there so the permission is guaranteed to
# exist before the cluster is created.
resource "aws_iam_role_policy" "emr_service_pass_ec2_role" {
  name = "AllowPassEMREC2InstanceRole"
  role = aws_iam_role.emr_service_role.id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Sid      = "PassRoleForEC2"
      Effect   = "Allow"
      Action   = "iam:PassRole"
      Resource = aws_iam_role.emr_ec2_role.arn
      Condition = {
        StringLike = {
          "iam:PassedToService" = "ec2.amazonaws.com*"
        }
      }
    }]
  })
}

# EMR EC2 Instance Profile Role
resource "aws_iam_role" "emr_ec2_role" {
  name = "MyTerraformEMREC2Role"

  # Trust policy: only the EC2 service principal may assume this role, which
  # is how instances launched with the matching instance profile obtain it.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
    }]
  })
}

# Attach the AWS-managed EMR-on-EC2 policy to the instance role.
# The managed policy is named "AmazonElasticMapReduceforEC2Role"; the
# previously referenced "AmazonEC2forEMRRole" is not an AWS managed policy,
# so the attachment would fail at apply time.
resource "aws_iam_role_policy_attachment" "emr_ec2_role_policy" {
  role       = aws_iam_role.emr_ec2_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
}

# Grants the EC2 instance role the AWS-managed AmazonS3FullAccess policy.
# NOTE(review): this allows access to every bucket in the account, not just
# the EMR log bucket; consider replacing it with a policy scoped to the
# buckets the cluster actually reads and writes.
resource "aws_iam_role_policy_attachment" "emr_s3_access_policy" {
  role       = aws_iam_role.emr_ec2_role.name
  policy_arn = "arn:aws:iam::aws:policy/AmazonS3FullAccess" # For simplicity, grant broad S3 access for logs and data
}

# Instance profile wrapping the EC2 role; its ARN is what the EMR cluster's
# ec2_attributes block references.
resource "aws_iam_instance_profile" "emr_instance_profile" {
  name = "MyTerraformEMRInstanceProfile"
  role = aws_iam_role.emr_ec2_role.name
}

# --- 2. S3 Bucket for EMR Logs ---
# Bucket that receives EMR cluster logs. The name embeds the account ID and
# region so it is unique per account/region pair.
#
# No canned ACL is set: the inline `acl` argument is deprecated on
# aws_s3_bucket (AWS provider v4+), and newly created buckets have ACLs
# disabled by default, so applying one fails. The "log-delivery-write" ACL is
# for S3 server access logging; EMR log delivery relies on IAM permissions,
# not on bucket ACLs.
resource "aws_s3_bucket" "emr_logs_bucket" {
  bucket = "my-tf-emr-logs-${data.aws_caller_identity.current.account_id}-${data.aws_region.current.name}"

  tags = {
    Name = "EMRLogsBucket"
  }
}

# Turns on object versioning for the EMR log bucket.
resource "aws_s3_bucket_versioning" "emr_logs_bucket_versioning" {
  bucket = aws_s3_bucket.emr_logs_bucket.id
  versioning_configuration {
    status = "Enabled"
  }
}

# --- 3. EMR Cluster ---
# EMR cluster running Spark and Hadoop with one master and one core node.
# Logs are written to the root of the EMR log bucket.
resource "aws_emr_cluster" "main" {
  name          = "MyTerraformEMRCluster"
  release_label = "emr-6.12.0" # Specify a supported EMR release
  applications  = ["Spark", "Hadoop"]

  service_role = aws_iam_role.emr_service_role.arn
  log_uri      = "s3://${aws_s3_bucket.emr_logs_bucket.id}/"

  ec2_attributes {
    instance_profile = aws_iam_instance_profile.emr_instance_profile.arn
    # key_name         = "your-ec2-keypair" # Optional: if you need SSH access

    # NOTE(review): no subnet_id is set, so EMR chooses the network placement.
    # With AmazonEMRServicePolicy_v2 the subnet (and any custom security
    # groups) must also carry the "for-use-with-amazon-emr-managed-policies"
    # tag - confirm the target subnet is tagged or pin one explicitly here.
  }

  master_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 1
  }

  core_instance_group {
    instance_type  = "m5.xlarge"
    instance_count = 1
  }

  tags = {
    Name = "MyTerraformEMRCluster"

    # Required by AmazonEMRServicePolicy_v2: the managed policy only permits
    # actions on resources carrying this tag, so the cluster must be created
    # with it.
    "for-use-with-amazon-emr-managed-policies" = "true"
  }

  # Make sure the role permissions and the log bucket exist before EMR
  # validates them at cluster creation time.
  depends_on = [
    aws_iam_role_policy_attachment.emr_service_role_policy,
    aws_iam_role_policy_attachment.emr_ec2_role_policy,
    aws_iam_role_policy_attachment.emr_s3_access_policy,
    aws_s3_bucket.emr_logs_bucket,
  ]
}

# Data sources for dynamic bucket name
# Identity of the credentials Terraform runs with; its account_id is
# interpolated into the EMR log bucket name.
data "aws_caller_identity" "current" {}
# Region the provider is configured for; its name is interpolated into the
# EMR log bucket name.
data "aws_region" "current" {}

# --- Outputs ---
# Exposes the identifier of the EMR cluster created in this file.
output "emr_cluster_id" {
  description = "The ID of the EMR cluster."
  value       = aws_emr_cluster.main.id
}

# Exposes the name assigned to the EMR cluster.
output "emr_cluster_name" {
  description = "The name of the EMR cluster."
  value       = aws_emr_cluster.main.name
}

# Exposes the name of the bucket that receives EMR logs.
output "emr_logs_bucket_name" {
  description = "The S3 bucket used for EMR logs."
  value       = aws_s3_bucket.emr_logs_bucket.bucket
}
