#!/bin/bash

# ==========================================================================
# SCRIPT: s3_migrate_data.sh
# DESCRIPTION: Migrates data (objects) from a specified source S3 bucket
#              (or a specific prefix within it) to a destination S3 bucket.
#              This script is useful for tasks like copying production data
#              to a development environment, or moving data between different
#              S3 buckets for organizational or cost optimization purposes.
#              It leverages the `aws s3 sync` command for efficient copying.
#
# USE CASE SCENARIO:
# A data engineering team needs to regularly copy a subset of production data
# (e.g., logs from a specific date range) from a production S3 bucket to a
# development S3 bucket for testing new analytics pipelines. This script
# automates that copying process using AWS CLI.
#
# PREREQUISITES:
# 1.  **AWS CLI:** The AWS Command Line Interface must be installed and configured
#     with credentials that have the necessary permissions.
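# 2.  **IAM Permissions:** The principal executing this script must have:
#     - `s3:ListBucket` on the source bucket.
#     - `s3:GetObject` on objects in the source bucket.
#     - `s3:ListBucket` on the destination bucket (`aws s3 sync` lists the
#       destination to decide which objects are new or modified).
#     - `s3:PutObject` on objects in the destination bucket.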
# 3.  **Existing Resources:**
#     - A source S3 bucket with data.
#     - A destination S3 bucket.
#
# HOW TO USE:
# 1.  **Save the script:** Save this content as `s3_migrate_data.sh`.
# 2.  **Make it executable:** `chmod +x s3_migrate_data.sh`
# 3.  **Configure variables:** Open the script and update the `--- Configuration Variables ---`
#     section with your specific environment details.
# 4.  **Run from your terminal:** `./s3_migrate_data.sh`
#
# IMPORTANT CONSIDERATIONS:
# - This script uses `aws s3 sync`, which is highly efficient. It only copies new or modified files
#   and can handle large numbers of objects. It also handles directory structures.
# - Ensure the destination bucket has appropriate lifecycle policies if data retention differs.
# - This script does NOT delete objects from the source bucket.
# - If you need to delete objects in the destination that are not in the source, add `--delete` to `aws s3 sync`.
# ==========================================================================

# --- Configuration Variables (REPLACE with your actual values) ---

# Source S3 bucket name only, without the "s3://" scheme
# (e.g., "my-prod-data-bucket").
SOURCE_BUCKET='my-prod-data-bucket'

# Destination S3 bucket name only (e.g., "my-dev-data-bucket").
DESTINATION_BUCKET='my-dev-data-bucket'

# Optional. Key prefix that limits which source objects are considered
# (e.g., "logs/2023/"). Leave empty to use the whole bucket.
SOURCE_PREFIX=''

# Optional. Key prefix under which copied objects are placed in the
# destination bucket (e.g., "migrated/"), i.e. the target "folder".
DESTINATION_PREFIX=''

# AWS region where the buckets are located.
AWS_REGION='us-east-1'
# ----------------------------------------------------------------

# Announce the planned transfer up front so the log records exactly which
# source, destination and region this run targets.
# printf is used rather than echo: bash's builtin echo does not expand "\n"
# unless given -e, so the previous message ended with a literal backslash-n.
# The format ends with two newlines to keep a blank line after the message.
printf 'Starting S3 data migration from s3://%s/%s to s3://%s/%s in region %s...\n\n' \
    "${SOURCE_BUCKET}" "${SOURCE_PREFIX}" \
    "${DESTINATION_BUCKET}" "${DESTINATION_PREFIX}" \
    "${AWS_REGION}"

# Sanity-check the configured locations before touching S3.

# Both bucket names are required; an empty name would produce an invalid
# "s3:///..." URI and only fail later with a less helpful error.
if [[ -z "${SOURCE_BUCKET}" || -z "${DESTINATION_BUCKET}" ]]; then
    printf 'Error: SOURCE_BUCKET and DESTINATION_BUCKET must both be set. Exiting.\n\n' >&2
    exit 1
fi

# Abort when source and destination are the exact same location, i.e. the
# same bucket AND the same prefix. Using one bucket with two different
# prefixes is allowed. Only an exact string match is detected here.
# Diagnostics go to stderr, and printf is used so the trailing newline is
# emitted as a real newline instead of a literal "\n".
if [[ "${SOURCE_BUCKET}" == "${DESTINATION_BUCKET}" && "${SOURCE_PREFIX}" == "${DESTINATION_PREFIX}" ]]; then
    printf 'Error: Source and Destination paths are identical. Aborting migration to prevent self-copying. Exiting.\n\n' >&2
    exit 1
fi

# Build the full S3 URIs handed to `aws s3 sync`.
# With an empty prefix the URI is simply the bucket root ("s3://bucket/").
FULL_SOURCE_PATH="s3://${SOURCE_BUCKET}/${SOURCE_PREFIX}"
FULL_DESTINATION_PATH="s3://${DESTINATION_BUCKET}/${DESTINATION_PREFIX}"

# Log the command that is about to run so it can be reproduced by hand.
# printf is used rather than echo: bash's builtin echo does not expand "\n"
# unless given -e, so the previous message ended with a literal backslash-n.
printf 'Executing: aws s3 sync "%s" "%s" --region "%s"\n\n' \
    "${FULL_SOURCE_PATH}" "${FULL_DESTINATION_PATH}" "${AWS_REGION}"

# Run the sync and report the outcome.
# The command is tested directly in the `if` so success and failure are each
# handled exactly once. The earlier form exited inside `|| { ...; exit 1; }`
# and then re-checked `$?`, which made its failure branch unreachable.
if aws s3 sync "${FULL_SOURCE_PATH}" "${FULL_DESTINATION_PATH}" --region "${AWS_REGION}"; then
    printf 'S3 data migration completed successfully.\n\n'
else
    # At the top of the else branch, $? still holds the status of `aws s3 sync`.
    sync_status=$?
    # Diagnostics go to stderr. The script's own exit code stays 1, as before.
    printf 'Error: aws s3 sync command failed (exit code %d). Please check the logs above for details. Exiting.\n\n' \
        "${sync_status}" >&2
    exit 1
fi
