#!/bin/bash

# ==========================================================================
# SCRIPT: db_refresh_rds.sh
# DESCRIPTION: Refreshes an Amazon RDS DB instance (typically non-production)
#              from the latest automated snapshot of a source DB instance
#              (typically production). This script automates the process of
#              finding the latest snapshot, optionally deleting an old target
#              instance, and then restoring a new instance from the snapshot.
#              This is a common practice for maintaining up-to-date
#              development or staging environments with production-like data.
#
# USE CASE SCENARIO:
# A development team needs to periodically refresh their development database
# with the latest data from the production database to test new features or fix
# bugs. Manually performing this process is time-consuming and error-prone.
# This script automates that refresh process using AWS CLI commands.
#
# PREREQUISITES:
# 1.  **AWS CLI:** The AWS Command Line Interface must be installed and configured
#     with credentials that have the necessary permissions.
# 2.  **IAM Permissions:** The principal executing this script (user, role, etc.)
#     must have comprehensive permissions for RDS operations, including:
#     - `rds:DescribeDBSnapshots`
#     - `rds:DeleteDBInstance` (if `DELETE_OLD_INSTANCE` is true. Use with caution!)
#     - `rds:RestoreDBInstanceFromDBSnapshot`
#     - `rds:DescribeDBInstances`
# 3.  **Existing Resources:**
#     - A source RDS DB instance (e.g., your production database) with automated snapshots enabled.
#     - A pre-existing RDS DB Subnet Group where the new instance will be launched.
#     - One or more pre-existing VPC Security Group(s) for the new instance, allowing
#       necessary inbound/outbound traffic.
#
# HOW TO USE:
# 1.  **Save the script:** Save this content as `db_refresh_rds.sh`.
# 2.  **Make it executable:** `chmod +x db_refresh_rds.sh`
# 3.  **Configure variables:** Open the script and update the `--- Configuration Variables ---`
#     section with your specific environment details.
# 4.  **Run from your terminal:** `./db_refresh_rds.sh`
#
# IMPORTANT CONSIDERATIONS:
# - This script is designed for non-production refreshes. Deleting the existing target instance (and losing its data) is intentional.
# - The new instance will inherit the database engine, version, and master credentials from the snapshot.
# - If the source DB is encrypted, the target DB will also be encrypted (using the same KMS key).
# - The script waits for AWS operations to complete, which can take time depending on instance size.
# - `jq` is not required: the script reads AWS CLI results using `--output text` and does not parse JSON.
# ==========================================================================

# --- Configuration Variables (REPLACE with your actual values) ---

# AWS region where the RDS instances are located.
AWS_REGION='us-east-1'

# Identifier of the source RDS DB instance whose automated snapshots are
# searched (e.g., "my-prod-db-instance").
SOURCE_DB_IDENTIFIER='my-prod-db'

# Identifier for the new/refreshed RDS DB instance (e.g., "my-dev-db-refreshed").
TARGET_DB_IDENTIFIER='my-dev-db'

# DB instance class for the new instance (e.g., "db.t3.medium").
DB_INSTANCE_CLASS='db.t3.medium'

# DB Subnet Group Name where the new instance will be deployed.
DB_SUBNET_GROUP_NAME='my-db-subnet-group'

# Comma-separated list of Security Group IDs (e.g., "sg-id1,sg-id2").
VPC_SECURITY_GROUP_IDS='sg-0abcdef1234567890'

# Set to "true" (string) to delete an existing target instance before the
# restore, "false" otherwise. USE WITH CAUTION!
DELETE_OLD_INSTANCE='true'

# ----------------------------------------------------------------

# printf is used wherever a blank line is wanted: Bash's builtin `echo` does
# not interpret "\n" without -e and would print the two characters literally.
printf "Starting RDS refresh process for '%s' from '%s' in region %s...\n\n" \
    "${TARGET_DB_IDENTIFIER}" "${SOURCE_DB_IDENTIFIER}" "${AWS_REGION}"

# ==========================================================================
# STEP 1: Identify the latest automated snapshot of the source DB instance.
# Automated snapshots are regularly created by RDS for backup purposes.
# Sets LATEST_SNAPSHOT_ID and LATEST_SNAPSHOT_TIME; exits 1 if the lookup
# fails or no usable snapshot exists.
# ==========================================================================
echo ">>> Step 1: Searching for the latest automated snapshot of '${SOURCE_DB_IDENTIFIER}'..."

# Use `aws rds describe-db-snapshots` to list snapshots.
# `--snapshot-type "automated"` restricts the search to automatically created snapshots.
# `[?Status=='available']` drops snapshots that are still being created, since
#   those cannot be restored from yet.
# `sort_by(@, &SnapshotCreateTime)` sorts the remaining snapshots by creation time.
# `[-1].[DBSnapshotIdentifier, SnapshotCreateTime]` returns the identifier and
#   creation time of the last (latest) snapshot in a single call, so both values
#   always describe the same snapshot. With `--output text` they arrive as one
#   tab-separated line.
# stderr is intentionally NOT suppressed so credential/permission problems are visible.
if ! SNAPSHOT_LOOKUP=$(aws rds describe-db-snapshots \
    --db-instance-identifier "${SOURCE_DB_IDENTIFIER}" \
    --snapshot-type "automated" \
    --query "DBSnapshots[?Status=='available'] | sort_by(@, &SnapshotCreateTime) | [-1].[DBSnapshotIdentifier, SnapshotCreateTime]" \
    --region "${AWS_REGION}" \
    --output text); then
    printf "Error: Failed to list snapshots for DB instance '%s' (see the AWS CLI error above). Exiting.\n\n" \
        "${SOURCE_DB_IDENTIFIER}" >&2
    exit 1
fi

# Split "<id><TAB><time>" into the two variables later steps rely on.
read -r LATEST_SNAPSHOT_ID LATEST_SNAPSHOT_TIME <<< "${SNAPSHOT_LOOKUP}"

# Check if a snapshot ID was actually found. With `--output text` the AWS CLI
# prints the literal word "None" for a null result (e.g. no snapshots at all),
# so that value must be treated the same as an empty string.
if [ -z "${LATEST_SNAPSHOT_ID}" ] || [ "${LATEST_SNAPSHOT_ID}" = "None" ]; then
    printf "Error: No automated snapshots found for DB instance '%s'. Please ensure automated backups are enabled and the identifier is correct. Exiting.\n\n" \
        "${SOURCE_DB_IDENTIFIER}" >&2
    exit 1
fi

echo "   Found latest snapshot: '${LATEST_SNAPSHOT_ID}' created at ${LATEST_SNAPSHOT_TIME}"

# ==========================================================================
# STEP 2: (Optional) Delete the old target DB instance if it exists and
#         DELETE_OLD_INSTANCE is set to "true".
# This is crucial for refreshing an existing environment. USE WITH CAUTION!
# Any other value than "true" skips the deletion (with a warning if the value
# is not "false").
# ==========================================================================
if [ "${DELETE_OLD_INSTANCE}" = "true" ]; then
    printf "\n>>> Step 2: Checking for and deleting existing target DB instance '%s' (as requested)...\n" \
        "${TARGET_DB_IDENTIFIER}"

    # Check if the target DB instance currently exists. stderr is captured
    # together with stdout so that a genuine "not found" answer can be told
    # apart from other failures (missing permissions, bad region, ...), which
    # must not be mistaken for "nothing to delete".
    DB_INSTANCE_STATUS=$(aws rds describe-db-instances \
        --db-instance-identifier "${TARGET_DB_IDENTIFIER}" \
        --query "DBInstances[0].DBInstanceStatus" \
        --region "${AWS_REGION}" \
        --output text 2>&1)
    DESCRIBE_EXIT_CODE=$?

    if [ "${DESCRIBE_EXIT_CODE}" -ne 0 ]; then
        case "${DB_INSTANCE_STATUS}" in
            *DBInstanceNotFound*)
                # Expected when the target has never been created or is already gone.
                DB_INSTANCE_STATUS=""
                ;;
            *)
                printf "Error: Could not check for existing instance '%s': %s\n" \
                    "${TARGET_DB_IDENTIFIER}" "${DB_INSTANCE_STATUS}" >&2
                exit 1
                ;;
        esac
    fi

    if [ -n "${DB_INSTANCE_STATUS}" ]; then
        if [ "${DB_INSTANCE_STATUS}" = "deleting" ]; then
            # A second delete request on an instance that is already being
            # deleted is rejected by RDS, so only wait for it to finish.
            echo "   Existing instance '${TARGET_DB_IDENTIFIER}' is already being deleted. Waiting for it to be fully deleted... (This may take several minutes)"
        else
            echo "   Existing instance '${TARGET_DB_IDENTIFIER}' found with status: ${DB_INSTANCE_STATUS}. Initiating deletion..."

            # Delete the DB instance. `--skip-final-snapshot` is used to prevent creating
            # a final snapshot before deletion, which is usually desired for non-production refreshes
            # to speed up the process. `--delete-automated-backups` cleans up all associated automated backups.
            aws rds delete-db-instance \
                --db-instance-identifier "${TARGET_DB_IDENTIFIER}" \
                --skip-final-snapshot \
                --delete-automated-backups \
                --region "${AWS_REGION}" || { echo "Error deleting instance. Exiting." >&2; exit 1; }

            echo "   Deletion initiated for '${TARGET_DB_IDENTIFIER}'. Waiting for it to be fully deleted... (This may take several minutes)"
        fi

        # Use `aws rds wait db-instance-deleted` to block script execution until the instance
        # is confirmed deleted. This prevents the script from trying to restore with a conflicting identifier.
        aws rds wait db-instance-deleted \
            --db-instance-identifier "${TARGET_DB_IDENTIFIER}" \
            --region "${AWS_REGION}" || { echo "Wait for deletion failed. Exiting." >&2; exit 1; }
        echo "   Instance '${TARGET_DB_IDENTIFIER}' deleted successfully."
    else
        echo "   No existing instance '${TARGET_DB_IDENTIFIER}' found. Skipping deletion."
    fi
elif [ "${DELETE_OLD_INSTANCE}" = "false" ]; then
    printf "\n>>> Step 2: Skipping deletion of old instance as DELETE_OLD_INSTANCE is set to 'false'. Ensure no existing instance with '%s' conflicts with the new instance ID.\n" \
        "${TARGET_DB_IDENTIFIER}"
else
    printf '\n>>> Step 2: Invalid value for DELETE_OLD_INSTANCE. Must be "true" or "false". Skipping deletion.\n' >&2
fi

# ==========================================================================
# STEP 3: Restore a new RDS instance from the latest identified snapshot.
# This is the core operation of refreshing the database.
# ==========================================================================
printf "\n>>> Step 3: Restoring new DB instance '%s' from snapshot '%s'...\n" \
    "${TARGET_DB_IDENTIFIER}" "${LATEST_SNAPSHOT_ID}"

# VPC_SECURITY_GROUP_IDS is configured as a comma-separated string, but the
# AWS CLI expects every list element as its own argument. Strip any spaces
# and split on commas so "sg-id1, sg-id2" becomes two arguments.
IFS=',' read -r -a SECURITY_GROUP_ID_LIST <<< "${VPC_SECURITY_GROUP_IDS// /}"

# `--no-publicly-accessible` is the negative form of the boolean flag; the
# AWS CLI does not accept a value such as "false" after `--publicly-accessible`.
aws rds restore-db-instance-from-db-snapshot \
    --db-instance-identifier "${TARGET_DB_IDENTIFIER}" \
    --db-snapshot-identifier "${LATEST_SNAPSHOT_ID}" \
    --db-instance-class "${DB_INSTANCE_CLASS}" \
    --db-subnet-group-name "${DB_SUBNET_GROUP_NAME}" \
    --vpc-security-group-ids "${SECURITY_GROUP_ID_LIST[@]}" \
    --no-publicly-accessible \
    --tags "Key=Name,Value=${TARGET_DB_IDENTIFIER}" \
    --region "${AWS_REGION}" || { echo "Error restoring instance. Exiting." >&2; exit 1; }

printf "   Restore initiated for '%s'. Waiting for it to be available... (This may take several minutes or longer)\n\n" \
    "${TARGET_DB_IDENTIFIER}"

# Block until RDS reports the restored instance as available, so the endpoint
# lookup that follows sees a fully provisioned instance.
aws rds wait db-instance-available \
    --db-instance-identifier "${TARGET_DB_IDENTIFIER}" \
    --region "${AWS_REGION}" || { echo "Wait for new instance failed. Exiting." >&2; exit 1; }

printf "   New DB instance '%s' restored and is now AVAILABLE.\n\n" "${TARGET_DB_IDENTIFIER}"

# ==========================================================================
# STEP 4: Retrieve and print the endpoint of the newly restored instance.
# This is the host:port pair applications will use to connect.
# Sets NEW_INSTANCE_ENDPOINT and NEW_INSTANCE_PORT.
# ==========================================================================
echo ">>> Step 4: Retrieving endpoint for new instance '${TARGET_DB_IDENTIFIER}'..."

# Fetch address and port in one call; with `--output text` they are returned
# as a single tab-separated line. stderr is left visible so that a failing
# lookup explains itself.
ENDPOINT_LOOKUP=$(aws rds describe-db-instances \
    --db-instance-identifier "${TARGET_DB_IDENTIFIER}" \
    --query "DBInstances[0].Endpoint.[Address, Port]" \
    --region "${AWS_REGION}" \
    --output text)

read -r NEW_INSTANCE_ENDPOINT NEW_INSTANCE_PORT <<< "${ENDPOINT_LOOKUP}"

# The AWS CLI prints the literal word "None" for null values in text output,
# so "None" is rejected in addition to empty strings.
if [ -n "${NEW_INSTANCE_ENDPOINT}" ] && [ "${NEW_INSTANCE_ENDPOINT}" != "None" ] \
    && [ -n "${NEW_INSTANCE_PORT}" ] && [ "${NEW_INSTANCE_PORT}" != "None" ]; then
    echo "   New instance endpoint: ${NEW_INSTANCE_ENDPOINT}:${NEW_INSTANCE_PORT}"
    printf "   You can now connect to your refreshed database using this endpoint.\n\n"
else
    printf "Error: Could not retrieve endpoint for new instance '%s'. Please check the AWS console.\n\n" \
        "${TARGET_DB_IDENTIFIER}" >&2
fi

printf "=== RDS refresh process for '%s' completed successfully. ===\n\n" "${TARGET_DB_IDENTIFIER}"
