#!/usr/bin/env python3

import json
import argparse
import psycopg2 # Example for PostgreSQL. Replace with appropriate driver for other DBs.
import os

# ==========================================================================
# SCRIPT: rds_execute_sql.py
# DESCRIPTION: Executes SQL statements from a local .sql file on an Amazon RDS
#              PostgreSQL database. This script is useful for applying schema
#              changes, loading initial data, or running maintenance scripts.
#
# USE CASE SCENARIO:
# A development team needs to apply a new database schema or load seed data
# into their RDS PostgreSQL instance as part of a deployment process. This
# script automates the execution of SQL scripts against the database.
#
# PREREQUISITES:
# 1.  **Python Libraries:** The following Python libraries must be installed:
#     - `psycopg2-binary`: PostgreSQL adapter for Python (`pip install psycopg2-binary`)
#       (For MySQL, use `pymysql`; for SQL Server, use `pyodbc` or `pymssql`)
# 2.  **IAM Permissions:** The principal executing this script needs network access
#     to the RDS instance. Ensure the security group of the RDS instance allows
#     inbound connections from where this script is run.
# 3.  **Existing Resources:**
#     - An Amazon RDS PostgreSQL instance.
#     - A local .sql file containing the SQL statements to be executed.
#
# HOW TO USE:
# 1.  **Save the script:** Save this content as `rds_execute_sql.py`.
# 2.  **Make it executable (Linux/macOS):** `chmod +x rds_execute_sql.py`
# 3.  **Prepare your SQL file:** Create a .sql file (e.g., `schema.sql`)
#     containing the SQL statements. Example:
#     `CREATE TABLE IF NOT EXISTS products (id SERIAL PRIMARY KEY, name VARCHAR(255), price DECIMAL(10, 2));`
#     `INSERT INTO products (name, price) VALUES ('Laptop', 1200.00);`
# 4.  **Run from your terminal:**
#     python rds_execute_sql.py \
#       --db-host "my-rds-instance.abcdefghijk.us-east-1.rds.amazonaws.com" \
#       --db-port 5432 \
#       --db-name "mydb" \
#       --db-user "dbadmin" \
#       --db-password "MySecurePassword123!" \
#       --sql-file-path "./schema.sql"
#
#     **Arguments:**
#     - `--db-host`: The hostname or IP address of the RDS instance.
#     - `--db-port`: The port number of the database (default: 5432 for PostgreSQL).
#     - `--db-name`: The name of the database to connect to.
#     - `--db-user`: The username to connect to the database.
#     - `--db-password`: The password for the database user.
#     - `--sql-file-path`: The path to the local .sql file containing the statements.
#
# IMPORTANT CONSIDERATIONS:
# - This script executes all SQL statements in the file as a single transaction.
#   If any statement fails, the entire transaction is rolled back.
# - Ensure the database user has appropriate permissions to execute the SQL statements.
# - For very large SQL files or complex migrations, consider using database-specific
#   migration tools (e.g., Flyway, Liquibase) or AWS DMS.
# ==========================================================================

def execute_sql_on_rds(
    db_host: str,
    db_port: int,
    db_name: str,
    db_user: str,
    db_password: str,
    sql_file_path: str
):
    """
    Executes SQL statements from a local .sql file on an RDS PostgreSQL database.

    This function establishes a connection to the specified RDS PostgreSQL database,
    reads all SQL statements from the provided file, and executes them.
    It handles connection and execution errors, and commits the transaction upon success.

    Args:
        db_host (str): The hostname or IP address of the RDS instance.
        db_port (int): The port number of the database (e.g., 5432 for PostgreSQL).
        db_name (str): The name of the database to connect to.
        db_user (str): The username for database authentication.
        db_password (str): The password for the database user.
        sql_file_path (str): The absolute or relative path to the .sql file containing the statements.
    """
    print(f"Starting SQL execution from '{sql_file_path}' on RDS database '{db_name}'...")

    # ==========================================================================
    # STEP 1: Validate the existence of the local SQL file.
    # The script cannot proceed if the SQL file does not exist.
    # ==========================================================================
    print(f"\n>>> Step 1: Checking for local SQL file: '{sql_file_path}'...")
    if not os.path.exists(sql_file_path):
        print(f"Error: SQL file not found at '{sql_file_path}'. Please check the path. Exiting.")
        return
    print(f"   Local SQL file '{sql_file_path}' found.")

    # ==========================================================================
    # STEP 2: Read SQL statements from the file.
    # The entire content of the file is read as a single string.
    # ==========================================================================
    print(f"\n>>> Step 2: Reading SQL statements from '{sql_file_path}'...")
    sql_statements = None
    try:
        with open(sql_file_path, 'r') as f:
            sql_statements = f.read()
        if not sql_statements.strip():
            print(f"Warning: SQL file '{sql_file_path}' is empty. No statements to execute.")
            return
        print(f"   Successfully loaded SQL statements from {sql_file_path}.")
    except Exception as e:
        print(f"Error reading SQL file: {e}. Exiting.")
        return

    # ==========================================================================
    # STEP 3: Connect to the RDS PostgreSQL database.
    # This uses the `psycopg2` library to establish a connection.
    # ==========================================================================
    print(f"\n>>> Step 3: Connecting to RDS database '{db_name}' at {db_host}:{db_port}...")
    conn = None
    try:
        conn = psycopg2.connect(
            host=db_host,
            port=db_port,
            database=db_name,
            user=db_user,
            password=db_password
        )
        cur = conn.cursor() # Create a cursor object to execute SQL commands.
        print("   Successfully connected to RDS database.")

        # ==========================================================================
        # STEP 4: Execute the SQL statements.
        # `cur.execute()` can handle multiple statements if they are separated by semicolons.
        # The entire execution is wrapped in a single transaction.
        # ==========================================================================
        print(f"\n>>> Step 4: Executing SQL statements from '{sql_file_path}'...")
        cur.execute(sql_statements)
        conn.commit() # Commit the transaction if all statements execute successfully.
        print(f"   Successfully executed SQL statements from '{sql_file_path}'. Transaction committed.")

    except psycopg2.Error as e:
        # Catch specific PostgreSQL errors.
        print(f"Error during database connection or SQL execution: {e}")
        if conn:
            conn.rollback() # Rollback the transaction on error to maintain data integrity.
            print("   Transaction rolled back due to error.")
    except Exception as e:
        # Catch any other unexpected errors during database operations.
        print(f"An unexpected error occurred during database operations: {e}")
    finally:
        # Ensure the database connection is closed, regardless of success or failure.
        if conn:
            cur.close()
            conn.close()
            print("   Database connection closed.")

    print("\n=== RDS SQL execution completed. ===")

# ==========================================================================
# Main execution block to parse command-line arguments and call the function.
# This allows the script to be run from the command line with various options.
# ==========================================================================
if __name__ == "__main__":
    # Create an argument parser object.
    parser = argparse.ArgumentParser(
        description="""
        Executes SQL statements from a local .sql file on an Amazon RDS PostgreSQL database.
        Useful for schema migrations, data loading, or running maintenance scripts.
        """
    )
    # Define the command-line arguments the script expects.
    parser.add_argument(
        "--db-host", 
        required=True, 
        help="The hostname or IP address of the RDS instance (e.g., 'my-rds-instance.abcdefghijk.us-east-1.rds.amazonaws.com')."
    )
    parser.add_argument(
        "--db-port", 
        type=int, 
        default=5432, 
        help="The port number of the database (default: 5432 for PostgreSQL)."
    )
    parser.add_argument(
        "--db-name", 
        required=True, 
        help="The name of the database to connect to (e.g., 'mydb')."
    )
    parser.add_argument(
        "--db-user", 
        required=True, 
        help="The username for database authentication (e.g., 'dbadmin')."
    )
    parser.add_argument(
        "--db-password", 
        required=True, 
        help="The password for the database user."
    )
    parser.add_argument(
        "--sql-file-path", 
        required=True, 
        help="The path to the local .sql file containing the statements (e.g., './sql/schema.sql')."
    )

    # Parse the arguments provided by the user when running the script.
    args = parser.parse_args()

    # Call the main function with the parsed arguments.
    execute_sql_on_rds(
        db_host=args.db_host,
        db_port=args.db_port,
        db_name=args.db_name,
        db_user=args.db_user,
        db_password=args.db_password,
        sql_file_path=args.sql_file_path
    )
