⬡ Hub
Skip to content

OpenCV: Image and Video I/O and Basic Manipulations

OpenCV (Open Source Computer Vision Library) is a powerful library for computer vision tasks. Its fundamental operations involve reading, writing, and performing basic manipulations on images and videos. Understanding these core functionalities is the first step in any computer vision project.

1. Image I/O: Reading, Displaying, and Saving Images

OpenCV handles images primarily as NumPy arrays. Color images are typically loaded in BGR (Blue, Green, Red) channel order.

a. Reading an Image (cv2.imread())

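Reads an image from a specified file. If the file is missing or cannot be decoded, cv2.imread() returns None instead of raising an exception, so always check the result before using it.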

b. Displaying an Image (cv2.imshow(), matplotlib.pyplot)

  • cv2.imshow(): Displays an image in a window. It requires cv2.waitKey() to keep the window open and cv2.destroyAllWindows() to close it. This is useful for interactive applications.
  • matplotlib.pyplot.imshow(): Often preferred in Jupyter notebooks or scripts for non-interactive display, as it integrates well with other plotting. Remember that Matplotlib expects RGB, while OpenCV loads BGR.

c. Saving an Image (cv2.imwrite())

Saves an image to a specified file. The file extension determines the image format.

import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

# Create a dummy image for demonstration:
# a black canvas 300 rows high and 400 columns wide, with 3 colour channels.
dummy_img = np.zeros((300, 400, 3), dtype=np.uint8)
# Draw a red rectangle. Colours are BGR tuples, so (0, 0, 255) is red;
# a thickness of -1 asks OpenCV to fill the shape instead of outlining it.
cv2.rectangle(dummy_img, (50, 50), (150, 150), (0, 0, 255), -1)
# Draw a filled green circle of radius 40 centred at (x=300, y=100).
cv2.circle(dummy_img, (300, 100), 40, (0, 255, 0), -1) # BGR: (0,255,0) is green
# Add blue text (font scale 1, stroke thickness 2).
cv2.putText(dummy_img, 'OpenCV', (50, 250), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

# Save the dummy image. cv2.imwrite() returns False when the file could not
# be written, so only report success when the write really happened.
image_path = 'dummy_opencv_image.png'
if cv2.imwrite(image_path, dummy_img):
    print(f"Dummy image saved to {image_path}")
else:
    print(f"Error: Could not save image to {image_path}.")

# 1. Read the image
# cv2.imread() signals failure by returning None, hence the check below.
img = cv2.imread(image_path)

if img is None:
    print(f"Error: Could not load image from {image_path}. Please check the path.")
else:
    # 2. Display using Matplotlib (Preferred for Jupyter/static plots)
    # Convert BGR (OpenCV) to RGB (Matplotlib); without this, red and blue
    # would appear swapped.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(8, 6))
    plt.imshow(img_rgb)
    plt.title('Image Displayed with Matplotlib (RGB)')
    plt.axis('off') # Hide axes for cleaner image display
    plt.show()

    # 3. Display using cv2.imshow (Interactive window)
    # Left commented out because it needs a GUI; uncomment to try it locally.
    # try:
    #     cv2.imshow('OpenCV Image Window (BGR)', img)
    #     print("Press any key to close the OpenCV image window...")
    #     cv2.waitKey(0) # Waits indefinitely until a key is pressed
    #     cv2.destroyAllWindows() # Closes all OpenCV windows
    # except Exception as e:
    #     print(f"Could not open cv2.imshow window, perhaps running in environment without GUI: {e}")

    # 4. Get image properties
    print(f"\nImage Shape (Height, Width, Channels): {img.shape}")
    print(f"Image Data Type: {img.dtype}") # Typically uint8
    # img.size counts every channel value, so divide by the channel count
    # to get the number of pixels.
    print(f"Total Pixels: {img.size // img.shape[2]} (if color)")

2. Basic Image Manipulations

a. Grayscale Conversion (cv2.cvtColor())

Converting a color image to grayscale is a common preprocessing step.

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load the colour image written by the image I/O example.
img_color = cv2.imread('dummy_opencv_image.png')

# Handle the failure case first: imread yields None when loading fails.
if img_color is None:
    print("Error: Could not load image for grayscale conversion.")
else:
    # Collapse the three BGR channels into a single intensity channel.
    gray_img = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

    plt.figure(figsize=(6, 5))
    # A single-channel array needs an explicit 'gray' colormap to be drawn
    # in shades of gray.
    plt.imshow(gray_img, cmap='gray')
    plt.title('Grayscale Image')
    plt.axis('off')
    plt.show()

    # The result has no channel axis: its shape is just (Height, Width).
    gray_shape = gray_img.shape
    print(f"Grayscale Image Shape: {gray_shape}")

b. Resizing Images (cv2.resize())

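Changes the dimensions of an image, either to an explicit target size or by a scaling factor. Note that cv2.resize() takes the target size as (width, height), whereas img.shape reports (height, width, channels).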

import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('dummy_opencv_image.png')

if img is not None:
    # Both resizes below shrink the image. OpenCV's default interpolation is
    # bilinear (INTER_LINEAR), which can alias when downscaling; INTER_AREA is
    # the interpolation the OpenCV documentation recommends for shrinking.

    # Resize to a specific dimension. The size tuple is (width, height),
    # the reverse of the (height, width) order used by img.shape.
    resized_img = cv2.resize(img, (200, 150), interpolation=cv2.INTER_AREA) # new_width=200, new_height=150
    print(f"Resized to (200, 150) shape: {resized_img.shape}")

    # Resize by a scaling factor
    # fx, fy are scaling factors along horizontal and vertical axes;
    # passing None as the size makes OpenCV derive it from them.
    scaled_img = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA) # Scale to half size
    print(f"Scaled to 0.5x shape: {scaled_img.shape}")

    # Display both results side by side (converted from BGR to RGB for Matplotlib)
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB))
    plt.title('Resized Image')
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(scaled_img, cv2.COLOR_BGR2RGB))
    plt.title('Scaled Image (0.5x)')
    plt.axis('off')
    plt.show()
else:
    print("Error: Could not load image for resizing.")

c. Cropping Images

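Cropping an image involves selecting a region of interest (ROI) by simply slicing the NumPy array. The slice is a view of the original array rather than a copy, so call .copy() on it if you need to modify the crop without affecting the source image.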

import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('dummy_opencv_image.png')

# Handle the failure case first: imread yields None when loading fails.
if img is None:
    print("Error: Could not load image for cropping.")
else:
    # Slicing is [rows, columns] with exclusive end bounds, so this keeps
    # rows 50..199 and columns 100..349.
    cropped_img = img[50:200, 100:350]

    # Matplotlib expects RGB channel order, OpenCV delivers BGR.
    cropped_rgb = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(6, 5))
    plt.imshow(cropped_rgb)
    plt.title('Cropped Image')
    plt.axis('off')
    plt.show()

d. Rotating Images (cv2.getRotationMatrix2D(), cv2.warpAffine())

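Rotates an image around a specific point: cv2.getRotationMatrix2D() builds the 2x3 affine matrix for the rotation, and cv2.warpAffine() applies that matrix to the image.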

import cv2
import numpy as np
import matplotlib.pyplot as plt

img = cv2.imread('dummy_opencv_image.png')

# Handle the failure case first: imread yields None when loading fails.
if img is None:
    print("Error: Could not load image for rotation.")
else:
    # img.shape starts with (rows, columns), i.e. height then width.
    h, w = img.shape[0], img.shape[1]
    # The rotation centre is given as an (x, y) point, hence width first.
    center = (w // 2, h // 2)

    # Affine matrix for a 45-degree turn about the centre, scale factor 1.0.
    M = cv2.getRotationMatrix2D(center, 45, 1.0)
    # The output canvas keeps the input size (w, h), so corners that rotate
    # outside that canvas are cut off.
    rotated_img = cv2.warpAffine(img, M, (w, h))

    # Matplotlib expects RGB channel order, OpenCV delivers BGR.
    rotated_rgb = cv2.cvtColor(rotated_img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(6, 5))
    plt.imshow(rotated_rgb)
    plt.title('Rotated Image (45 degrees)')
    plt.axis('off')
    plt.show()

3. Video I/O: Reading and Writing Videos

OpenCV can also handle video streams, treating them as sequences of frames.

a. Reading a Video (cv2.VideoCapture())

Opens a video file or a camera device.

b. Writing a Video (cv2.VideoWriter())

Saves a sequence of frames as a video file.

import cv2
import numpy as np
import os

# Create a dummy video (optional, for demonstration if you want to write one)
# NOTE: writing synthetic frames needs no video source, but it presumably
# depends on the local OpenCV build providing an encoder for the chosen
# codec -- TODO confirm by checking out.isOpened() after creating the writer.
# dummy_output_video_path = 'dummy_output.avi'
# fourcc = cv2.VideoWriter_fourcc(*'XVID') # Codec for AVI
# out = cv2.VideoWriter(dummy_output_video_path, fourcc, 20.0, (640, 480)) # 20 FPS, 640x480

# for i in range(100): # Write 100 frames
#     frame = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
#     text = f'Frame {i}'
#     cv2.putText(frame, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
#     out.write(frame)
# out.release()
# print(f"Dummy video created: {dummy_output_video_path}")


# Reading frames from a video: pass a file path such as 'dummy_output.avi',
# or the integer 0 to open the default webcam instead.
# cap = cv2.VideoCapture('dummy_output.avi') # Or 0 for default webcam

# if not cap.isOpened():
#     print("Error: Could not open video stream or file.")
# else:
#     print("\nReading video frames...")
#     frame_count = 0
#     while True:
#         ret, frame = cap.read() # ret is boolean, frame is the image
#         if not ret:
#             break # End of video or error

#         frame_count += 1
#         # Optional: Process the frame here
#         # For example, convert to grayscale and display
#         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#         cv2.imshow('Video Frame (Grayscale)', gray_frame)

#         # waitKey(25) pauses up to 25 ms per frame while polling the keyboard
#         if cv2.waitKey(25) & 0xFF == ord('q'): # Press 'q' to quit
#             break

#     cap.release() # Release the video capture object
#     cv2.destroyAllWindows() # Close all OpenCV windows
#     print(f"Finished reading {frame_count} frames.")

Further Topics:

  • Drawing shapes and text on images.
  • Image arithmetic (addition, subtraction, blending).
  • Color spaces (HSV, LAB).
  • Thresholding and adaptive thresholding.
  • Bitwise operations.

These basic operations form the foundation for more advanced computer vision tasks. Mastering image and video I/O and manipulation is key to effectively using OpenCV in any project involving visual data.