Skip to content

OpenCV入门指南

1. OpenCV简介

OpenCV(Open Source Computer Vision Library)是一个开源的计算机视觉和机器学习软件库。它最初由Intel开发,之后曾先后由Willow Garage和Itseez提供支持,目前由非营利组织OpenCV.org(Open Source Vision Foundation)维护。OpenCV提供了超过2500个优化的算法,包括经典和最先进的计算机视觉和机器学习算法。

1.1 OpenCV的特点

  • 跨平台:支持Windows、Linux、macOS、Android、iOS等多个平台
  • 多语言支持:主要使用C++编写,提供Python、Java、MATLAB等语言接口
  • 高性能:针对实时应用进行了优化
  • 开源免费:4.5.0之前的版本基于BSD许可证,4.5.0及之后的版本基于Apache 2.0许可证,均可用于商业和研究用途
  • 功能丰富:涵盖图像处理、计算机视觉、机器学习等多个领域

1.2 OpenCV的应用领域

  • 图像和视频处理:滤波、变换、增强等
  • 目标检测与识别:人脸检测、物体识别、文字识别
  • 运动分析:光流、目标跟踪、运动检测
  • 机器学习:分类、聚类、回归等
  • 增强现实:相机标定、姿态估计
  • 医学图像分析:医学影像处理和分析
  • 工业检测:质量控制、缺陷检测

2. 安装与环境配置

2.1 Python环境安装

bash
# 使用pip安装OpenCV
pip install opencv-python

# 或者安装完整版本(包含contrib额外模块;与opencv-python二选一,不要同时安装)
pip install opencv-contrib-python

# 验证安装
python -c "import cv2; print(cv2.__version__)"

2.2 必要的依赖库

bash
# 安装常用的图像处理和科学计算库
pip install numpy matplotlib pillow
pip install jupyter notebook  # 用于交互式开发

2.3 开发环境设置

python
# 导入必要的库
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Make matplotlib render Chinese text in titles and labels.
# 'font.sans-serif' is a priority list: matplotlib uses the first family that
# is installed, so list common CJK fonts for Windows, macOS and Linux instead
# of relying on SimHei alone (it normally ships only with Windows).
rcParams['font.sans-serif'] = [
    'SimHei',               # Windows
    'Microsoft YaHei',      # Windows
    'PingFang SC',          # macOS
    'Heiti SC',             # macOS
    'Noto Sans CJK SC',     # Linux
    'WenQuanYi Micro Hei',  # Linux
    'DejaVu Sans',          # matplotlib's bundled default
]
# Draw the minus sign as ASCII '-' so it does not turn into an empty box
# when the selected CJK font lacks the Unicode minus glyph.
rcParams['axes.unicode_minus'] = False

# 定义显示图像的辅助函数
def show_image(image, title="Image", figsize=(10, 6)):
    """Display a single image in a matplotlib figure.

    Args:
        image: Image array. A 3-dimensional array is treated as an OpenCV
            BGR image and converted to RGB before display; any other array
            is drawn with a grayscale colormap.
        title: Text shown above the image.
        figsize: Figure size passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    if len(image.shape) != 3:
        # No channel axis: draw as a grayscale image.
        plt.imshow(image, cmap='gray')
    else:
        # OpenCV stores channels as BGR while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(title)
    plt.axis('off')
    plt.show()

def show_images(images, titles, figsize=(15, 5)):
    """Display several images side by side in a single row.

    Args:
        images: Sequence of image arrays. 3-dimensional arrays are treated as
            BGR and converted to RGB; all others are drawn in grayscale.
        titles: Sequence of titles, indexed in step with ``images``.
        figsize: Figure size passed to ``plt.figure``.
    """
    count = len(images)
    plt.figure(figsize=figsize)
    for position, current in enumerate(images, start=1):
        plt.subplot(1, count, position)
        if len(current.shape) != 3:
            plt.imshow(current, cmap='gray')
        else:
            # Convert from OpenCV's BGR order to the RGB order matplotlib expects.
            plt.imshow(cv2.cvtColor(current, cv2.COLOR_BGR2RGB))
        plt.title(titles[position - 1])
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Confirm the setup code ran and report which OpenCV version is installed.
print("OpenCV环境配置完成!")
print(f"OpenCV版本: {cv2.__version__}")

3. 基础图像操作

3.1 图像读取、显示和保存

python
# Read the same file twice: once in color (BGR) and once as grayscale.
image = cv2.imread('path/to/image.jpg')
gray_image = cv2.imread('path/to/image.jpg', cv2.IMREAD_GRAYSCALE)

# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded, so every step that uses the image stays inside the
# success branch.
if image is None:
    print("无法读取图像,请检查路径")
else:
    print(f"图像尺寸: {image.shape}")
    print(f"图像类型: {image.dtype}")

    # Show the image in a native OpenCV window.
    cv2.imshow('Original Image', image)
    cv2.waitKey(0)  # block until a key is pressed
    cv2.destroyAllWindows()  # close every OpenCV window

    # Save a copy to disk.
    cv2.imwrite('output.jpg', image)

    # Display with matplotlib (recommended inside Jupyter).
    show_image(image, "原始图像")

3.2 图像基本属性

python
# Build a 300x400 sample image with every channel initialised to zero.
image = np.zeros((300, 400, 3), dtype=np.uint8)
image[:, :, 2] = 255  # index 2 is the red channel because OpenCV orders channels B, G, R

print("图像基本属性:")
print(f"形状 (高度, 宽度, 通道数): {image.shape}")
# ndarray.size counts every array element (height * width * channels), so the
# pixel count has to be derived from the first two dimensions only.
print(f"总像素数: {image.shape[0] * image.shape[1]}")
print(f"元素总数 (像素数 × 通道数): {image.size}")
print(f"数据类型: {image.dtype}")
print(f"图像维度: {image.ndim}")

# Read the image dimensions; the channel axis exists only for color images.
height, width = image.shape[:2]
if len(image.shape) == 3:
    channels = image.shape[2]
    print(f"尺寸: {width}x{height}, 通道数: {channels}")
else:
    print(f"尺寸: {width}x{height} (灰度图像)")

# Read a single pixel; NumPy indexing is [row, column], i.e. (y, x).
pixel_value = image[100, 150]
print(f"像素值 (100, 150): {pixel_value}")

# Overwrite rows 100-199 and columns 150-249 with green (B=0, G=255, R=0).
image[100:200, 150:250] = [0, 255, 0]
show_image(image, "修改后的图像")

3.3 图像通道操作

python
# Split the image into its blue, green and red planes.
# `image` is whatever array the preceding snippet left in scope.
b, g, r = cv2.split(image)

# Show each plane on its own.
show_images([b, g, r], ['蓝色通道', '绿色通道', '红色通道'])

# Merge the planes back together in the same B, G, R order.
merged = cv2.merge([b, g, r])
show_image(merged, "合并后的图像")

# Build color images that keep one channel and replace the other two with zeros.
zeros = np.zeros(image.shape[:2], dtype=np.uint8)
red_only = cv2.merge([zeros, zeros, r])
green_only = cv2.merge([zeros, g, zeros])
blue_only = cv2.merge([b, zeros, zeros])

show_images([red_only, green_only, blue_only], 
           ['仅红色通道', '仅绿色通道', '仅蓝色通道'])

3.4 颜色空间转换

python
# Load a color test image from disk.
# NOTE(review): the result is not checked for None before it is used below.
test_image = cv2.imread('test_image.jpg')

# BGR -> RGB
rgb_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)

# BGR -> grayscale
gray = cv2.cvtColor(test_image, cv2.COLOR_BGR2GRAY)

# BGR -> HSV
hsv = cv2.cvtColor(test_image, cv2.COLOR_BGR2HSV)

# BGR -> LAB
lab = cv2.cvtColor(test_image, cv2.COLOR_BGR2LAB)

# Lay the results out in a 2x3 grid of subplots.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

axes[0, 0].imshow(rgb_image)
axes[0, 0].set_title('RGB')
axes[0, 0].axis('off')

axes[0, 1].imshow(gray, cmap='gray')
axes[0, 1].set_title('灰度')
axes[0, 1].axis('off')

# The HSV data is converted back to RGB for display, so this panel shows the
# image after an HSV round trip rather than the raw HSV values.
axes[0, 2].imshow(cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB))
axes[0, 2].set_title('HSV')
axes[0, 2].axis('off')

# Likewise, the LAB data is converted back to RGB before it is drawn.
axes[1, 0].imshow(cv2.cvtColor(lab, cv2.COLOR_LAB2RGB))
axes[1, 0].set_title('LAB')
axes[1, 0].axis('off')

# Split HSV into hue, saturation and value planes; only hue and saturation
# are plotted, the value plane `v` is left unused here.
h, s, v = cv2.split(hsv)
axes[1, 1].imshow(h, cmap='hsv')
axes[1, 1].set_title('HSV - 色调')
axes[1, 1].axis('off')

axes[1, 2].imshow(s, cmap='gray')
axes[1, 2].set_title('HSV - 饱和度')
axes[1, 2].axis('off')

plt.tight_layout()
plt.show()

4. 图像处理基础

4.1 图像几何变换

python
# Load the image and record its size.
# NOTE(review): the imread result is not checked for None before use.
image = cv2.imread('sample.jpg')
height, width = image.shape[:2]

# 1. Scaling
# Resize to a fixed size; cv2.resize takes the target as (width, height).
resized = cv2.resize(image, (400, 300))

# Resize by a percentage of the original size.
scale_percent = 50  # scale to 50%
new_width = int(width * scale_percent / 100)
new_height = int(height * scale_percent / 100)
scaled = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

show_images([image, resized, scaled], 
           ['原图', '指定尺寸', '按比例缩放'])

# 2. Rotation about the image center
center = (width // 2, height // 2)
angle = 45
scale = 1.0

# Build the 2x3 affine matrix for the rotation.
rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)

# Apply the rotation; the output keeps the original (width, height) canvas.
rotated = cv2.warpAffine(image, rotation_matrix, (width, height))

# 3. Translation
tx, ty = 100, 50  # shift 100 px along x and 50 px along y
translation_matrix = np.float32([[1, 0, tx], [0, 1, ty]])
translated = cv2.warpAffine(image, translation_matrix, (width, height))

# 4. Flipping
flipped_horizontal = cv2.flip(image, 1)  # horizontal flip
flipped_vertical = cv2.flip(image, 0)    # vertical flip
flipped_both = cv2.flip(image, -1)       # both axes; computed but not displayed below

show_images([rotated, translated, flipped_horizontal, flipped_vertical], 
           ['旋转45°', '平移', '水平翻转', '垂直翻转'])

4.2 图像滤波

python
# Load a test image and convert it to grayscale.
# NOTE(review): the imread result is not checked for None before use.
image = cv2.imread('test.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Add noise: random integers in [0, 50) with the same shape as the image,
# combined with cv2.add.
noise = np.random.randint(0, 50, gray.shape, dtype=np.uint8)
noisy = cv2.add(gray, noise)

# 1. Mean (box) filter with a 5x5 kernel
blur_mean = cv2.blur(noisy, (5, 5))

# 2. Gaussian filter with a 5x5 kernel; sigma 0 lets OpenCV derive it from the kernel size
blur_gaussian = cv2.GaussianBlur(noisy, (5, 5), 0)

# 3. Median filter (works well against salt-and-pepper noise)
blur_median = cv2.medianBlur(noisy, 5)

# 4. Bilateral filter (denoises while preserving edges)
blur_bilateral = cv2.bilateralFilter(noisy, 9, 75, 75)

# Show the original, the noisy input and the four filtered results.
images = [gray, noisy, blur_mean, blur_gaussian, blur_median, blur_bilateral]
titles = ['原图', '加噪声', '均值滤波', '高斯滤波', '中值滤波', '双边滤波']

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for i, (img, title) in enumerate(zip(images, titles)):
    # Map the flat index onto the 2x3 grid.
    row, col = i // 3, i % 3
    axes[row, col].imshow(img, cmap='gray')
    axes[row, col].set_title(title)
    axes[row, col].axis('off')
plt.tight_layout()
plt.show()

4.3 边缘检测

python
# Edge detection works on a grayscale copy of `image`
# (`image` is whatever array the preceding snippet left in scope).
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 1. Sobel edge detection
# CV_64F output keeps negative gradient values instead of clipping them.
sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)  # derivative along x
sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)  # derivative along y
sobel_combined = np.sqrt(sobel_x**2 + sobel_y**2)  # gradient magnitude

# 2. Laplacian edge detection
laplacian = cv2.Laplacian(gray, cv2.CV_64F)

# 3. Canny edge detection with thresholds 50 (low) and 150 (high)
canny = cv2.Canny(gray, 50, 150)

# Show the edge detection results in a 2x3 grid.
# sobel_x, sobel_y and laplacian are signed float arrays; they are passed to
# imshow as-is, without taking the absolute value first.
edge_images = [gray, sobel_x, sobel_y, sobel_combined, laplacian, canny]
edge_titles = ['原图', 'Sobel X', 'Sobel Y', 'Sobel合并', 'Laplacian', 'Canny']

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for i, (img, title) in enumerate(zip(edge_images, edge_titles)):
    # Map the flat index onto the 2x3 grid.
    row, col = i // 3, i % 3
    axes[row, col].imshow(img, cmap='gray')
    axes[row, col].set_title(title)
    axes[row, col].axis('off')
plt.tight_layout()
plt.show()

# Canny边缘检测参数调整示例
def canny_demo(image):
    """Compare Canny edge maps produced with several threshold pairs.

    Converts ``image`` (BGR) to grayscale, runs ``cv2.Canny`` once for each
    (low, high) threshold pair and shows all edge maps side by side.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Threshold pairs to compare, as (low, high).
    thresholds = [(50, 150), (100, 200), (50, 200), (100, 150)]

    edge_maps = [cv2.Canny(grayscale, low, high) for low, high in thresholds]
    labels = [f'Canny({low}, {high})' for low, high in thresholds]

    show_images(edge_maps, labels)

canny_demo(image)

5. 实际应用案例

5.1 人脸检测

python
# 人脸检测
def detect_faces(image):
    """Detect frontal faces with OpenCV's bundled Haar cascade.

    Args:
        image: BGR image array.

    Returns:
        Tuple ``(result, faces)``: a copy of ``image`` with a rectangle and a
        'Face' label drawn for each detection, and the detections returned
        by ``detectMultiScale`` as (x, y, w, h) rectangles.
    """
    cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(cascade_path)

    # The cascade is run on a grayscale copy of the image.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(
        grayscale,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
    )

    box_color = (255, 0, 0)  # blue in BGR order
    result = image.copy()
    for left, top, box_w, box_h in faces:
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(result, top_left, bottom_right, box_color, 2)
        # Put the label 10 px above the box's top edge.
        cv2.putText(result, 'Face', (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, box_color, 2)

    return result, faces

# 使用示例
# result, faces = detect_faces(image)
# show_image(result, f"检测到 {len(faces)} 个人脸")

5.2 轮廓检测和形状识别

python
# 轮廓检测和形状识别
def _classify_shape(vertex_count, width, height, area):
    """Name a shape from its polygon vertex count, bounding box and contour area."""
    if vertex_count == 3:
        return "Triangle"
    if vertex_count == 4:
        # A near-unit bounding-box aspect ratio separates squares from rectangles.
        aspect_ratio = float(width) / height
        return "Square" if 0.95 <= aspect_ratio <= 1.05 else "Rectangle"
    if vertex_count == 5:
        return "Pentagon"
    if vertex_count > 5:
        # Compare the contour area with the ellipse inscribed in the bounding
        # box; within 20% counts as a circle.
        ellipse_area = np.pi * (width / 2) * (height / 2)
        if abs(area - ellipse_area) / ellipse_area < 0.2:
            return "Circle"
        return "Polygon"
    return "Unknown"


def shape_recognition(image):
    """Outline and label simple shapes found in a BGR image.

    The image is binarized with a fixed threshold of 127, external contours
    are extracted, and every contour with an area of at least 500 is
    approximated by a polygon whose vertex count decides the label.

    Args:
        image: BGR image array.

    Returns:
        Copy of ``image`` with each accepted contour drawn and a text label
        placed 10 px above the contour's bounding box.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    result = image.copy()

    for contour in contours:
        area = cv2.contourArea(contour)
        if area < 500:
            continue  # skip small contours

        # Approximate the contour with a polygon; tolerance is 2% of the perimeter.
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)

        # The bounding box feeds both the classification and the label position.
        x, y, w, h = cv2.boundingRect(contour)

        # Reuse the area computed above instead of calling contourArea again.
        shape = _classify_shape(len(approx), w, h, area)

        # Draw the contour and its label.
        cv2.drawContours(result, [contour], -1, (0, 255, 0), 2)
        cv2.putText(result, shape, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

    return result

5.3 颜色检测

python
# 基于HSV的颜色检测
def color_detection(image, color_name, lower_bound, upper_bound):
    """Find regions of ``image`` whose HSV values fall inside a given range.

    Args:
        image: BGR image array.
        color_name: Label drawn next to each detected region.
        lower_bound: Lower HSV bound passed to ``cv2.inRange``.
        upper_bound: Upper HSV bound passed to ``cv2.inRange``.

    Returns:
        Tuple ``(result, mask)``: a copy of ``image`` with a labelled box
        around every region whose contour area exceeds 500, and the cleaned
        binary mask.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv_image, lower_bound, upper_bound)

    # Clean the mask: opening first, then closing, both with a 5x5 kernel.
    structuring_element = np.ones((5, 5), np.uint8)
    for operation in (cv2.MORPH_OPEN, cv2.MORPH_CLOSE):
        mask = cv2.morphologyEx(mask, operation, structuring_element)

    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    highlight = (0, 255, 0)  # green in BGR order
    result = image.copy()
    for contour in contours:
        if cv2.contourArea(contour) <= 500:
            continue  # too small to report
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(result, (x, y), (x + w, y + h), highlight, 2)
        cv2.putText(result, color_name, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, highlight, 2)

    return result, mask

# 预定义的颜色范围
# Preset HSV ranges keyed by color name; each value is a
# (lower_bound, upper_bound) pair of arrays.
color_ranges = {
    name: (np.array(lower), np.array(upper))
    for name, lower, upper in (
        ('red', [0, 50, 50], [10, 255, 255]),
        ('green', [40, 50, 50], [80, 255, 255]),
        ('blue', [100, 50, 50], [130, 255, 255]),
        ('yellow', [20, 50, 50], [30, 255, 255]),
    )
}

# 使用示例
# for color_name, (lower, upper) in color_ranges.items():
#     result, mask = color_detection(image, color_name, lower, upper)
#     show_images([result, mask], [f'{color_name}检测结果', f'{color_name}掩码'])

6. 最佳实践和技巧

6.1 性能优化

python
# 1. Use an appropriate data type
# For images in the 0-255 range, use uint8 rather than float64.
# (`image` is whatever array an earlier snippet left in scope.)
image_uint8 = np.array(image, dtype=np.uint8)

# 2. Avoid unnecessary color space conversions
# If only a grayscale image is needed, read it as grayscale directly.
gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# 3. Use an ROI (region of interest) to shrink the area being processed
roi = image[100:300, 200:400]  # rows 100-299, columns 200-399 only

# 4. 批量处理多个图像
def batch_process_images(image_paths, process_func):
    """Apply ``process_func`` to every image that can be loaded.

    Args:
        image_paths: Iterable of paths handed to ``cv2.imread``.
        process_func: Callable invoked with each successfully loaded image.

    Returns:
        List of ``process_func`` results in input order. Paths that
        ``cv2.imread`` cannot read (it returns None) are skipped silently.
    """
    processed = []
    for image_path in image_paths:
        loaded = cv2.imread(image_path)
        if loaded is None:
            continue
        processed.append(process_func(loaded))
    return processed

6.2 错误处理和调试

python
# 1. 检查图像是否成功读取
def safe_imread(path):
    """Read an image with ``cv2.imread`` and fail loudly when it cannot be read.

    Raises:
        ValueError: If ``cv2.imread`` returns None for ``path``.
    """
    loaded = cv2.imread(path)
    if loaded is not None:
        return loaded
    raise ValueError(f"无法读取图像: {path}")

# 2. 检查图像尺寸
def validate_image_size(image, min_width=100, min_height=100):
    """Check that an image meets the minimum width and height.

    Args:
        image: Array whose ``shape`` starts with (height, width).
        min_width: Smallest accepted width in pixels.
        min_height: Smallest accepted height in pixels.

    Raises:
        ValueError: If the image is narrower than ``min_width`` or shorter
            than ``min_height``.
    """
    h, w = image.shape[:2]
    large_enough = w >= min_width and h >= min_height
    if not large_enough:
        raise ValueError(f"图像尺寸太小: {w}x{h}")

# 3. 调试辅助函数
def debug_image_info(image, name="Image"):
    """Print the shape, dtype, minimum, maximum and mean of ``image``.

    Args:
        image: NumPy array to describe.
        name: Label printed in the header line.
    """
    def report_lines():
        # Lazily yield each line so every value is computed just before it
        # is printed.
        yield f"{name} 信息:"
        yield f"  形状: {image.shape}"
        yield f"  数据类型: {image.dtype}"
        yield f"  最小值: {image.min()}"
        yield f"  最大值: {image.max()}"
        yield f"  均值: {image.mean():.2f}"

    for line in report_lines():
        print(line)

6.3 常见问题解决

python
# 1. 解决中文路径问题
def imread_chinese_path(path):
    """Read a color image from a path that may contain Chinese characters.

    The file bytes are loaded with ``np.fromfile`` and decoded in memory with
    ``cv2.imdecode``, so the path is never passed to ``cv2.imread``.
    """
    raw_bytes = np.fromfile(path, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)

def imwrite_chinese_path(path, image):
    """Save ``image`` to a path that may contain Chinese characters.

    The image is encoded in memory with ``cv2.imencode`` and the bytes are
    written with ``ndarray.tofile``, so the path is never passed to
    ``cv2.imwrite``. The encoder is chosen from the extension of ``path``
    (for example '.png'), so the file contents match its name; JPEG is used
    when the path has no extension or OpenCV rejects the extension.

    Args:
        path: Destination file path.
        image: Image array to encode.

    Returns:
        True when the image was encoded and written, False when encoding
        reported failure (nothing is written in that case).
    """
    import os  # local import: only this helper needs it

    extension = os.path.splitext(path)[1] or '.jpg'
    try:
        success, encoded = cv2.imencode(extension, image)
    except cv2.error:
        # Extension rejected by OpenCV: fall back to the previous behavior
        # of writing JPEG data.
        success, encoded = cv2.imencode('.jpg', image)

    if not success:
        return False
    encoded.tofile(path)
    return True

# 2. 内存管理
def process_large_image(image_path, chunk_size=1000):
    """Blur an image tile by tile.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        chunk_size: Side length in pixels of each square tile; tiles on the
            right and bottom edges are smaller when the image size is not a
            multiple of ``chunk_size``.

    Returns:
        List of ``(y, x, processed_chunk)`` tuples, where ``(y, x)`` is the
        top-left corner of the tile in the source image.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail here with a clear
        # message instead of an AttributeError on ``image.shape`` below.
        raise ValueError(f"无法读取图像: {image_path}")
    h, w = image.shape[:2]

    results = []
    for y in range(0, h, chunk_size):
        for x in range(0, w, chunk_size):
            # Slicing clips at the array edge, so border tiles simply shrink.
            chunk = image[y:y+chunk_size, x:x+chunk_size]
            # Each tile is blurred on its own, without pixels from adjacent tiles.
            processed_chunk = cv2.GaussianBlur(chunk, (5, 5), 0)
            results.append((y, x, processed_chunk))

    return results

# 3. 参数验证
def validate_kernel_size(size):
    """Validate a convolution kernel size.

    Raises:
        ValueError: If ``size`` is even, or if it is odd but smaller than 3.
    """
    # Checks run in order; the first one that fails decides the message.
    checks = (
        (lambda value: value % 2 == 0, "卷积核大小必须为奇数"),
        (lambda value: value < 3, "卷积核大小至少为3"),
    )
    for is_invalid, message in checks:
        if is_invalid(size):
            raise ValueError(message)