#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Image preprocessing tuned for Chinese OCR.

The module offers a basic clean-up pipeline (`optimize_for_chinese`), a set
of alternative binarizations (`optimize_for_chinese_advanced`), a
projection-profile deskew (`detect_and_correct_skew`) and a convenience
wrapper that runs all of them (`process_image_for_chinese_ocr`).

Colour inputs are converted with OpenCV's BGR(A) conversion codes, i.e. the
channel order produced by `cv2.imread` is assumed.
"""

import os
import sys
from typing import Optional, Tuple, List, Dict, Any

import cv2
import numpy as np

# Error message shared by every entry point that rejects an empty image.
_EMPTY_IMAGE_MESSAGE = "输入图像为空"

# 3x3 sharpening kernel (coefficients sum to 1, so flat regions are unchanged).
_SHARPEN_KERNEL = np.array([[-1, -1, -1],
                            [-1, 9, -1],
                            [-1, -1, -1]])


def _to_gray(image: np.ndarray) -> np.ndarray:
    """Validate *image* and return a new single-channel copy of it.

    Args:
        image: 2-D array, or 3-D array with 1, 3 (BGR) or 4 (BGRA) channels.

    Returns:
        A 2-D array that does not share memory with *image*.

    Raises:
        ValueError: If the image is ``None``, empty, or has an unsupported
            shape.
    """
    if image is None or image.size == 0:
        raise ValueError(_EMPTY_IMAGE_MESSAGE)

    if image.ndim == 2:
        return image.copy()

    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 1:
            # cvtColor rejects single-channel input for BGR2GRAY, so just
            # drop the trailing axis.
            return image[:, :, 0].copy()
        if channels == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if channels == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)

    raise ValueError(f"不支持的图像形状: {image.shape}")


def optimize_for_chinese(image: np.ndarray,
                         min_contour_area: float = 20) -> np.ndarray:
    """Binarize and denoise an image of Chinese text.

    Pipeline: adaptive Gaussian threshold (text becomes white), a small
    morphological close to reconnect broken strokes, removal of connected
    blobs whose outer contour encloses too little area, and a final
    inversion.

    Args:
        image: Input image as a NumPy array (grayscale, BGR or BGRA).
        min_contour_area: Blobs whose outer contour area is not strictly
            greater than this value are discarded as noise.

    Returns:
        A binary (0/255) single-channel image with black text on a white
        background.

    Raises:
        ValueError: If the image is ``None``, empty or has an unsupported
            shape.
    """
    gray = _to_gray(image)

    # 1. Adaptive threshold copes with uneven lighting/contrast. The
    #    inverted mode makes ink white so it counts as foreground below.
    binary = cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        25, 15
    )

    # 2. Close with a 2-wide, 3-tall rectangle to bridge small gaps in
    #    thin strokes.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 3))
    strokes = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)

    # 3. Noise removal. `[-2]` picks the contour list on both the 2-tuple
    #    (OpenCV 4) and 3-tuple (OpenCV 3) return conventions; the copy
    #    protects `strokes` on versions that modify their input.
    contours = cv2.findContours(
        strokes.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # The filled contours are used only as a *selection mask*. Drawing them
    # directly as the result would paint over every enclosed hole and turn
    # characters such as 口 / 日 / 国 into solid blobs.
    keep_mask = np.zeros_like(strokes)
    for contour in contours:
        if cv2.contourArea(contour) > min_contour_area:
            cv2.drawContours(keep_mask, [contour], -1, 255, -1)
    cleaned = cv2.bitwise_and(strokes, keep_mask)

    # 4. Invert back: black text on a white background.
    #
    # No sharpening / re-threshold pass follows: on a strictly 0/255 image
    # the 3x3 sharpening kernel (sum 1, centre 9) saturates every pixel back
    # to its own value, so that step cannot change the output.
    return cv2.bitwise_not(cleaned)


def optimize_for_chinese_advanced(image: np.ndarray) -> List[np.ndarray]:
    """Produce several alternative binarizations of an image of Chinese text.

    Args:
        image: Input image as a NumPy array (grayscale, BGR or BGRA).

    Returns:
        A list of nine single-channel images, in this order:
        1. adaptive Gaussian threshold (block 25, C 15),
        2. adaptive mean threshold (block 35, C 15),
        3. Otsu threshold,
        4. Otsu after a 5x5 Gaussian blur,
        5. Otsu after CLAHE contrast enhancement,
        6. result of `optimize_for_chinese`,
        7. Otsu followed by morphological open then close (3x3 ellipse),
        8. Otsu after sharpening,
        9. Otsu after unsharp masking.

    Raises:
        ValueError: If the image is ``None``, empty or has an unsupported
            shape.
    """
    gray = _to_gray(image)
    otsu = cv2.THRESH_BINARY + cv2.THRESH_OTSU

    results = []

    # Method 1: adaptive Gaussian threshold.
    results.append(cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        25, 15
    ))

    # Method 2: adaptive mean threshold with a larger neighbourhood.
    results.append(cv2.adaptiveThreshold(
        gray, 255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        35, 15
    ))

    # Method 3: global Otsu threshold.
    _, otsu_binary = cv2.threshold(gray, 0, 255, otsu)
    results.append(otsu_binary)

    # Method 4: Gaussian blur, then Otsu.
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    _, blurred_binary = cv2.threshold(smoothed, 0, 255, otsu)
    results.append(blurred_binary)

    # Method 5: CLAHE contrast enhancement, then Otsu.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    _, clahe_binary = cv2.threshold(clahe.apply(gray), 0, 255, otsu)
    results.append(clahe_binary)

    # Method 6: the basic pipeline.
    results.append(optimize_for_chinese(image))

    # Method 7: open (drop specks) then close (rejoin strokes) on the Otsu
    # result. morphologyEx returns a new array, so method 3's image can be
    # reused instead of thresholding the same input a second time.
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    opened = cv2.morphologyEx(otsu_binary, cv2.MORPH_OPEN, ellipse, iterations=1)
    results.append(
        cv2.morphologyEx(opened, cv2.MORPH_CLOSE, ellipse, iterations=1)
    )

    # Method 8: sharpen the grayscale image, then Otsu.
    sharpened = cv2.filter2D(gray, -1, _SHARPEN_KERNEL)
    _, sharpened_binary = cv2.threshold(sharpened, 0, 255, otsu)
    results.append(sharpened_binary)

    # Method 9: unsharp masking (1.5 * gray - 0.5 * blurred), then Otsu.
    soft = cv2.GaussianBlur(gray, (0, 0), 3)
    edge_enhanced = cv2.addWeighted(gray, 1.5, soft, -0.5, 0)
    _, edge_binary = cv2.threshold(edge_enhanced, 0, 255, otsu)
    results.append(edge_binary)

    return results


def _projection_score(binary: np.ndarray,
                      center: Tuple[int, int],
                      size: Tuple[int, int],
                      angle: float) -> float:
    """Return the variance of the row sums of *binary* rotated by *angle*."""
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(
        binary, matrix, size,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=0
    )
    row_sums = np.sum(rotated, axis=1, dtype=np.float64)
    return float(np.var(row_sums))


def detect_and_correct_skew(image: np.ndarray,
                            angle_range: Tuple[int, int] = (-15, 15),
                            angle_step: float = 0.5) -> np.ndarray:
    """Estimate the text skew of an image and rotate it upright.

    Every candidate angle is applied to an inverted Otsu binarization of
    the image; the angle whose horizontal projection profile (row sums) has
    the largest variance is selected and applied to the original image.

    Args:
        image: Input image as a NumPy array (grayscale, BGR or BGRA).
        angle_range: Inclusive ``(min, max)`` range of candidate angles in
            degrees.
        angle_step: Positive spacing between candidate angles in degrees.

    Returns:
        The rotated image, same width and height as the input. A copy of
        the input is returned when the best angle is 0.

    Raises:
        ValueError: If the image is ``None``/empty/unsupported, if
            ``angle_step`` is not positive, or if ``angle_range`` is
            reversed.
    """
    gray = _to_gray(image)

    low, high = angle_range
    if angle_step <= 0:
        raise ValueError(f"angle_step 必须为正数: {angle_step}")
    if low > high:
        raise ValueError(f"angle_range 无效: {angle_range}")

    # Text becomes white (non-zero) so that it dominates the row sums.
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Build the candidates from an integer step count rather than a float
    # `np.arange` stop value, so the search never leaves [low, high] and
    # `high` is included whenever the step divides the range.
    step_count = int(np.floor((high - low) / angle_step + 1e-9)) + 1
    angles = low + angle_step * np.arange(step_count)

    height, width = binary.shape[:2]
    center = (width // 2, height // 2)
    size = (width, height)

    scores = [
        _projection_score(binary, center, size, float(angle))
        for angle in angles
    ]
    best_angle = float(angles[int(np.argmax(scores))])

    if best_angle == 0:
        # Nothing to correct; avoid a needless resampling pass.
        return image.copy()

    matrix = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    # Replicate the edge pixels into the exposed corners. A constant black
    # border would leave dark wedges that later thresholding treats as ink.
    return cv2.warpAffine(
        image, matrix, size,
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE
    )


def process_image_for_chinese_ocr(image: np.ndarray,
                                  correct_skew: bool = True) -> Dict[str, Any]:
    """Run the complete Chinese-OCR preprocessing pipeline.

    Args:
        image: Input image as a NumPy array (grayscale, BGR or BGRA).
        correct_skew: Whether to deskew the image before the other steps.

    Returns:
        A dictionary with the keys ``'original'`` (copy of the input),
        ``'deskewed'`` (only when ``correct_skew`` is true),
        ``'optimized'`` (result of `optimize_for_chinese`) and
        ``'method_1'`` ... ``'method_N'`` (results of
        `optimize_for_chinese_advanced`, in order).

    Raises:
        ValueError: If the image is ``None``, empty or has an unsupported
            shape.
    """
    # Validate before touching the array so that None yields the same
    # ValueError as the other entry points instead of an AttributeError.
    if image is None or image.size == 0:
        raise ValueError(_EMPTY_IMAGE_MESSAGE)

    result = {
        'original': image.copy()
    }

    # Step 1: optional deskew; later steps work on the corrected image.
    working_image = image
    if correct_skew:
        working_image = detect_and_correct_skew(image)
        result['deskewed'] = working_image

    # Step 2: basic optimisation.
    result['optimized'] = optimize_for_chinese(working_image)

    # Step 3: the alternative binarizations, numbered from 1.
    variants = optimize_for_chinese_advanced(working_image)
    for index, variant in enumerate(variants, start=1):
        result[f'method_{index}'] = variant

    return result


def _main(argv: List[str]) -> int:
    """Command-line entry point; returns the process exit status.

    Usage: ``<script> <input image path> [output directory]``. Every entry
    of the processing result is written to ``<output directory>/<key>.png``.
    """
    if len(argv) < 2:
        print("使用方法: python _optimize_for_chinese.py <输入图像路径> [输出目录]")
        return 0

    input_image_path = argv[1]
    output_dir = argv[2] if len(argv) > 2 else "."

    image = cv2.imread(input_image_path)
    if image is None:
        print(f"无法读取图像: {input_image_path}")
        return 1

    result = process_image_for_chinese_ocr(image)

    # cv2.imwrite does not create missing directories; it just returns
    # False, so make sure the target exists first.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Iterate over whatever the pipeline produced rather than a fixed
    # method count, and record writes that OpenCV reports as failed.
    failed_paths = []
    for key, processed in result.items():
        output_path = os.path.join(output_dir, f"{key}.png")
        if not cv2.imwrite(output_path, processed):
            failed_paths.append(output_path)

    if failed_paths:
        for output_path in failed_paths:
            print(f"无法保存图像: {output_path}", file=sys.stderr)
        return 1

    print(f"处理完成,结果已保存到 {output_dir}")
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))