doc-etl/cxs/_optimize_for_chinese.py
2025-05-16 11:30:02 +08:00

285 lines
9.3 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
针对中文OCR的图像预处理优化
"""
import cv2
import numpy as np
from typing import Optional, Tuple, List, Dict, Any
def optimize_for_chinese(image: np.ndarray, min_contour_area: float = 20) -> np.ndarray:
    """Binarize and de-noise an image of Chinese text for OCR.

    The pipeline assumes dark text on a lighter background: the inverted
    adaptive threshold marks pixels darker than their neighbourhood as
    foreground (255), a small closing reconnects broken strokes, small blobs
    are dropped as noise, and the result is inverted back so the returned
    image has black text on a white background.

    Args:
        image: Input image. A 2-D array is used as grayscale; a 3-D array is
            treated as BGR, BGRA (4 channels) or single-channel (H, W, 1).
        min_contour_area: A blob is kept only if the area of its outer
            contour (``cv2.contourArea``) is strictly greater than this.

    Returns:
        A single-channel image containing only the values 0 (text) and
        255 (background), with the same height and width as the input.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no elements.
    """
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    # Reduce to one channel. BGRA (e.g. PNG with alpha) and (H, W, 1) inputs
    # would make a plain BGR2GRAY conversion fail, so handle them explicitly.
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 1:
            gray = image[:, :, 0].copy()
        elif channels == 4:
            gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # 1. Adaptive threshold (25x25 Gaussian window, offset 15). The _INV
    #    variant turns text white so the morphology below acts on strokes.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 25, 15,
    )

    # 2. Closing with a 2-wide, 3-tall rectangle to bridge small gaps in
    #    thin strokes.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 3))
    morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)

    # 3. Noise removal. findContours returns 2 values on OpenCV 4.x and 3 on
    #    OpenCV 3.x; the contour list is second-to-last in both.
    contours = cv2.findContours(
        morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Filled outer contours are used only as a keep-mask. Using the filled
    # shapes directly as the output would paint over the enclosed counters
    # of characters such as 口, 日 or 国 and turn them into solid blobs.
    # Anything nested inside a kept outer contour is therefore preserved.
    keep_mask = np.zeros_like(morph)
    for contour in contours:
        if cv2.contourArea(contour) > min_contour_area:
            cv2.drawContours(keep_mask, [contour], -1, 255, -1)
    cleaned = morph & keep_mask

    # 4. Invert back: black text (0) on a white background (255).
    #
    # No sharpening / re-thresholding afterwards: on a strict 0/255 image the
    # 3x3 kernel (centre 9, neighbours -1) with uint8 saturation maps 0 -> 0
    # and 255 -> 255, so that step would return the image unchanged.
    return cv2.bitwise_not(cleaned)
def optimize_for_chinese_advanced(image: np.ndarray) -> List[np.ndarray]:
    """Produce nine alternative binarizations of a text image for OCR.

    All variants assume dark text on a lighter background and return black
    text on white. The list order is fixed:

        1. Gaussian adaptive threshold (block 25, offset 15)
        2. Mean adaptive threshold (block 35, offset 15)
        3. Otsu threshold
        4. 5x5 Gaussian blur followed by Otsu
        5. CLAHE contrast enhancement followed by Otsu
        6. ``optimize_for_chinese``
        7. Otsu followed by opening and closing of the text strokes
        8. 3x3 sharpening followed by Otsu
        9. Unsharp masking followed by Otsu

    Args:
        image: Input image. A 2-D array is used as grayscale; a 3-D array is
            treated as BGR, BGRA (4 channels) or single-channel (H, W, 1).

    Returns:
        A list of nine single-channel binarized images in the order above.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no elements.
    """
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    # Reduce to one channel; BGRA and (H, W, 1) inputs need special handling
    # because a plain BGR2GRAY conversion rejects them.
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 1:
            gray = image[:, :, 0].copy()
        elif channels == 4:
            gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    results = []

    # Method 1: Gaussian-weighted adaptive threshold.
    results.append(cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 25, 15,
    ))

    # Method 2: mean adaptive threshold with a larger window.
    results.append(cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY, 35, 15,
    ))

    # Method 3: global Otsu threshold (reused by method 7).
    _, binary_otsu = cv2.threshold(gray, 0, 255, otsu_flags)
    results.append(binary_otsu)

    # Method 4: Gaussian blur before Otsu to suppress pixel noise.
    smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary_smoothed = cv2.threshold(smoothed, 0, 255, otsu_flags)
    results.append(binary_smoothed)

    # Method 5: CLAHE local contrast enhancement before Otsu.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_enhanced = clahe.apply(gray)
    _, binary_clahe = cv2.threshold(contrast_enhanced, 0, 255, otsu_flags)
    results.append(binary_clahe)

    # Method 6: the basic pipeline. It is given the grayscale image, which it
    # uses as-is, so the colour conversion is not repeated.
    results.append(optimize_for_chinese(gray))

    # Method 7: morphology on the Otsu result. OpenCV morphology treats
    # bright pixels as foreground, while the Otsu image has black text, so
    # the image is inverted first; otherwise opening/closing would act on the
    # background instead of the strokes.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    strokes = cv2.bitwise_not(binary_otsu)
    # Opening removes specks smaller than the kernel.
    opened = cv2.morphologyEx(strokes, cv2.MORPH_OPEN, kernel, iterations=1)
    # Closing bridges small gaps in the remaining strokes.
    closed = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel, iterations=1)
    results.append(cv2.bitwise_not(closed))

    # Method 8: 3x3 sharpening (centre 9, neighbours -1) before Otsu.
    sharpen_kernel = np.array([[-1, -1, -1],
                               [-1, 9, -1],
                               [-1, -1, -1]])
    sharpened = cv2.filter2D(gray, -1, sharpen_kernel)
    _, binary_sharpened = cv2.threshold(sharpened, 0, 255, otsu_flags)
    results.append(binary_sharpened)

    # Method 9: unsharp masking, 1.5 * gray - 0.5 * blur(sigma=3), then Otsu.
    soft = cv2.GaussianBlur(gray, (0, 0), 3)
    edge_enhanced = cv2.addWeighted(gray, 1.5, soft, -0.5, 0)
    _, binary_edges = cv2.threshold(edge_enhanced, 0, 255, otsu_flags)
    results.append(binary_edges)

    return results
def detect_and_correct_skew(
    image: np.ndarray,
    angle_range: Tuple[int, int] = (-15, 15),
    angle_step: float = 0.5,
) -> np.ndarray:
    """Estimate the text skew of an image and rotate the image to undo it.

    Each candidate angle is scored by rotating an Otsu-binarized copy of the
    image and taking the variance of its per-row pixel sums (a horizontal
    projection profile); the angle with the highest variance wins. On exact
    ties the angle closest to zero is used.

    Args:
        image: Input image. A 2-D array is used as grayscale; a 3-D array is
            treated as BGR, BGRA (4 channels) or single-channel (H, W, 1).
        angle_range: ``(min, max)`` search interval in degrees, inclusive.
        angle_step: Spacing between candidate angles in degrees.

    Returns:
        The input image rotated about its centre by the best angle, with the
        same width and height. If the best angle is zero an unmodified copy
        of the input is returned.

    Raises:
        ValueError: If ``image`` is ``None`` or empty, if ``angle_step`` is
            not positive, or if ``angle_range`` has its minimum above its
            maximum.
    """
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    min_angle, max_angle = angle_range
    if angle_step <= 0:
        raise ValueError("angle_step 必须大于 0")
    if min_angle > max_angle:
        raise ValueError("angle_range 的下限不能大于上限")

    # Reduce to one channel; BGRA and (H, W, 1) inputs need special handling
    # because a plain BGR2GRAY conversion rejects them.
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 1:
            gray = image[:, :, 0].copy()
        elif channels == 4:
            gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Inverted Otsu: text becomes 255 so row sums measure ink per row.
    _, binary = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Candidate angles min, min + step, ... never exceeding max. The count is
    # computed explicitly because arange(min, max + step, step) steps past
    # max whenever the step does not divide the interval evenly.
    step_count = int(np.floor((max_angle - min_angle) / angle_step + 1e-9)) + 1
    angles = min_angle + angle_step * np.arange(step_count)

    height, width = binary.shape[:2]
    center = (width // 2, height // 2)
    size = (width, height)

    scores = np.empty(step_count, dtype=np.float64)
    for index, angle in enumerate(angles):
        matrix = cv2.getRotationMatrix2D(center, float(angle), 1.0)
        rotated = cv2.warpAffine(
            binary, matrix, size,
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_CONSTANT, borderValue=0,
        )
        # Well-aligned text lines give alternating full / empty rows, which
        # maximizes the variance of the row sums.
        scores[index] = np.var(np.sum(rotated, axis=1))

    # Among equally good angles prefer the smallest rotation, so that a page
    # with no usable signal is not rotated to the edge of the search range.
    top = np.flatnonzero(scores == scores.max())
    best_angle = float(angles[top[np.argmin(np.abs(angles[top]))]])

    if best_angle == 0.0:
        # Nothing to correct; avoid resampling the image.
        return image.copy()

    matrix = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    # Replicate edge pixels into the uncovered corners. A constant border
    # defaults to black, which leaves dark wedges on a light page that later
    # binarization steps would pick up as ink.
    return cv2.warpAffine(
        image, matrix, size,
        flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE,
    )
def process_image_for_chinese_ocr(image: np.ndarray, correct_skew: bool = True) -> Dict[str, Any]:
    """Run the full preprocessing pipeline and collect every intermediate image.

    Args:
        image: Input image as a NumPy array.
        correct_skew: When true, the image is deskewed with
            ``detect_and_correct_skew`` first and all later steps operate on
            the deskewed image.

    Returns:
        A dict with these keys, in insertion order:
            ``'original'``: a copy of the input image;
            ``'deskewed'``: the deskewed image (only if ``correct_skew``);
            ``'optimized'``: the result of ``optimize_for_chinese``;
            ``'method_1'`` ... ``'method_N'``: the results of
            ``optimize_for_chinese_advanced``, numbered from 1.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no elements.
    """
    # Validate up front so a missing image fails with the same ValueError the
    # individual steps raise, rather than an AttributeError from .copy().
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    result: Dict[str, Any] = {'original': image.copy()}

    # Step 1: optional skew correction; later steps use its output.
    working_image = image
    if correct_skew:
        working_image = detect_and_correct_skew(image)
        result['deskewed'] = working_image

    # Step 2: basic optimization.
    result['optimized'] = optimize_for_chinese(working_image)

    # Step 3: every advanced variant, keyed method_1, method_2, ...
    variants = optimize_for_chinese_advanced(working_image)
    for index, variant in enumerate(variants, start=1):
        result[f'method_{index}'] = variant

    return result
if __name__ == "__main__":
    # Command-line smoke test: read one image, run the full pipeline and
    # write every resulting image as "<key>.png" into the output directory
    # (current directory by default).
    import os
    import sys

    if len(sys.argv) > 1:
        input_image_path = sys.argv[1]
        output_dir = sys.argv[2] if len(sys.argv) > 2 else "."

        # cv2.imread returns None instead of raising on an unreadable file.
        image = cv2.imread(input_image_path)
        if image is None:
            print(f"无法读取图像: {input_image_path}")
            sys.exit(1)

        result = process_image_for_chinese_ocr(image)

        # cv2.imwrite does not create missing directories.
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as exc:
            print(f"无法创建输出目录: {output_dir} ({exc})")
            sys.exit(1)

        # Write every entry of the result dict (original, deskewed,
        # optimized, method_N). cv2.imwrite reports failure through its
        # return value, so collect the failures instead of ignoring them.
        failed = []
        for key, processed in result.items():
            target = os.path.join(output_dir, f"{key}.png")
            if not cv2.imwrite(target, processed):
                failed.append(target)

        if failed:
            print(f"无法写入以下文件: {', '.join(failed)}")
            sys.exit(1)

        print(f"处理完成,结果已保存到 {output_dir}")
    else:
        print("使用方法: python _optimize_for_chinese.py <输入图像路径> [输出目录]")