#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Image preprocessing optimizations for Chinese OCR.
"""
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from typing import Optional, Tuple, List, Dict, Any
|
|
|
|
|
|
def optimize_for_chinese(image: np.ndarray) -> np.ndarray:
    """
    Binarize and clean an image for Chinese-text OCR.

    Pipeline: grayscale conversion -> inverted Gaussian adaptive threshold ->
    small morphological closing -> removal of small blobs -> inversion back.

    Args:
        image: Input image as a NumPy array. A 3-dimensional array is
            converted with ``cv2.COLOR_BGR2GRAY``; any other array is copied
            and used directly as the grayscale image.

    Returns:
        A 0/255 image of the same height and width. Pixels that were
        darker than their local neighbourhood (and survived noise removal)
        are 0; everything else is 255.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no elements.
    """
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    # Work on a single-channel image.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # 1. Adaptive threshold (25x25 Gaussian window, offset 15). The inverted
    #    mode turns locally dark pixels into 255 so that the morphology and
    #    contour steps below treat them as foreground.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 25, 15
    )

    # 2. Closing with a 2-wide, 3-high rectangle bridges small gaps in
    #    strokes without merging neighbouring strokes too aggressively.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 3))
    morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1)

    # 3. Noise removal: keep only outer contours whose area exceeds the
    #    minimum. Indexing with [-2] works for both the 2-tuple (OpenCV 4.x)
    #    and 3-tuple (OpenCV 3.x) return shapes of findContours.
    contours = cv2.findContours(
        morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    min_contour_area = 20  # contours at or below this area are treated as noise
    keep_mask = np.zeros_like(morph)
    for contour in contours:
        if cv2.contourArea(contour) > min_contour_area:
            cv2.drawContours(keep_mask, [contour], -1, 255, -1)

    # The filled contours are only a mask. Using them directly as the output
    # would paint over enclosed holes in characters, because RETR_EXTERNAL
    # reports outer boundaries only. AND-ing with the closed image keeps the
    # original stroke pixels (specks lying inside a kept outline are kept
    # as well).
    cleaned = cv2.bitwise_and(morph, keep_mask)

    # 4. Invert back: retained foreground becomes 0 on a 255 background.
    #    No sharpening / re-thresholding follows: on an image that only
    #    contains 0 and 255, a 3x3 sharpen kernel with centre 9 and
    #    neighbours -1 saturates back to the same values, so it was a no-op.
    return cv2.bitwise_not(cleaned)
|
|
|
|
|
|
def optimize_for_chinese_advanced(image: np.ndarray) -> List[np.ndarray]:
    """
    Produce several alternative binarizations of an image for Chinese OCR.

    Args:
        image: Input image as a NumPy array. A 3-dimensional array is
            converted with ``cv2.COLOR_BGR2GRAY``; any other array is copied
            and used directly as the grayscale image.

    Returns:
        A list of nine images, in this fixed order:
            1. Gaussian adaptive threshold (block 25, C 15)
            2. Mean adaptive threshold (block 35, C 15)
            3. Otsu threshold
            4. Otsu threshold after a 5x5 Gaussian blur
            5. Otsu threshold after CLAHE contrast enhancement
            6. Result of ``optimize_for_chinese(image)``
            7. Otsu threshold followed by 3x3 elliptical open, then close
            8. Otsu threshold after 3x3 sharpening
            9. Otsu threshold after unsharp masking

    Raises:
        ValueError: If ``image`` is ``None`` or contains no elements.
    """
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU

    def otsu(src):
        # cv2.threshold returns (threshold_used, image); only the image is needed.
        return cv2.threshold(src, 0, 255, otsu_flags)[1]

    results = []

    # Method 1: Gaussian-weighted adaptive threshold.
    results.append(cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 25, 15
    ))

    # Method 2: mean adaptive threshold with a larger window.
    results.append(cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY, 35, 15
    ))

    # Method 3: global Otsu threshold. Kept in a variable because method 7
    # starts from exactly the same image.
    plain_otsu = otsu(gray)
    results.append(plain_otsu)

    # Method 4: Gaussian blur, then Otsu.
    results.append(otsu(cv2.GaussianBlur(gray, (5, 5), 0)))

    # Method 5: CLAHE contrast enhancement, then Otsu.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    results.append(otsu(clahe.apply(gray)))

    # Method 6: the basic single-result pipeline, applied to the original
    # (not yet grayscale-converted) input.
    results.append(optimize_for_chinese(image))

    # Method 7: open then close with a 3x3 elliptical kernel on the Otsu
    # result. morphologyEx returns a new array, so sharing plain_otsu with
    # method 3 does not modify that entry.
    # NOTE(review): morphology acts on bright pixels; with THRESH_BINARY,
    # dark text on a light page makes the background the foreground here, so
    # open/close swap roles relative to the strokes. Confirm this is intended.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    opened = cv2.morphologyEx(plain_otsu, cv2.MORPH_OPEN, kernel, iterations=1)
    results.append(cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel, iterations=1))

    # Method 8: 3x3 sharpening kernel (sums to 1), then Otsu.
    sharpen_kernel = np.array([[-1, -1, -1],
                               [-1, 9, -1],
                               [-1, -1, -1]])
    results.append(otsu(cv2.filter2D(gray, -1, sharpen_kernel)))

    # Method 9: unsharp masking (1.5 * gray - 0.5 * blurred, sigma 3), then Otsu.
    soft = cv2.GaussianBlur(gray, (0, 0), 3)
    results.append(otsu(cv2.addWeighted(gray, 1.5, soft, -0.5, 0)))

    return results
|
|
|
|
|
|
def detect_and_correct_skew(image: np.ndarray, angle_range: Tuple[int, int] = (-15, 15), angle_step: float = 0.5) -> np.ndarray:
    """
    Estimate the skew of the text in an image and rotate the image to undo it.

    Candidate angles from ``angle_range[0]`` to ``angle_range[1]`` in steps
    of ``angle_step`` are tried on an inverted Otsu binarization. Each
    candidate is scored by the variance of the per-row pixel sums of the
    rotated binary image, and the highest-scoring angle is applied to the
    original image.

    Args:
        image: Input image as a NumPy array. A 3-dimensional array is
            converted with ``cv2.COLOR_BGR2GRAY`` for the analysis.
        angle_range: ``(low, high)`` search bounds in degrees, inclusive.
        angle_step: Positive spacing between candidate angles, in degrees.

    Returns:
        The input image rotated about its centre by the selected angle,
        with the same width and height. An unmodified copy is returned when
        the selected angle is 0.

    Raises:
        ValueError: If ``image`` is ``None`` or empty, if ``angle_step`` is
            not positive, or if ``angle_range`` is reversed.
    """
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    low, high = angle_range
    if angle_step <= 0:
        raise ValueError("angle_step 必须为正数")
    if low > high:
        raise ValueError("angle_range 的下限不能大于上限")

    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Inverted Otsu: pixels at or below the threshold become 255, so row
    # sums measure how much dark content each row holds.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Build the candidates from an integer count rather than a float-step
    # np.arange, whose length can be off by one and overshoot `high`.
    n_angles = int(np.floor((high - low) / angle_step + 1e-9)) + 1
    angles = low + angle_step * np.arange(n_angles)

    height, width = binary.shape[:2]
    center = (width // 2, height // 2)

    scores = []
    for angle in angles:
        rotation_matrix = cv2.getRotationMatrix2D(center, float(angle), 1.0)
        rotated = cv2.warpAffine(binary, rotation_matrix, (width, height),
                                 flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=0)
        # Row sums vary more from row to row when text lines are horizontal.
        scores.append(np.var(np.sum(rotated, axis=1)))

    # Among equally good candidates prefer the smallest rotation, so an
    # image with no usable signal (e.g. a blank page, where every score is
    # equal) is not rotated to the first angle in the range.
    scores = np.asarray(scores)
    tied = np.flatnonzero(scores == scores.max())
    best_angle = float(angles[tied[np.argmin(np.abs(angles[tied]))]])

    if best_angle == 0.0:
        return image.copy()

    # Replicate edge pixels into the exposed corners instead of filling them
    # with black, which later binarization would pick up as ink on a light page.
    rotation_matrix = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    rotated_image = cv2.warpAffine(image, rotation_matrix, (image.shape[1], image.shape[0]),
                                   flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    return rotated_image
|
|
|
|
|
|
def process_image_for_chinese_ocr(image: np.ndarray, correct_skew: bool = True) -> Dict[str, Any]:
    """
    Run the full Chinese-OCR preprocessing pipeline on one image.

    Args:
        image: Input image as a NumPy array.
        correct_skew: When true, deskew the image first and run the
            remaining steps on the deskewed image.

    Returns:
        A dict with these keys:
            'original': a copy of the input image
            'deskewed': the deskewed image (only when ``correct_skew`` is true)
            'optimized': result of ``optimize_for_chinese``
            'method_1', 'method_2', ...: one entry per image returned by
                ``optimize_for_chinese_advanced``, in order

    Raises:
        ValueError: If ``image`` is ``None`` or contains no elements.
    """
    # Validate before touching the image so that None is reported with the
    # same ValueError the other functions in this module raise, rather than
    # an AttributeError from image.copy().
    if image is None or image.size == 0:
        raise ValueError("输入图像为空")

    result = {'original': image.copy()}

    # Step 1: optional skew correction; later steps use its output.
    if correct_skew:
        working_image = detect_and_correct_skew(image)
        result['deskewed'] = working_image
    else:
        working_image = image

    # Step 2: basic single-result optimization.
    result['optimized'] = optimize_for_chinese(working_image)

    # Step 3: all alternative binarizations, numbered from 1.
    for index, variant in enumerate(optimize_for_chinese_advanced(working_image), start=1):
        result[f'method_{index}'] = variant

    return result
|
|
|
|
|
|
if __name__ == "__main__":
    # Minimal command-line smoke test:
    #   python <this file> <input image path> [output directory]
    # Runs the full pipeline and writes every result image as a PNG.
    import os
    import sys

    if len(sys.argv) > 1:
        input_image_path = sys.argv[1]
        output_dir = sys.argv[2] if len(sys.argv) > 2 else "."

        # cv2.imread returns None instead of raising when the file cannot
        # be read or decoded.
        image = cv2.imread(input_image_path)
        if image is None:
            print(f"无法读取图像: {input_image_path}")
            sys.exit(1)

        result = process_image_for_chinese_ocr(image)

        # Create the target directory first; writing into a missing
        # directory would otherwise fail.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Every value in the result dict is an image, so write them all
        # under their key names instead of assuming a fixed method count.
        failed_paths = []
        for name, output_image in result.items():
            output_path = os.path.join(output_dir, f"{name}.png")
            # cv2.imwrite reports failure through its return value.
            if not cv2.imwrite(output_path, output_image):
                failed_paths.append(output_path)

        if failed_paths:
            for output_path in failed_paths:
                print(f"无法写入图像: {output_path}")
            sys.exit(1)

        print(f"处理完成,结果已保存到 {output_dir}")
    else:
        print("使用方法: python _optimize_for_chinese.py <输入图像路径> [输出目录]")