表格问题修复
This commit is contained in:
parent
dfa2b47b11
commit
dad699a157
13
README.md
13
README.md
@ -454,6 +454,19 @@ pip install -r requirements.txt
|
||||
- 自动清理"表格无有效数据"等无效提示信息
|
||||
- 优化文本拼接逻辑，确保输出格式的一致性
|
||||
|
||||
### 2024年6月20日
|
||||
- **修复表格HTML生成错误**
|
||||
- 解决了`_generate_table_html_tags`方法中`cell_map`变量未定义的错误
|
||||
- 完全重写了表格HTML生成逻辑，确保表格能正确转换为HTML格式
|
||||
- 优化表格头部和主体分别处理的流程
|
||||
- 添加了合并单元格的正确属性(rowspan和colspan)
|
||||
- 完善了表格结构分析过程,准确标记垂直和水平合并的单元格
|
||||
- 改进HTML标签属性生成逻辑，确保输出符合HTML规范
|
||||
- 增强了错误处理,提供具体的单元格位置信息方便调试
|
||||
- 确保表格信息完整转换为HTML格式，解决"表格的html没有正常输出"问题
|
||||
|
||||
### 2024年6月19日
|
||||
|
||||
## 安装说明
|
||||
|
||||
1. 克隆项目代码
|
||||
|
@ -772,6 +772,7 @@ class DocCleaner:
|
||||
# 分析表格结构(查找合并单元格)
|
||||
merged_cells = {}
|
||||
merged_v_cells = set() # 记录被垂直合并的单元格
|
||||
cell_map = {} # 添加cell_map的定义
|
||||
|
||||
# 检测合并单元格
|
||||
for i in range(rows):
|
||||
@ -799,49 +800,106 @@ class DocCleaner:
|
||||
except Exception as e:
|
||||
print(f"警告:处理合并单元格时出错 [{i},{j}]: {str(e)}")
|
||||
|
||||
# 第二遍:复制内容并执行合并
|
||||
for i in range(rows):
|
||||
for j in range(cols):
|
||||
# 构建HTML表格
|
||||
html = f'<table id="{table_id}" class="docx-table">\n'
|
||||
html += '<thead>\n'
|
||||
|
||||
# 添加表头行
|
||||
header_rows = min(1, rows) # 假设第一行是表头
|
||||
for i in range(header_rows):
|
||||
html += ' <tr>\n'
|
||||
j = 0
|
||||
while j < cols:
|
||||
try:
|
||||
src_cell = table.cell(i, j)
|
||||
dst_cell = new_table.cell(i, j)
|
||||
cell = table.cell(i, j)
|
||||
text = cell.text.strip()
|
||||
|
||||
# 检查是否是合并单元格
|
||||
rowspan = 1
|
||||
colspan = 1
|
||||
|
||||
# 检查是否需要合并
|
||||
if (i, j) in cell_map:
|
||||
merge_type, span = cell_map[(i, j)]
|
||||
if merge_type == 'vmerge':
|
||||
# 垂直合并
|
||||
rowspan = span
|
||||
elif merge_type == 'hmerge':
|
||||
colspan = span
|
||||
|
||||
# 添加表头单元格
|
||||
attrs = []
|
||||
if rowspan > 1:
|
||||
attrs.append(f'rowspan="{rowspan}"')
|
||||
if colspan > 1:
|
||||
attrs.append(f'colspan="{colspan}"')
|
||||
|
||||
attrs_str = ' '.join(attrs)
|
||||
if attrs_str:
|
||||
attrs_str = ' ' + attrs_str
|
||||
|
||||
html += f' <th{attrs_str}>{text}</th>\n'
|
||||
|
||||
# 如果是水平合并,跳过合并的列
|
||||
j += colspan
|
||||
except Exception as e:
|
||||
print(f"警告:处理表头单元格时出错 [{i},{j}]: {str(e)}")
|
||||
html += f' <th>错误: {str(e)}</th>\n'
|
||||
j += 1
|
||||
html += ' </tr>\n'
|
||||
|
||||
html += '</thead>\n<tbody>\n'
|
||||
|
||||
# 添加数据行
|
||||
for i in range(header_rows, rows):
|
||||
html += ' <tr>\n'
|
||||
j = 0
|
||||
while j < cols:
|
||||
try:
|
||||
# 跳过已经被垂直合并的单元格
|
||||
if (i, j) in merged_v_cells:
|
||||
j += 1
|
||||
continue
|
||||
|
||||
cell = table.cell(i, j)
|
||||
text = cell.text.strip()
|
||||
|
||||
# 检查是否是合并单元格
|
||||
rowspan = 1
|
||||
colspan = 1
|
||||
|
||||
if (i, j) in cell_map:
|
||||
merge_type, span = cell_map[(i, j)]
|
||||
if merge_type == 'vmerge':
|
||||
rowspan = span
|
||||
# 标记被垂直合并的单元格
|
||||
for k in range(1, span):
|
||||
if i + k < rows:
|
||||
dst_cell.merge(new_table.cell(i + k, j))
|
||||
merged_v_cells.add((i + k, j))
|
||||
elif merge_type == 'hmerge':
|
||||
# 水平合并
|
||||
for k in range(1, span):
|
||||
if j + k < cols:
|
||||
dst_cell.merge(new_table.cell(i, j + k))
|
||||
colspan = span
|
||||
|
||||
# 复制单元格属性
|
||||
if src_cell._element.tcPr is not None:
|
||||
dst_cell._element.tcPr = deepcopy(src_cell._element.tcPr)
|
||||
# 添加数据单元格
|
||||
attrs = []
|
||||
if rowspan > 1:
|
||||
attrs.append(f'rowspan="{rowspan}"')
|
||||
if colspan > 1:
|
||||
attrs.append(f'colspan="{colspan}"')
|
||||
|
||||
# 复制单元格内容
|
||||
dst_cell.text = "" # 清除默认内容
|
||||
for src_paragraph in src_cell.paragraphs:
|
||||
dst_paragraph = dst_cell.add_paragraph()
|
||||
# 复制段落属性
|
||||
if src_paragraph._element.pPr is not None:
|
||||
dst_paragraph._element.pPr = deepcopy(src_paragraph._element.pPr)
|
||||
|
||||
# 复制文本和格式
|
||||
for src_run in src_paragraph.runs:
|
||||
dst_run = dst_paragraph.add_run(src_run.text)
|
||||
# 复制运行属性
|
||||
if src_run._element.rPr is not None:
|
||||
dst_run._element.rPr = deepcopy(src_run._element.rPr)
|
||||
|
||||
attrs_str = ' '.join(attrs)
|
||||
if attrs_str:
|
||||
attrs_str = ' ' + attrs_str
|
||||
|
||||
html += f' <td{attrs_str}>{text}</td>\n'
|
||||
|
||||
# 如果是水平合并,跳过合并的列
|
||||
j += colspan
|
||||
except Exception as e:
|
||||
print(f"警告:复制单元格时出错 [{i},{j}]: {str(e)}")
|
||||
continue
|
||||
print(f"警告:处理数据单元格时出错 [{i},{j}]: {str(e)}")
|
||||
html += f' <td>错误: {str(e)}</td>\n'
|
||||
j += 1
|
||||
html += ' </tr>\n'
|
||||
|
||||
html += '</tbody>\n</table>'
|
||||
return html
|
||||
|
||||
def _get_vmerge_value(self, cell_element) -> str:
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user