文档清洗系统初始化脚本

This commit is contained in:
cxs
2025-05-16 11:30:02 +08:00
parent a73040d739
commit 532eb2857c
29 changed files with 11568 additions and 225 deletions

468
cxs/static/index.html Normal file
View File

@@ -0,0 +1,468 @@
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>文档处理系统</title>
<style>
/* Page shell: centered column on a light grey background. */
body {
font-family: 'Microsoft YaHei', sans-serif;
max-width: 1000px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
/* White card that wraps the whole UI. */
.container {
background-color: white;
padding: 30px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
h1 {
color: #333;
text-align: center;
margin-bottom: 30px;
}
/* Click / drag-and-drop target for choosing files. */
.upload-area {
border: 2px dashed #ccc;
padding: 20px;
text-align: center;
margin-bottom: 20px;
border-radius: 4px;
cursor: pointer;
transition: all 0.3s ease;
}
.upload-area:hover {
border-color: #666;
}
/* Applied by the script while a drag is over the upload area. */
.upload-area.dragover {
border-color: #4CAF50;
background-color: #E8F5E9;
}
/* The native file input stays hidden; the script opens it when the upload area is clicked. */
#file-input {
display: none;
}
/* Shared button look (green by default). */
.btn {
background-color: #4CAF50;
color: white;
padding: 10px 20px;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
transition: background-color 0.3s ease;
margin: 0 5px;
}
.btn:hover {
background-color: #45a049;
}
/* Declared after :hover so a disabled button stays grey even when hovered. */
.btn:disabled {
background-color: #cccccc;
cursor: not-allowed;
}
/* Status message box; hidden until the script shows a message in it. */
#status {
margin-top: 20px;
padding: 10px;
border-radius: 4px;
display: none;
}
/* Generic success / error colour schemes. */
.success {
background-color: #E8F5E9;
color: #2E7D32;
}
.error {
background-color: #FFEBEE;
color: #C62828;
}
/* Scrollable list of queued files. */
.file-list {
margin: 20px 0;
max-height: 300px;
overflow-y: auto;
}
/* One row per queued file: name, progress bar, remove button. */
.file-item {
display: flex;
align-items: center;
justify-content: space-between;
padding: 10px;
border: 1px solid #ddd;
margin-bottom: 5px;
border-radius: 4px;
}
/* Grey track behind the progress bar; takes the remaining row width. */
.file-item .progress-container {
flex: 1;
margin: 0 20px;
background-color: #f0f0f0;
border-radius: 10px;
overflow: hidden;
}
/* Green fill; its width is set inline by the script. Positioned so the label can be absolutely placed inside it. */
.file-item .progress-bar {
height: 20px;
background-color: #4CAF50;
width: 0%;
transition: width 0.3s ease;
border-radius: 10px;
position: relative;
}
/* Percentage label; sized relative to the fill, so it is as wide as the bar itself. */
.progress-text {
position: absolute;
width: 100%;
text-align: center;
color: white;
font-size: 12px;
line-height: 20px;
}
.file-item .remove-btn {
background-color: #f44336;
color: white;
border: none;
padding: 5px 10px;
border-radius: 3px;
cursor: pointer;
}
/* Results section, separated from the queue by a top border. */
.result-container {
margin-top: 20px;
border-top: 1px solid #ddd;
padding-top: 20px;
}
/* One row per processed file. */
.result-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
border: 1px solid #ddd;
margin-bottom: 5px;
border-radius: 4px;
background-color: #fff;
}
/* Row tint by outcome; more specific than .result-item, so these override its white background. */
.result-item.error {
background-color: #FFEBEE;
}
.result-item.success {
background-color: #E8F5E9;
}
.result-info {
flex: 1;
margin-right: 10px;
}
.button-group {
text-align: center;
margin: 20px 0;
}
/* Extracted-text preview; hidden until toggled by the script. pre-wrap keeps line breaks. */
.result-text {
max-height: 300px;
overflow-y: auto;
border: 1px solid #ddd;
padding: 10px;
margin-top: 10px;
background-color: #fff;
border-radius: 4px;
white-space: pre-wrap;
display: none;
}
/* Horizontal row of per-result action buttons. */
.result-buttons {
display: flex;
gap: 10px;
}
</style>
</head>
<body>
<div class="container">
<h1>文档处理系统</h1>
<!-- Upload target: the script listens for click and drag events on #drop-area. -->
<div class="upload-area" id="drop-area">
<p>点击或拖拽文件到此处上传</p>
<p>支持的格式:.doc, .docx, .pdf, .html, .htm, .xls, .xlsx</p>
<p>可以同时选择多个文件</p>
<!-- Hidden by CSS; the accept list mirrors the extension list checked in handleFiles. -->
<input type="file" id="file-input" accept=".doc,.docx,.pdf,.html,.htm,.xls,.xlsx" multiple>
</div>
<!-- Filled by updateFileList with one row per queued file. -->
<div class="file-list" id="file-list"></div>
<div class="button-group">
<!-- Starts disabled; the script enables it once at least one file is queued. -->
<button id="upload-btn" class="btn" disabled>开始处理</button>
<button id="clear-btn" class="btn" style="background-color: #f44336;">清空列表</button>
</div>
<!-- Message box written to by showMessage. -->
<div id="status"></div>
<div class="result-container">
<h2>处理结果</h2>
<!-- Filled by displayResults with one row per processed file. -->
<div id="result-list"></div>
</div>
</div>
<script>
// Cached references to the DOM elements the script works with.
const dropArea = document.getElementById('drop-area');
const fileInput = document.getElementById('file-input');
const uploadBtn = document.getElementById('upload-btn');
const clearBtn = document.getElementById('clear-btn');
const status = document.getElementById('status');
const fileList = document.getElementById('file-list');
const resultList = document.getElementById('result-list');

// Files waiting to be processed, keyed by file name.
let files = new Map();
// True while the upload handler is working through the queue.
let processing = false;

// Suppress the browser's default handling of every drag-related event on the drop area.
for (const eventName of ['dragenter', 'dragover', 'dragleave', 'drop']) {
  dropArea.addEventListener(eventName, preventDefaults, false);
}
/**
 * Cancels an event's default browser action and stops it from propagating further.
 * @param {Event} e - The event being handled.
 */
function preventDefaults(e) {
  e.preventDefault();
  e.stopPropagation();
}
// Turn the "dragover" styling on while a drag is over the drop area...
for (const eventName of ['dragenter', 'dragover']) {
  dropArea.addEventListener(eventName, highlight, false);
}
// ...and off again once the drag leaves or the files are dropped.
for (const eventName of ['dragleave', 'drop']) {
  dropArea.addEventListener(eventName, unhighlight, false);
}
/**
 * Adds the `dragover` class to the drop area.
 * @param {Event} e - The triggering event (not used).
 */
function highlight(e) {
  const { classList } = dropArea;
  classList.add('dragover');
}
/**
 * Removes the `dragover` class from the drop area.
 * @param {Event} e - The triggering event (not used).
 */
function unhighlight(e) {
  const { classList } = dropArea;
  classList.remove('dragover');
}
// Handle files dropped onto the upload area.
dropArea.addEventListener('drop', handleDrop, false);
/**
 * Drop handler: passes the files carried by the drag event on to handleFiles as an array.
 * @param {DragEvent} e - The drop event; `e.dataTransfer.files` is read.
 */
function handleDrop(e) {
  const { files: droppedFiles } = e.dataTransfer;
  handleFiles(Array.from(droppedFiles));
}
// Clicking the upload area opens the native file picker.
dropArea.addEventListener('click', function openFilePicker() {
  fileInput.click();
});

// Queue whatever the user picked.
fileInput.addEventListener('change', (event) => {
  const input = event.currentTarget;
  handleFiles(Array.from(input.files));
  // Reset the input so that picking the same file again still fires "change".
  input.value = '';
});
// "Clear list" button: empties the queue, unless files are currently being processed.
clearBtn.addEventListener('click', () => {
  if (processing) return;
  files.clear();
  updateFileList();
  uploadBtn.disabled = true;
});
/**
 * Adds newly chosen files to the pending queue and refreshes the list.
 *
 * Files are keyed by name, so choosing a file with an already-queued name
 * replaces the earlier entry. Files whose extension is not supported are not
 * queued; their names are reported to the user through showMessage.
 * While a processing run is active nothing is queued, matching the clear and
 * remove actions, which are also blocked during a run.
 *
 * @param {File[]} newFiles - Files picked in the dialog or dropped on the page.
 */
function handleFiles(newFiles) {
  if (processing) {
    showMessage('正在处理文件,请稍后再添加');
    return;
  }
  const validTypes = ['.doc', '.docx', '.pdf', '.html', '.htm', '.xls', '.xlsx'];
  const rejectedNames = [];
  newFiles.forEach((file) => {
    // Slice first, then lower-case: lower-casing can change a string's length,
    // so an index taken from the original name must be applied to the original name.
    const dotIndex = file.name.lastIndexOf('.');
    const fileExtension = dotIndex === -1 ? '' : file.name.slice(dotIndex).toLowerCase();
    if (validTypes.includes(fileExtension)) {
      files.set(file.name, {
        file: file,
        progress: 0,
        status: 'pending', // pending, processing, completed, error
      });
    } else {
      rejectedNames.push(file.name);
    }
  });
  if (rejectedNames.length > 0) {
    showMessage(`不支持的文件格式: ${rejectedNames.join(', ')}`);
  }
  updateFileList();
  uploadBtn.disabled = files.size === 0;
}
/**
 * Rebuilds the queued-file list from the `files` map.
 *
 * Each entry becomes a row holding the file name, a progress bar whose width
 * and label show `progress` as a percentage, and a remove button. The remove
 * button does nothing while a processing run is active.
 */
function updateFileList() {
  // Creates an element and, when a class name is given, assigns it.
  const build = (tag, className) => {
    const node = document.createElement(tag);
    if (className !== undefined) node.className = className;
    return node;
  };

  fileList.innerHTML = '';
  for (const [fileName, fileData] of files) {
    const percentLabel = fileData.progress + '%';

    const label = build('span');
    label.textContent = fileName;

    const barText = build('div', 'progress-text');
    barText.textContent = percentLabel;
    const bar = build('div', 'progress-bar');
    bar.style.width = percentLabel;
    bar.appendChild(barText);
    const track = build('div', 'progress-container');
    track.appendChild(bar);

    const removeButton = build('button', 'remove-btn');
    removeButton.textContent = '删除';
    removeButton.onclick = () => {
      if (processing) return;
      files.delete(fileName);
      updateFileList();
      uploadBtn.disabled = files.size === 0;
    };

    const row = build('div', 'file-item');
    row.appendChild(label);
    row.appendChild(track);
    row.appendChild(removeButton);
    fileList.appendChild(row);
  }
}
// Upload handler: posts the queued files to /api/upload/ one at a time, then
// renders the collected results. When the run ends the queue is emptied.
uploadBtn.addEventListener('click', async () => {
  // Ignore clicks while a run is active or when nothing is queued.
  if (processing || files.size === 0) return;
  processing = true;
  uploadBtn.disabled = true;
  status.style.display = 'none';
  resultList.innerHTML = '';
  try {
    const results = [];
    // Files are handled strictly one after another, one request per file.
    for (const [fileName, fileData] of files.entries()) {
      const formData = new FormData();
      formData.append('files', fileData.file);
      // Mark the file as in progress and refresh the list.
      fileData.status = 'processing';
      updateFileList();
      try {
        const response = await fetch('/api/upload/', {
          method: 'POST',
          body: formData,
          credentials: 'same-origin',
        });
        if (!response.ok) {
          throw new Error(`HTTP error! status: ${response.status}`);
        }
        const result = await response.json();
        console.log(`文件 ${fileName} 处理结果:`, result); // debug log
        if (result.error) {
          fileData.status = 'error';
          showMessage(`文件 ${fileName} 处理失败: ${result.error}`);
        } else if (result.results && result.results.length > 0) {
          fileData.status = 'completed';
          results.push(...result.results);
        } else {
          // Neither an error nor any results: report it instead of leaving
          // the file stuck in the "processing" state.
          fileData.status = 'error';
          showMessage(`文件 ${fileName} 处理失败: 服务器未返回处理结果`);
        }
      } catch (error) {
        // A failure on one file must not abort the rest of the queue.
        console.error(`文件 ${fileName} 处理错误:`, error);
        fileData.status = 'error';
        showMessage(`文件 ${fileName} 处理失败: ${error.message}`);
      }
      // The request for this file has finished (successfully or not).
      fileData.progress = 100;
      updateFileList();
      // Wait briefly before the next file, to make sure processing has completed.
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
    // Awaited so that a failure while rendering reaches the catch below
    // instead of becoming an unhandled promise rejection.
    await displayResults(results);
  } catch (error) {
    console.error('处理错误:', error);
    showMessage(`处理失败: ${error.message}`);
  } finally {
    processing = false;
    files.clear();
    updateFileList();
    // The queue was just emptied, so the button stays disabled until new
    // files are added (same rule as everywhere else the queue changes).
    uploadBtn.disabled = files.size === 0;
  }
});
/**
 * Appends one row per processing result to the result list.
 *
 * A result whose `status` is 'success' gets a success row with optional
 * actions: download TXT (`output_file`), download Markdown (`markdown_file`)
 * and a show/hide toggle for `content`. Any other status is rendered as a
 * failure row showing `error` (or a generic fallback text).
 *
 * All server-supplied text is inserted as text nodes, never parsed as HTML,
 * and download file names are percent-encoded per path segment.
 * If `results` is empty, a notice is shown via showMessage and nothing is rendered.
 *
 * @param {Array<Object>} results - Result records returned by the upload API.
 * @returns {Promise<void>} Resolves once the rows have been appended.
 */
async function displayResults(results) {
  if (results.length === 0) {
    showMessage('没有文件被处理');
    return;
  }

  // Builds the download URL; encoding each segment keeps characters such as
  // '#', '?' and '%' from being interpreted as URL syntax, while '/' is preserved.
  const buildDownloadUrl = (fileName) => {
    const encodedPath = String(fileName).split('/').map((part) => encodeURIComponent(part)).join('/');
    return `/api/download/${encodedPath}`;
  };

  // Creates a `.btn` button with the given label and click handler.
  const createButton = (label, onClick) => {
    const button = document.createElement('button');
    button.className = 'btn';
    button.textContent = label;
    button.onclick = onClick;
    return button;
  };

  // Fills the info cell with "<strong>name</strong> suffix" using text nodes only.
  const fillInfo = (infoEl, fileName, suffix) => {
    const nameEl = document.createElement('strong');
    nameEl.textContent = String(fileName);
    infoEl.appendChild(nameEl);
    infoEl.appendChild(document.createTextNode(suffix));
  };

  results.forEach((result) => {
    const succeeded = result.status === 'success';
    const resultItem = document.createElement('div');
    // Only the two known modifier classes are used; the raw status value is
    // never written into the class attribute.
    resultItem.className = `result-item ${succeeded ? 'success' : 'error'}`;
    const resultInfo = document.createElement('div');
    resultInfo.className = 'result-info';

    if (!succeeded) {
      fillInfo(resultInfo, result.filename, ` 处理失败: ${result.error || '未知错误'}`);
      resultItem.appendChild(resultInfo);
      resultList.appendChild(resultItem);
      return;
    }

    fillInfo(resultInfo, result.filename, ' 处理成功');
    const buttonsDiv = document.createElement('div');
    buttonsDiv.className = 'result-buttons';

    // Download TXT button
    if (result.output_file) {
      buttonsDiv.appendChild(createButton('下载TXT', () => {
        window.location.href = buildDownloadUrl(result.output_file);
      }));
    }

    // Download Markdown button
    if (result.markdown_file) {
      const downloadMarkdownBtn = createButton('下载MD', () => {
        window.location.href = buildDownloadUrl(result.markdown_file);
      });
      downloadMarkdownBtn.style.backgroundColor = '#2196F3'; // different colour to tell it apart
      buttonsDiv.appendChild(downloadMarkdownBtn);
    }

    // Show/hide content button
    if (result.content) {
      const textDiv = document.createElement('div');
      textDiv.className = 'result-text';
      textDiv.textContent = result.content;
      textDiv.style.display = 'none';
      const showTextBtn = createButton('查看内容', () => {
        const isVisible = textDiv.style.display === 'block';
        textDiv.style.display = isVisible ? 'none' : 'block';
        showTextBtn.textContent = isVisible ? '查看内容' : '隐藏内容';
      });
      buttonsDiv.appendChild(showTextBtn);
      // The preview is attached ahead of the info and button cells.
      resultItem.appendChild(textDiv);
    }

    resultItem.appendChild(resultInfo);
    resultItem.appendChild(buttonsDiv);
    resultList.appendChild(resultItem);
  });
}
/**
 * Shows a message in the #status box with error styling and hides it again
 * after 3 seconds.
 *
 * Calling it again while a message is still visible replaces the text and
 * restarts the 3-second countdown, so the newer message is not cut short by
 * the timer that belonged to the older one.
 *
 * @param {string} message - Text to display.
 */
function showMessage(message) {
  const statusDiv = document.getElementById('status');
  statusDiv.textContent = message;
  statusDiv.className = 'error';
  statusDiv.style.display = 'block';
  // The pending timer is kept on the function itself so that no extra
  // top-level variable is needed; cancelling an unset handle is a no-op.
  clearTimeout(showMessage.hideTimer);
  showMessage.hideTimer = setTimeout(() => {
    showMessage.hideTimer = undefined;
    statusDiv.style.display = 'none';
    statusDiv.textContent = '';
    statusDiv.className = '';
  }, 3000);
}
</script>
</body>
</html>