chore: 提交除 const.py 外的所有修改

1. 新增文件:
   - __sync_prod_db.py - 数据库同步脚本
   - uploads/ 目录下的PDF文档

2. 修改文件:
   - app/api/service/documentSourceService.py - 文档源服务
   - requirements.txt - 依赖配置
This commit is contained in:
qiaoxinjiu
2026-05-18 11:59:46 +08:00
parent 921b1f7cd8
commit 3cc3dbe5d2
8 changed files with 12 additions and 2 deletions

0
__sync_prod_db.py Normal file
View File

View File

@@ -225,15 +225,21 @@ class DocumentSourceService:
def _extract_content_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    The text of each page that yields any is appended in page order, each
    followed by a newline. Pages for which ``extract_text()`` returns nothing
    are skipped.

    NOTE(review): the enclosing hunk header and the call site
    ``DocumentSourceService._extract_content_from_pdf(pdf_path)`` suggest this
    is a ``@staticmethod`` of ``DocumentSourceService``; the decorator line is
    outside the visible span -- confirm it is still present above this def.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated page text, or ``''`` when nothing could be extracted
        or any exception was raised while reading the file. The visible
        caller treats an empty result as a failed document.
    """
    # Imported once up front so both the success path and the error handler
    # share the same name instead of importing it twice.
    from flask import current_app

    try:
        # Kept inside the try block: a missing PyPDF2 install is reported
        # through the same logged-failure path as an unreadable file.
        from PyPDF2 import PdfReader

        # Size is only used for diagnostics; a missing file is logged as 0
        # and then fails in PdfReader below.
        file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
        current_app.logger.info(f'开始提取PDF内容: path={pdf_path}, size={file_size}')

        reader = PdfReader(pdf_path)
        pages = reader.pages
        page_texts = (page.extract_text() for page in pages)
        # Each non-empty page contributes its text plus a trailing newline.
        content = ''.join(text + '\n' for text in page_texts if text)

        current_app.logger.info(
            f'PDF内容提取完成: path={pdf_path}, pages={len(pages)}, content_length={len(content)}'
        )
        return content
    except Exception as e:
        # Deliberate best-effort: log the full traceback and signal failure
        # with an empty string rather than propagating to the caller.
        current_app.logger.exception(f'PDF内容提取失败: path={pdf_path}, error={e}')
        return ''
@staticmethod
@@ -269,7 +275,8 @@ class DocumentSourceService:
# 提取PDF内容
content = DocumentSourceService._extract_content_from_pdf(pdf_path)
if not content:
failed_docs.append({'documentId': doc_id, 'error': 'PDF内容为空'})
file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
failed_docs.append({'documentId': doc_id, 'error': f'PDF内容为空文件大小{file_size} bytes。请检查服务器是否安装PyPDF2、文件是否为扫描件或加密PDF'})
continue
# 更新文档内容

View File

@@ -7,6 +7,9 @@ PyMySQL~=0.10.0
psycopg2-binary~=2.9.9
python-jenkins~=1.7.0
requests~=2.26.0
openai~=1.30.0
httpx~=0.27.0
PyPDF2~=3.0.1
Flask-Docs~=0.6.4
flask_redis~=0.4.0
jira~=3.0.1