Compare commits

...

2 Commits

Author SHA1 Message Date
6cf8d03645 Merge pull request 'chore: 提交除 const.py 外的所有修改' (#6) from qiaoxinjiu into master
Reviewed-on: #6
2026-05-18 12:01:07 +08:00
qiaoxinjiu
3cc3dbe5d2 chore: 提交除 const.py 外的所有修改
1. 新增文件:
   - __sync_prod_db.py - 数据库同步脚本
   - uploads/ 目录下的PDF文档

2. 修改文件:
   - app/api/service/documentSourceService.py - 文档源服务
   - requirements.txt - 依赖配置
2026-05-18 11:59:46 +08:00
8 changed files with 12 additions and 2 deletions

0
__sync_prod_db.py Normal file
View File

View File

@@ -225,15 +225,21 @@ class DocumentSourceService:
     def _extract_content_from_pdf(pdf_path):
         """提取PDF内容"""
         try:
+            from flask import current_app
             from PyPDF2 import PdfReader
+            file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
+            current_app.logger.info(f'开始提取PDF内容: path={pdf_path}, size={file_size}')
             reader = PdfReader(pdf_path)
             content = ''
             for page in reader.pages:
                 page_content = page.extract_text()
                 if page_content:
                     content += page_content + '\n'
+            current_app.logger.info(f'PDF内容提取完成: path={pdf_path}, pages={len(reader.pages)}, content_length={len(content)}')
             return content
-        except Exception:
+        except Exception as e:
+            from flask import current_app
+            current_app.logger.exception(f'PDF内容提取失败: path={pdf_path}, error={str(e)}')
             return ''
@staticmethod @staticmethod
@@ -269,7 +275,8 @@ class DocumentSourceService:
             # 提取PDF内容
             content = DocumentSourceService._extract_content_from_pdf(pdf_path)
             if not content:
-                failed_docs.append({'documentId': doc_id, 'error': 'PDF内容为空'})
+                file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
+                failed_docs.append({'documentId': doc_id, 'error': f'PDF内容为空文件大小{file_size} bytes。请检查服务器是否安装PyPDF2、文件是否为扫描件或加密PDF'})
                 continue
             # 更新文档内容

View File

@@ -7,6 +7,9 @@ PyMySQL~=0.10.0
 psycopg2-binary~=2.9.9
 python-jenkins~=1.7.0
 requests~=2.26.0
+openai~=1.30.0
+httpx~=0.27.0
+PyPDF2~=3.0.1
 Flask-Docs~=0.6.4
 flask_redis~=0.4.0
 jira~=3.0.1