chore: 提交除 const.py 外的所有修改
1. 新增文件: - __sync_prod_db.py - 数据库同步脚本 - uploads/ 目录下的PDF文档 2. 修改文件: - app/api/service/documentSourceService.py - 文档源服务 - requirements.txt - 依赖配置
This commit is contained in:
0
__sync_prod_db.py
Normal file
0
__sync_prod_db.py
Normal file
@@ -225,15 +225,21 @@ class DocumentSourceService:
|
||||
def _extract_content_from_pdf(pdf_path):
    """Extract the text content of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of every page that yields text, each followed by a
        newline, or an empty string when extraction fails for any reason
        (the failure is logged with its traceback).
    """
    # Imported once up front so the failure handler below can log without
    # repeating the import.
    from flask import current_app

    try:
        # Kept inside the try so that a missing PyPDF2 installation is logged
        # and reported as empty content instead of propagating to the caller.
        from PyPDF2 import PdfReader

        # Size is logged purely for diagnostics; a missing file reports 0 here
        # and then fails inside PdfReader, which the handler below records.
        file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
        current_app.logger.info(f'开始提取PDF内容: path={pdf_path}, size={file_size}')

        reader = PdfReader(pdf_path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Pages that yield no text are skipped entirely (no blank line added).
        content = ''.join(text + '\n' for text in page_texts if text)

        current_app.logger.info(f'PDF内容提取完成: path={pdf_path}, pages={len(reader.pages)}, content_length={len(content)}')
        return content
    except Exception as e:
        # Best-effort by design: callers treat an empty string as
        # "no content" and report the document as failed.
        current_app.logger.exception(f'PDF内容提取失败: path={pdf_path}, error={str(e)}')
        return ''
|
||||
|
||||
@staticmethod
|
||||
@@ -269,7 +275,8 @@ class DocumentSourceService:
|
||||
# 提取PDF内容
|
||||
content = DocumentSourceService._extract_content_from_pdf(pdf_path)
|
||||
if not content:
|
||||
failed_docs.append({'documentId': doc_id, 'error': 'PDF内容为空'})
|
||||
file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
|
||||
failed_docs.append({'documentId': doc_id, 'error': f'PDF内容为空,文件大小:{file_size} bytes。请检查服务器是否安装PyPDF2、文件是否为扫描件或加密PDF'})
|
||||
continue
|
||||
|
||||
# 更新文档内容
|
||||
|
||||
@@ -7,6 +7,9 @@ PyMySQL~=0.10.0
|
||||
psycopg2-binary~=2.9.9
|
||||
python-jenkins~=1.7.0
|
||||
requests~=2.26.0
|
||||
openai~=1.30.0
|
||||
httpx~=0.27.0
|
||||
PyPDF2~=3.0.1
|
||||
Flask-Docs~=0.6.4
|
||||
flask_redis~=0.4.0
|
||||
jira~=3.0.1
|
||||
Binary file not shown.
BIN
uploads/智慧运营/智慧运营V2.0/20260514174912-SZ采购工作台V2_0PRD-f77aac9e.pdf
Normal file
BIN
uploads/智慧运营/智慧运营V2.0/20260514174912-SZ采购工作台V2_0PRD-f77aac9e.pdf
Normal file
Binary file not shown.
BIN
uploads/智慧运营/智慧运营V2.0/20260514174912-回货单流程-aa48090c.pdf
Normal file
BIN
uploads/智慧运营/智慧运营V2.0/20260514174912-回货单流程-aa48090c.pdf
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user