chore: 提交除 const.py 外的所有修改
1. 新增文件:
   - __sync_prod_db.py - 数据库同步脚本
   - uploads/ 目录下的PDF文档
2. 修改文件:
   - app/api/service/documentSourceService.py - 文档源服务
   - requirements.txt - 依赖配置
This commit is contained in:
@@ -225,15 +225,21 @@ class DocumentSourceService:
|
||||
def _extract_content_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Progress and failures are written to the Flask application logger.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated text of all pages that yielded any text, each
        page followed by a newline. Returns '' when extraction fails for
        any reason (the exception is logged, not raised) or when no page
        yields text.
    """
    from flask import current_app

    try:
        # Imported inside the try on purpose: if PyPDF2 is not installed,
        # the ImportError is logged and reported to the caller as empty
        # content rather than propagating.
        from PyPDF2 import PdfReader

        # A missing file is logged with size 0; PdfReader below will then
        # raise and be handled by the except clause.
        file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
        current_app.logger.info(f'开始提取PDF内容: path={pdf_path}, size={file_size}')

        reader = PdfReader(pdf_path)
        page_texts = (page.extract_text() for page in reader.pages)
        # Pages for which extract_text() returns nothing are skipped.
        content = ''.join(text + '\n' for text in page_texts if text)

        current_app.logger.info(f'PDF内容提取完成: path={pdf_path}, pages={len(reader.pages)}, content_length={len(content)}')
        return content
    except Exception as e:
        # Best-effort by design: callers treat '' as "no content".
        current_app.logger.exception(f'PDF内容提取失败: path={pdf_path}, error={str(e)}')
        return ''
|
||||
|
||||
@staticmethod
|
||||
@@ -269,7 +275,8 @@ class DocumentSourceService:
|
||||
# 提取PDF内容
|
||||
content = DocumentSourceService._extract_content_from_pdf(pdf_path)
|
||||
if not content:
|
||||
failed_docs.append({'documentId': doc_id, 'error': 'PDF内容为空'})
|
||||
file_size = os.path.getsize(pdf_path) if os.path.exists(pdf_path) else 0
|
||||
failed_docs.append({'documentId': doc_id, 'error': f'PDF内容为空,文件大小:{file_size} bytes。请检查服务器是否安装PyPDF2、文件是否为扫描件或加密PDF'})
|
||||
continue
|
||||
|
||||
# 更新文档内容
|
||||
|
||||
Reference in New Issue
Block a user