336 lines
12 KiB
Python
336 lines
12 KiB
Python
# PDF生成服务 - 支持中文和富文本
|
|
|
|
import os
|
|
from reportlab.lib.pagesizes import A4
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
from reportlab.lib.units import mm
|
|
from reportlab.lib import colors
|
|
from reportlab.platypus import (
|
|
SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
|
)
|
|
from reportlab.pdfbase import pdfmetrics
|
|
from reportlab.pdfbase.ttfonts import TTFont
|
|
from reportlab.lib.enums import TA_CENTER, TA_LEFT
|
|
from reportlab.pdfgen import canvas as pdfcanvas
|
|
|
|
# Register the CJK font used for Chinese text.
# Windows and Linux keep their fonts in different locations.
if os.name == 'nt':  # Windows
    FONT_PATH = r"C:\Windows\Fonts\msyh.ttc"
    FONT_BOLD_PATH = r"C:\Windows\Fonts\msyhbd.ttc"
else:  # Linux (Docker)
    # Probe the usual install locations of common Chinese fonts, in order.
    possible_paths = [
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    ]
    FONT_PATH = None
    for p in possible_paths:
        if os.path.exists(p):
            FONT_PATH = p
            break
    if not FONT_PATH:
        # Nothing was found: keep the first candidate as the default so the
        # registration below is attempted with it (and is expected to fail).
        FONT_PATH = possible_paths[0]
    # No separate bold face on this platform; reuse the regular font file.
    FONT_BOLD_PATH = FONT_PATH

# CHINESE_FONT_OK records whether the "Chinese" font could be registered.
# Only ``Exception`` is caught so that KeyboardInterrupt / SystemExit raised
# while the module is being imported are not swallowed.
try:
    pdfmetrics.registerFont(TTFont("Chinese", FONT_PATH))
    try:
        pdfmetrics.registerFont(TTFont("Chinese-Bold", FONT_BOLD_PATH))
    except Exception:
        if FONT_BOLD_PATH != FONT_PATH:
            # The dedicated bold file is unusable: alias the regular font.
            pdfmetrics.registerFont(TTFont("Chinese-Bold", FONT_PATH))
        # Otherwise the alias uses the same file that was just tried, so
        # there is nothing else to fall back to; the failure is ignored.
    CHINESE_FONT_OK = True
except Exception:
    CHINESE_FONT_OK = False
|
|
|
|
import re
|
|
import qrcode
|
|
from io import BytesIO
|
|
from reportlab.platypus import Image as RLImage
|
|
|
|
# Regular expression for an http(s) URL: the scheme followed by one or more
# characters that are not whitespace, angle brackets, double quotes, braces,
# a pipe, a backslash, a caret, a backtick or square brackets.
URL_PATTERN = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
|
|
|
|
def generate_qr_image(url, size=50*mm):
    """Encode *url* as a QR code and return the PNG bytes in a BytesIO.

    The returned buffer is rewound to position 0 so it can be read directly.
    ``size`` is accepted but not used by this function; the QR code is always
    produced with ``box_size=10``.
    """
    png_buffer = BytesIO()
    qrcode.make(url, box_size=10).save(png_buffer, format='PNG')
    png_buffer.seek(0)
    return png_buffer
|
|
|
|
def contains_url(text):
    """Return True when *text* contains at least one URL_PATTERN match."""
    found = URL_PATTERN.search(text)
    return found is not None
|
|
|
|
def md_to_xml(text):
    """Translate a small Markdown subset into ReportLab paragraph markup.

    Inline forms recognised while scanning left to right:

    * ``**bold**`` becomes ``<font name="Chinese-Bold">`` when the Chinese
      font was registered, otherwise ``<b>``
    * ``*italic*`` becomes ``<i>``
    * a backtick-quoted span becomes ``<font name="Courier">``

    A marker without a matching closer is emitted literally. The text itself
    is not XML-escaped. Falsy input yields an empty string.
    """
    if not text:
        return ""

    pieces = []
    pos = 0
    total = len(text)
    while pos < total:
        char = text[pos]

        if char == '*':
            # Bold is tried first so that '**' is not read as two italics.
            if text[pos:pos + 2] == '**':
                close = text.find('**', pos + 2)
                if close != -1:
                    inner = text[pos + 2:close]
                    if CHINESE_FONT_OK:
                        pieces.append(f'<font name="Chinese-Bold">{inner}</font>')
                    else:
                        pieces.append(f'<b>{inner}</b>')
                    pos = close + 2
                    continue
            # A star directly after '*' or '_' never opens an italic span.
            if pos == 0 or text[pos - 1] not in '*_':
                close = text.find('*', pos + 1)
                # The closing star must not itself follow another star.
                if close != -1 and text[close - 1] != '*':
                    pieces.append(f'<i>{text[pos + 1:close]}</i>')
                    pos = close + 1
                    continue
        elif char == '`':
            close = text.find('`', pos + 1)
            if close != -1:
                pieces.append(f'<font name="Courier">{text[pos + 1:close]}</font>')
                pos = close + 1
                continue

        # No markup starts here: copy the character through unchanged.
        pieces.append(char)
        pos += 1

    return ''.join(pieces)
|
|
|
|
|
|
class PianoPDF:
    """Collects ReportLab flowables (titles, headings, text, tables) in order.

    The accumulated flowables are available as ``self.elements``. Fonts are
    chosen from ``CHINESE_FONT_OK``: the registered "Chinese" /
    "Chinese-Bold" fonts when available, otherwise Helvetica.
    """

    def __init__(self):
        self.elements = []
        self.styles = getSampleStyleSheet()

        if CHINESE_FONT_OK:
            self.base_font = "Chinese"
            self.bold_font = "Chinese-Bold"
        else:
            self.base_font = "Helvetica"
            self.bold_font = "Helvetica-Bold"

        # Document title
        self.title_style = ParagraphStyle(
            "CustomTitle",
            parent=self.styles["Heading1"],
            fontName=self.bold_font,
            fontSize=18,
            spaceAfter=10*mm,
            alignment=TA_CENTER,
        )

        # Second-level heading
        self.heading_style = ParagraphStyle(
            "CustomHeading",
            parent=self.styles["Heading2"],
            fontName=self.bold_font,
            fontSize=14,
            spaceAfter=6*mm,
            spaceBefore=6*mm,
            textColor=colors.HexColor("#2c3e50"),
        )

        # Third-level heading
        self.h3_style = ParagraphStyle(
            "CustomH3",
            parent=self.styles["Heading3"],
            fontName=self.bold_font,
            fontSize=12,
            spaceAfter=4*mm,
            spaceBefore=4*mm,
            textColor=colors.HexColor("#34495e"),
        )

        # Body text
        self.body_style = ParagraphStyle(
            "CustomBody",
            parent=self.styles["Normal"],
            fontName=self.base_font,
            fontSize=12,
            spaceAfter=1*mm,
            leading=14,
        )

        # Header-row cells. Table cells are Paragraph flowables, which are
        # drawn with their own paragraph style, so the header FONTNAME /
        # TEXTCOLOR commands in table_style below do not reach them. This
        # style carries the same bold font and whitesmoke colour instead.
        self.table_header_style = ParagraphStyle(
            "CustomTableHeader",
            parent=self.body_style,
            fontName=self.bold_font,
            textColor=colors.whitesmoke,
        )

        # Table grid, backgrounds and padding
        self.table_style = TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#3498db")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
            ("ALIGN", (0, 0), (-1, -1), "LEFT"),
            ("FONTNAME", (0, 0), (-1, 0), self.bold_font),
            ("FONTSIZE", (0, 0), (-1, 0), 12),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
            ("BACKGROUND", (0, 1), (-1, -1), colors.white),
            ("FONTNAME", (0, 1), (-1, -1), self.base_font),
            ("FONTSIZE", (0, 1), (-1, -1), 11),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8f9fa")]),
        ])

    def add_title(self, text):
        """Append *text* as the centred title followed by a 5 mm spacer."""
        self.elements.append(Paragraph(md_to_xml(text), self.title_style))
        self.elements.append(Spacer(1, 5*mm))

    def add_heading(self, text, level=2):
        """Append a heading; ``level=3`` uses the H3 style, anything else H2."""
        style = self.h3_style if level == 3 else self.heading_style
        self.elements.append(Paragraph(md_to_xml(text), style))

    def add_paragraph(self, text):
        """Append *text* as body text, or as a QR code if it is only a URL.

        Empty text is ignored. When the stripped text is exactly one URL a
        50 mm centred QR image is added instead; if generating the image
        fails, the URL is rendered as ordinary text.
        """
        if not text:
            return

        stripped = text.strip()
        url_match = URL_PATTERN.match(stripped)
        if url_match and url_match.group() == stripped:
            url = url_match.group()
            try:
                buf = generate_qr_image(url, 50*mm)
                img = RLImage(buf, width=50*mm, height=50*mm)
                img.hAlign = 'CENTER'
                self.elements.append(img)
                self.elements.append(Spacer(1, 2*mm))
                return
            except Exception:
                # QR generation is best-effort: fall through to plain text.
                pass

        self.elements.append(Paragraph(md_to_xml(text), self.body_style))
        self.elements.append(Spacer(1, 1*mm))

    def add_list(self, items):
        """Append each non-empty item as a bullet paragraph plus a spacer."""
        for item in items:
            if item:
                self.elements.append(Paragraph(f"• {md_to_xml(item)}", self.body_style))
                self.elements.append(Spacer(1, 1*mm))

    def add_table(self, data):
        """Append *data* (a sequence of rows, header first) as a table.

        Tables with fewer than two rows, or with no columns at all, are
        skipped. Non-empty cells are converted with ``md_to_xml`` and wrapped
        in Paragraphs; falsy cells become empty strings. Short rows are
        padded with empty cells so that every row has the same number of
        columns, which ``Table`` requires.
        """
        if not data or len(data) < 2:
            return

        rows = [list(row) for row in data]
        column_count = max(len(row) for row in rows)
        if column_count == 0:
            return

        table_data = []
        for row_index, row in enumerate(rows):
            cell_style = self.table_header_style if row_index == 0 else self.body_style
            cells = [
                Paragraph(md_to_xml(str(cell)), cell_style) if cell else ""
                for cell in row
            ]
            # Pad ragged rows so the table stays rectangular.
            cells.extend([""] * (column_count - len(cells)))
            table_data.append(cells)

        table = Table(table_data)
        table.setStyle(self.table_style)
        self.elements.append(table)
        self.elements.append(Spacer(1, 3*mm))
|
|
|
|
|
|
# One cell of a Markdown table delimiter row: ``---``, ``:---``, ``---:``
# or ``:---:``.
_TABLE_DELIMITER_CELL = re.compile(r'^:?-+:?$')


def _split_table_row(line):
    """Split a ``| a | b |`` row into stripped cells (trailing pipe optional)."""
    inner = line[1:]
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_markdown(pdf, markdown_text):
    """Feed a line-oriented Markdown subset into *pdf*, preserving order.

    Handles ``#`` / ``##`` / ``###`` headings, ``- `` bullets, pipe tables and
    plain paragraphs. Blank lines and lines starting with ``---`` are
    skipped. Consecutive table rows are buffered and emitted as one table as
    soon as any non-table line (or the end of the text) is reached.
    """
    pending_rows = []

    def flush_table():
        if pending_rows:
            pdf.add_table(list(pending_rows))
            del pending_rows[:]

    for raw_line in markdown_text.split('\n'):
        line = raw_line.strip()

        if line.startswith('|') and '|' in line[1:]:
            # Cells stay as raw Markdown: add_table does the markup
            # conversion, so converting here would apply it twice.
            cells = _split_table_row(line)
            filled = [cell for cell in cells if cell]
            # Skip delimiter rows and rows without any content.
            if filled and not all(_TABLE_DELIMITER_CELL.match(cell) for cell in filled):
                pending_rows.append(cells)
            continue

        # Any non-table line ends the current table.
        flush_table()

        if not line or line.startswith('---'):
            continue

        # Strip only the leading marker; ``str.replace`` would also remove
        # later occurrences such as the "# " in "C# minor".
        if line.startswith('### '):
            pdf.add_heading(line[4:], level=3)
        elif line.startswith('## '):
            pdf.add_heading(line[3:])
        elif line.startswith('# '):
            pdf.add_title(line[2:])
        elif line.startswith('- '):
            pdf.add_paragraph(f"• {line[2:]}")
        else:
            pdf.add_paragraph(line)

    flush_table()


def generate_pdf(plan_id, student_name, content, output_dir, rendered_report=None, watermark_text=None):
    """Build ``plan_<plan_id>.pdf`` inside *output_dir* and return its path.

    Args:
        plan_id: Identifier interpolated into the output file name.
        student_name: Name shown in the structured (non-rendered) layout.
        content: Mapping read with ``.get`` for ``practice_time``,
            ``generated_at`` and ``ai_report``; only used when
            *rendered_report* is falsy.
        output_dir: Target directory; created if it does not exist.
        rendered_report: Optional Markdown text. When truthy it is the sole
            source of the document body.
        watermark_text: Optional text drawn diagonally across every page.
            It is only drawn when the Chinese font was registered.

    Returns:
        The path of the written PDF file.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"plan_{plan_id}.pdf")

    doc = SimpleDocTemplate(
        output_path, pagesize=A4,
        rightMargin=20*mm, leftMargin=20*mm,
        topMargin=20*mm, bottomMargin=20*mm,
    )

    pdf = PianoPDF()

    if rendered_report:
        _render_markdown(pdf, rendered_report)
    else:
        # Structured layout built from the content mapping.
        pdf.add_title(f"钢琴练习方案 - {student_name}")
        pdf.add_heading("学员信息")
        pdf.add_paragraph(f"学员姓名:{student_name}")
        pdf.add_paragraph(f"每日练习时间:{content.get('practice_time', 'N/A')}")
        pdf.add_paragraph(f"生成时间:{content.get('generated_at', '')}")

        if content.get("ai_report"):
            pdf.add_heading("AI个性化练习报告")
            _render_markdown(pdf, content["ai_report"])

    def draw_watermark(c, _doc):
        """Page callback: draw the watermark on the canvas *c*."""
        if not watermark_text or not CHINESE_FONT_OK:
            return
        c.saveState()
        try:
            c.setFont("Chinese", 56)
            # Light grey, mostly transparent.
            c.setFillColor(colors.Color(0.6, 0.6, 0.6, alpha=0.25))
            # Rotate 45 degrees around the page centre.
            c.translate(A4[0]/2, A4[1]/2)
            c.rotate(45)
            c.drawCentredString(0, 0, watermark_text)
        except Exception:
            # The watermark is best-effort; never fail the build over it.
            pass
        finally:
            # Always undo the translate/rotate so page content is unaffected.
            c.restoreState()

    # Drop trailing spacers so they cannot spill onto an extra blank page.
    while pdf.elements and isinstance(pdf.elements[-1], Spacer):
        pdf.elements.pop()

    doc.build(pdf.elements, onFirstPage=draw_watermark, onLaterPages=draw_watermark)
    return output_path