# piano-plan/app/services/pdf_generator.py

# PDF generation service with support for Chinese text and rich text.

import logging
import os

from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import mm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas as pdfcanvas
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
)

# Register a CJK-capable TrueType font under the names "Chinese" and
# "Chinese-Bold". Windows and Linux keep their fonts in different places.
if os.name == 'nt':  # Windows
    FONT_PATH = r"C:\Windows\Fonts\msyh.ttc"
    FONT_BOLD_PATH = r"C:\Windows\Fonts\msyhbd.ttc"
else:  # Linux (Docker)
    # Probe the usual install locations of common Chinese fonts, in order.
    possible_paths = [
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
        "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
    ]
    FONT_PATH = None
    for p in possible_paths:
        if os.path.exists(p):
            FONT_PATH = p
            break
    if not FONT_PATH:
        # Nothing found: keep the first candidate so that registration below
        # fails and CHINESE_FONT_OK ends up False.
        FONT_PATH = possible_paths[0]
    # No separate bold face on this platform; the regular font is reused.
    FONT_BOLD_PATH = FONT_PATH

# CHINESE_FONT_OK records whether the regular "Chinese" font was registered;
# the rest of the module falls back to Helvetica when it is False.
try:
    pdfmetrics.registerFont(TTFont("Chinese", FONT_PATH))
    try:
        pdfmetrics.registerFont(TTFont("Chinese-Bold", FONT_BOLD_PATH))
    except Exception as bold_exc:
        if FONT_BOLD_PATH != FONT_PATH:
            # A dedicated bold file exists but could not be loaded: alias the
            # regular font instead. A failure here propagates to the outer
            # handler and disables Chinese fonts altogether.
            logging.getLogger(__name__).warning(
                "Could not register bold font %s (%s); reusing %s",
                FONT_BOLD_PATH, bold_exc, FONT_PATH,
            )
            pdfmetrics.registerFont(TTFont("Chinese-Bold", FONT_PATH))
        else:
            # Best effort: registering the alias failed, keep going with the
            # regular font only (same outcome as before, but now reported).
            logging.getLogger(__name__).warning(
                "Could not register 'Chinese-Bold' alias for %s: %s",
                FONT_PATH, bold_exc,
            )
    CHINESE_FONT_OK = True
except Exception as exc:
    CHINESE_FONT_OK = False
    logging.getLogger(__name__).warning(
        "Chinese font registration failed for %s (%s); falling back to Helvetica",
        FONT_PATH, exc,
    )

import re
import qrcode
from io import BytesIO
from reportlab.platypus import Image as RLImage

# Regular expression for http(s) URLs: the scheme followed by one or more
# characters that are neither whitespace nor any of < > " { } | \ ^ ` [ ]
URL_PATTERN = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')

def generate_qr_image(url, size=50*mm):
    """Encode *url* as a QR code and return it as PNG data in a ``BytesIO``.

    The image is always produced with ``box_size=10``. ``size`` is accepted
    but not used by this function. The returned buffer is rewound to
    position 0 so it can be read immediately.
    """
    png_buffer = BytesIO()
    qrcode.make(url, box_size=10).save(png_buffer, format='PNG')
    png_buffer.seek(0)
    return png_buffer

def contains_url(text):
    """Return True if *text* contains at least one match of ``URL_PATTERN``."""
    return URL_PATTERN.search(text) is not None

def md_to_xml(text):
    """Translate a small subset of inline Markdown into ReportLab markup.

    Handled constructs, scanned left to right:

    * ``**bold**``  -> ``<font name="Chinese-Bold">`` when the Chinese font
      is available, otherwise ``<b>``
    * ``*italic*``  -> ``<i>`` (only when the opening ``*`` is not directly
      preceded by ``*`` or ``_`` and the closing ``*`` is not preceded by ``*``)
    * `` `code` ``  -> ``<font name="Courier">``

    Markers without a closing partner are copied through unchanged, and the
    text inside a matched pair is not processed any further. A falsy *text*
    yields an empty string.

    NOTE(review): the input is not XML-escaped, so literal ``<``, ``>`` or
    ``&`` characters reach the caller untouched.
    """
    if not text:
        return ""

    pieces = []
    pos = 0
    length = len(text)
    while pos < length:
        ch = text[pos]

        if ch == '*':
            # Bold takes precedence over italic.
            if text.startswith('**', pos):
                close = text.find('**', pos + 2)
                if close >= 0:
                    inner = text[pos + 2:close]
                    if CHINESE_FONT_OK:
                        # Use the dedicated bold CJK font for bold text.
                        pieces.append(f'<font name="Chinese-Bold">{inner}</font>')
                    else:
                        pieces.append(f'<b>{inner}</b>')
                    pos = close + 2
                    continue

            # Italic: skip a '*' that directly follows another marker.
            follows_marker = pos > 0 and text[pos - 1] in '*_'
            if not follows_marker:
                close = text.find('*', pos + 1)
                if close >= 0 and text[close - 1] != '*':
                    pieces.append(f'<i>{text[pos + 1:close]}</i>')
                    pos = close + 1
                    continue
        elif ch == '`':
            # Inline code span.
            close = text.find('`', pos + 1)
            if close >= 0:
                pieces.append(f'<font name="Courier">{text[pos + 1:close]}</font>')
                pos = close + 1
                continue

        # Ordinary character, or a marker that has no closing partner.
        pieces.append(ch)
        pos += 1

    return ''.join(pieces)


class PianoPDF:
    """Collects ReportLab flowables for a practice-plan PDF.

    Each ``add_*`` method appends to ``self.elements``; the caller passes
    that list to a document template to build the file. Text arguments go
    through ``md_to_xml`` so inline Markdown (bold, italic, code) is rendered.
    Fonts are the registered Chinese fonts when ``CHINESE_FONT_OK`` is true,
    otherwise Helvetica / Helvetica-Bold.
    """

    def __init__(self):
        self.elements = []
        self.styles = getSampleStyleSheet()

        if CHINESE_FONT_OK:
            self.base_font = "Chinese"
            self.bold_font = "Chinese-Bold"
        else:
            self.base_font = "Helvetica"
            self.bold_font = "Helvetica-Bold"

        # Document title (centered)
        self.title_style = ParagraphStyle(
            "CustomTitle",
            parent=self.styles["Heading1"],
            fontName=self.bold_font,
            fontSize=18,
            spaceAfter=10*mm,
            alignment=TA_CENTER,
        )

        # Second-level heading
        self.heading_style = ParagraphStyle(
            "CustomHeading",
            parent=self.styles["Heading2"],
            fontName=self.bold_font,
            fontSize=14,
            spaceAfter=6*mm,
            spaceBefore=6*mm,
            textColor=colors.HexColor("#2c3e50"),
        )

        # Third-level heading
        self.h3_style = ParagraphStyle(
            "CustomH3",
            parent=self.styles["Heading3"],
            fontName=self.bold_font,
            fontSize=12,
            spaceAfter=4*mm,
            spaceBefore=4*mm,
            textColor=colors.HexColor("#34495e"),
        )

        # Body text
        self.body_style = ParagraphStyle(
            "CustomBody",
            parent=self.styles["Normal"],
            fontName=self.base_font,
            fontSize=12,
            spaceAfter=1*mm,
            leading=14,
        )

        # Header-row cells are Paragraph flowables, and a Paragraph is drawn
        # with its own style rather than the TableStyle FONTNAME/TEXTCOLOR
        # commands, so the header look has to be repeated as a ParagraphStyle.
        self.table_header_style = ParagraphStyle(
            "CustomTableHeader",
            parent=self.body_style,
            fontName=self.bold_font,
            textColor=colors.whitesmoke,
        )

        # Table look: blue header row, grid lines, alternating row backgrounds
        self.table_style = TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#3498db")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
            ("ALIGN", (0, 0), (-1, -1), "LEFT"),
            ("FONTNAME", (0, 0), (-1, 0), self.bold_font),
            ("FONTSIZE", (0, 0), (-1, 0), 12),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 8),
            ("BACKGROUND", (0, 1), (-1, -1), colors.white),
            ("FONTNAME", (0, 1), (-1, -1), self.base_font),
            ("FONTSIZE", (0, 1), (-1, -1), 11),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8f9fa")]),
        ])

    def add_title(self, text):
        """Append the document title followed by a 5 mm spacer."""
        self.elements.append(Paragraph(md_to_xml(text), self.title_style))
        self.elements.append(Spacer(1, 5*mm))

    def add_heading(self, text, level=2):
        """Append a heading; ``level=3`` uses the H3 style, anything else H2."""
        style = self.h3_style if level == 3 else self.heading_style
        self.elements.append(Paragraph(md_to_xml(text), style))

    def add_paragraph(self, text):
        """Append a body paragraph, or a QR code when *text* is only a URL.

        Empty *text* is ignored. If the stripped text is exactly one URL, a
        50 mm centered QR image is added instead of the text; should QR
        generation fail, the URL is rendered as a normal paragraph.
        """
        if not text:
            return

        stripped = text.strip()
        if URL_PATTERN.fullmatch(stripped):
            try:
                buf = generate_qr_image(stripped, 50*mm)
                img = RLImage(buf, width=50*mm, height=50*mm)
            except Exception:
                # Best effort: report the problem and fall through to the
                # plain-text rendering below.
                logging.getLogger(__name__).warning(
                    "QR code generation failed for %s; rendering as text",
                    stripped, exc_info=True,
                )
            else:
                img.hAlign = 'CENTER'
                self.elements.append(img)
                self.elements.append(Spacer(1, 2*mm))
                return

        self.elements.append(Paragraph(md_to_xml(text), self.body_style))
        self.elements.append(Spacer(1, 1*mm))

    def add_list(self, items):
        """Append each non-empty item as a bullet line, then a 1 mm spacer."""
        for item in items:
            if item:
                self.elements.append(Paragraph(f"• {md_to_xml(item)}", self.body_style))
        self.elements.append(Spacer(1, 1*mm))

    def add_table(self, data):
        """Append a styled table built from *data* (a sequence of rows).

        The first row is the header. Input with fewer than two rows is
        ignored. Cells are converted with ``str`` and rendered as paragraphs
        so long text wraps; ``None`` and ``""`` become blank cells, while
        other falsy values such as ``0`` are kept.
        """
        if not data or len(data) < 2:
            return

        table_data = []
        for row_index, row in enumerate(data):
            cell_style = self.table_header_style if row_index == 0 else self.body_style
            table_data.append([
                "" if cell is None or cell == ""
                else Paragraph(md_to_xml(str(cell)), cell_style)
                for cell in row
            ])

        table = Table(table_data)
        table.setStyle(self.table_style)
        self.elements.append(table)
        self.elements.append(Spacer(1, 3*mm))


# Markdown heading: 1-6 leading '#' characters, whitespace, then the title.
_MD_HEADING_RE = re.compile(r'^(#{1,6})\s+(.*)$')
# One cell of a table delimiter row: '---', ':---', '---:' or ':---:'.
_MD_TABLE_DELIMITER_RE = re.compile(r'^:?-+:?$')
# Line prefixes that introduce an unordered-list item.
_MD_BULLET_PREFIXES = ('- ', '* ')


def _split_table_row(line):
    """Split one ``| a | b |`` table line into a list of stripped cell texts."""
    inner = line[1:]
    # The closing pipe is optional; only drop it when it is really there so
    # the last cell is not lost.
    if inner.endswith('|'):
        inner = inner[:-1]
    return [cell.strip() for cell in inner.split('|')]


def _render_markdown(pdf, markdown_text):
    """Feed a line-oriented Markdown document into *pdf* (a ``PianoPDF``).

    Recognized per line: blank lines and ``---`` rules (skipped), ``#`` to
    ``######`` headings (level 1 is the title, level 2 a heading, deeper
    levels the H3 style), pipe tables, ``- `` / ``* `` list items, and plain
    paragraphs. Consecutive table lines are buffered and emitted as one
    table as soon as any other kind of line, or the end of input, is reached,
    so document order is preserved.
    """
    table_rows = []

    def flush_table():
        if table_rows:
            pdf.add_table(list(table_rows))
            table_rows.clear()

    for raw_line in markdown_text.split('\n'):
        line = raw_line.strip()

        if not line or line.startswith('---'):
            flush_table()
            continue

        if line.startswith('|') and '|' in line[1:]:
            cells = _split_table_row(line)
            filled = [cell for cell in cells if cell]
            # Skip rows without content and the header/body delimiter row.
            # Cells are passed on raw: add_table does the markup conversion.
            if filled and not all(_MD_TABLE_DELIMITER_RE.match(c) for c in filled):
                table_rows.append(cells)
            continue

        # Every remaining line type ends a pending table.
        flush_table()

        heading = _MD_HEADING_RE.match(line)
        if heading:
            level = len(heading.group(1))
            # Take the text after the marker; only the leading '#'s are
            # removed, so titles such as "C# minor" stay intact.
            title = heading.group(2).strip()
            if level == 1:
                pdf.add_title(title)
            elif level == 2:
                pdf.add_heading(title)
            else:
                pdf.add_heading(title, level=3)
        elif line.startswith(_MD_BULLET_PREFIXES):
            pdf.add_paragraph(f"• {line[2:]}")
        else:
            pdf.add_paragraph(line)

    flush_table()


def generate_pdf(plan_id, student_name, content, output_dir, rendered_report=None, watermark_text=None):
    """Build the practice-plan PDF and return the path of the written file.

    Args:
        plan_id: Identifier used in the file name ``plan_{plan_id}.pdf``.
        student_name: Shown in the title and info section of the structured
            layout (not used when *rendered_report* is given).
        content: Mapping read with ``.get`` for ``practice_time``,
            ``generated_at`` and ``ai_report``; only consulted when
            *rendered_report* is empty.
        output_dir: Target directory; created if it does not exist.
        rendered_report: Optional Markdown text. When non-empty it is the
            whole document and *content* is ignored.
        watermark_text: Optional text drawn diagonally across every page.
            It is only drawn when the Chinese font was registered.

    Returns:
        The path of the generated PDF inside *output_dir*.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"plan_{plan_id}.pdf")

    doc = SimpleDocTemplate(
        output_path, pagesize=A4,
        rightMargin=20*mm, leftMargin=20*mm,
        topMargin=20*mm, bottomMargin=20*mm,
    )

    pdf = PianoPDF()

    if rendered_report:
        # The pre-rendered Markdown report is the entire document.
        _render_markdown(pdf, rendered_report)
    else:
        # Structured layout: fixed info section, then the optional AI report.
        pdf.add_title(f"钢琴练习方案 - {student_name}")
        pdf.add_heading("学员信息")
        pdf.add_paragraph(f"学员姓名：{student_name}")
        pdf.add_paragraph(f"每日练习时间：{content.get('practice_time', 'N/A')}")
        pdf.add_paragraph(f"生成时间：{content.get('generated_at', '')}")

        ai_report = content.get("ai_report")
        if ai_report:
            pdf.add_heading("AI个性化练习报告")
            _render_markdown(pdf, ai_report)

    def draw_watermark(c, doc):
        """Page callback: draw the watermark text rotated 45° at page center."""
        if not watermark_text or not CHINESE_FONT_OK:
            return
        c.saveState()
        try:
            c.setFont("Chinese", 56)
            # Light grey, mostly transparent
            c.setFillColor(colors.Color(0.6, 0.6, 0.6, alpha=0.25))
            c.translate(A4[0]/2, A4[1]/2)
            c.rotate(45)
            c.drawCentredString(0, 0, watermark_text)
        except Exception:
            # The watermark is decorative: report the failure, keep the page.
            logging.getLogger(__name__).warning(
                "Failed to draw watermark", exc_info=True,
            )
        finally:
            c.restoreState()

    # Drop trailing spacers so they cannot spill onto an extra blank page.
    while pdf.elements and isinstance(pdf.elements[-1], Spacer):
        pdf.elements.pop()

    doc.build(pdf.elements, onFirstPage=draw_watermark, onLaterPages=draw_watermark)
    return output_path