Files
skills/musicXML-ocr/scripts/extract_pdf_images.py
T
hmo 04db423416 Initial commit: skills library
- 70 skills with code and documentation
- Add .gitignore (ignore __pycache__, output/, temp/, venv/)
- Clean up test intermediates and caches
2026-04-26 19:27:40 +08:00

134 lines
3.6 KiB
Python

#!/usr/bin/env python3
"""
extract_pdf_images.py

Extract sheet-music (staff notation) images from a PDF for later OMR processing.

What it does:
    1. Opens the PDF file
    2. Extracts the images embedded in each page
    3. Saves them as PNG files

Usage:
    python extract_pdf_images.py <pdf_path> [output_dir]

Examples:
    python extract_pdf_images.py "D:/scores/sheet.pdf"

    # Extract into a specific directory
    python extract_pdf_images.py "D:/scores/sheet.pdf" "D:/output/sheets"

Dependencies:
    pip install pymupdf
"""
import os
import sys
import argparse
try:
import fitz # PyMuPDF
except ImportError:
print("Error: PyMuPDF not installed.")
print("Install with: pip install pymupdf")
sys.exit(1)
def extract_images_from_pdf(pdf_path: str, output_dir: str = None) -> list:
    """
    Extract every image embedded in a PDF and save each one as a PNG file.

    Output files are named ``page<NNN>_img<MM>.png`` using the 1-based page
    number and the 1-based position of the image on that page. Progress is
    printed to stdout.

    Args:
        pdf_path: Path of the PDF file to read.
        output_dir: Directory the PNG files are written to; it is created if
            it does not exist. Defaults to ``temp/pdf_sheets`` inside the
            directory that contains the PDF.

    Returns:
        List of the written PNG paths, in extraction order.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
    """
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"PDF not found: {pdf_path}")

    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(pdf_path), 'temp', 'pdf_sheets')
    os.makedirs(output_dir, exist_ok=True)

    extracted = []
    doc = fitz.open(pdf_path)
    # try/finally so the document handle is released even when decoding or
    # saving one of the images fails part-way through.
    try:
        print(f"[PDF] Opened: {pdf_path}")
        print(f"[PDF] Pages: {len(doc)}")

        for page_num, page in enumerate(doc, start=1):
            images = page.get_images()
            print(f"[PDF] Page {page_num}: {len(images)} image(s)")

            for img_num, img in enumerate(images, start=1):
                xref = img[0]  # first entry of an image record is its xref
                pix = fitz.Pixmap(doc, xref)

                # Gray and RGB pixmaps are saved as they are; anything with
                # four or more colour components (CMYK) is converted to RGB
                # before being written as PNG.
                if pix.n - pix.alpha >= 4:
                    pix = fitz.Pixmap(fitz.csRGB, pix)

                out_path = os.path.join(
                    output_dir,
                    f"page{page_num:03d}_img{img_num:02d}.png"
                )
                pix.save(out_path)
                print(f" -> {out_path}")
                extracted.append(out_path)
    finally:
        doc.close()

    return extracted
def main():
    """
    Command-line entry point: parse arguments, run the extraction, print a summary.

    Positional command-line arguments:
        pdf: Input PDF file.
        output_dir: Optional output directory. When omitted, ``None`` is
            passed on and ``extract_images_from_pdf`` chooses the location.

    Both paths are made absolute before use. If the extraction raises, the
    error message is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Extract images from PDF for OMR processing",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\n"
            "Examples:\n"
            "python extract_pdf_images.py \"D:\\scores\\sheet.pdf\"\n"
            "python extract_pdf_images.py \"D:\\scores\\sheet.pdf\" \"D:\\output\\sheets\"\n"
        )
    )
    parser.add_argument('pdf', help="Input PDF file")
    parser.add_argument('output_dir', nargs='?', default=None,
                        help="Output directory (default: temp/pdf_sheets in PDF dir)")
    args = parser.parse_args()

    pdf_path = os.path.abspath(args.pdf)
    output_dir = os.path.abspath(args.output_dir) if args.output_dir else None

    # Top-level boundary: report any extraction failure and exit non-zero.
    try:
        images = extract_images_from_pdf(pdf_path, output_dir)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

    print()
    print("=" * 50)
    print(f"Extracted {len(images)} image(s)")
    print("=" * 50)

    if images:
        # The hint below is a PowerShell command line (note the `&` call
        # operator). Each path is single-quoted, with embedded single quotes
        # doubled, so paths containing spaces still form a valid command.
        quoted_images = ' '.join(
            "'" + path.replace("'", "''") + "'" for path in images
        )
        print("\nNext steps:")
        print(f" 1. Audiveris: & 'C:\\Program Files\\Audiveris\\Audiveris.exe' -batch -export -output <dir> {quoted_images}")
        print(" 2. Or use the audiveris_to_musescore.py script for each image")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()