04db423416
- 70 skills with code and documentation - Add .gitignore (ignore __pycache__, output/, temp/, venv/) - Clean up test intermediates and caches
184 lines
5.3 KiB
Python
184 lines
5.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
homr_to_musescore.py
|
||
|
||
完整的 homr → MusicXML → MuseScore 可用格式 流水线。
|
||
|
||
功能:
|
||
1. 调用 homr 识别五线谱图片
|
||
2. 删除 <print> 元素(修复错误分行)
|
||
3. 添加花括号分组(钢琴谱 grand staff)
|
||
|
||
Usage:
|
||
python homr_to_musescore.py image.png [output.musicxml]
|
||
|
||
Dependencies:
|
||
- homr (pip install homr)
|
||
- Python 3.8+ (标准库 xml.etree.ElementTree)
|
||
|
||
Environment:
|
||
依赖 base conda 环境(已包含 homr):
|
||
conda activate base
|
||
"""
|
||
|
||
import sys
|
||
import os
|
||
import tempfile
|
||
import shutil
|
||
import xml.etree.ElementTree as ET
|
||
|
||
# Optional dependency probe: homr is imported lazily-tolerant so that the
# script can still start (and print a helpful error from step1_homr) when
# the package is missing. HAS_HOMR records whether the import succeeded.
try:
    from homr.main import process_image, ProcessingConfig, XmlGeneratorArguments
except ImportError:
    HAS_HOMR = False
else:
    HAS_HOMR = True
|
||
|
||
|
||
def step1_homr(image_path: str) -> str:
    """Step 1: run homr on an image and return the path of the resulting MusicXML.

    The image is copied into a fresh temporary directory under an ASCII-only
    file name before homr is invoked, because homr is reported not to cope
    with non-ASCII (e.g. Chinese) paths. Once homr has finished, the generated
    MusicXML is copied next to the original image as ``<image stem>.musicxml``
    *before* the temporary directory is deleted, so the returned path is still
    valid when the caller uses it.

    Args:
        image_path: Path of the sheet-music image to recognise.

    Returns:
        Path of the MusicXML file written next to ``image_path``.

    Exits the process with status 1 when homr is not installed or when no
    MusicXML output can be found after processing.
    """
    print(f"[Step 1] Running homr OCR on: {image_path}")

    if not HAS_HOMR:
        print("Error: homr not installed. Run: pip install homr")
        sys.exit(1)

    # Work on a copy with a plain ASCII name: keeping the original basename
    # would carry any non-ASCII characters of the file name into the path
    # handed to homr, defeating the purpose of the temporary copy.
    tmp_dir = tempfile.mkdtemp(prefix='homr_')
    extension = os.path.splitext(image_path)[1]
    tmp_image = os.path.join(tmp_dir, 'input' + extension)
    shutil.copy(image_path, tmp_image)

    try:
        config = ProcessingConfig(False, False, False, False, -1)
        xml_args = XmlGeneratorArguments(False, None, None)
        result = process_image(tmp_image, config, xml_args)

        # Prefer a path returned by homr when there is one; otherwise fall
        # back to the sibling "<stem>.musicxml" of the processed image.
        # NOTE(review): assumes homr writes its output next to the input
        # image -- confirm against the installed homr version.
        generated = None
        if isinstance(result, (str, os.PathLike)) and os.path.exists(result):
            generated = result
        else:
            sibling = os.path.splitext(tmp_image)[0] + '.musicxml'
            if os.path.exists(sibling):
                generated = sibling

        if generated is None:
            print("Error: homr did not produce output file")
            sys.exit(1)

        # Persist the result outside tmp_dir; the finally-clause below wipes
        # the directory, which would otherwise leave a dangling return value.
        musicxml_path = os.path.splitext(image_path)[0] + '.musicxml'
        shutil.copy(generated, musicxml_path)

        print(f"[Step 1] Done: {musicxml_path}")
        return musicxml_path
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
|
||
|
||
|
||
def step2_remove_print(input_path: str, output_path: str) -> None:
    """Step 2: strip every <print> element from a MusicXML document.

    Args:
        input_path: MusicXML file to read.
        output_path: Destination file; written as UTF-8 with an XML
            declaration.
    """
    print(f"[Step 2] Removing <print> elements from: {input_path}")

    # Keep these prefixes stable on serialisation should the document use them.
    for prefix, uri in (
        ('m', 'http://www.musescore.org/ns/mscore'),
        ('xlink', 'http://www.w3.org/1999/xlink'),
    ):
        ET.register_namespace(prefix, uri)

    document = ET.parse(input_path)
    score = document.getroot()

    # Walk every element and detach its direct <print> children. findall()
    # returns a list, so removing while looping over it is safe.
    removed_count = 0
    for parent in score.iter():
        for print_elem in parent.findall('print'):
            parent.remove(print_elem)
            removed_count += 1

    # Pretty-print when ET.indent exists (added in Python 3.9); older
    # interpreters simply write the tree unindented.
    try:
        ET.indent(score)
    except AttributeError:
        pass

    document.write(output_path, encoding='UTF-8', xml_declaration=True)
    print(f"[Step 2] Removed {removed_count} <print> element(s)")
|
||
|
||
|
||
def step3_add_brace(input_path: str, output_path: str) -> None:
    """Step 3: wrap all parts of the score in a brace (grand-staff) group.

    A ``<part-group type="start">`` carrying ``group-symbol = brace`` is
    inserted as the first child of ``<part-list>`` and a matching
    ``<part-group type="stop">`` is appended as its last child. In MusicXML,
    group membership is positional: every ``<score-part>`` between the start
    and stop markers belongs to the group, so the ``<score-part>`` elements
    themselves are left unmodified.

    When the document has no top-level ``<part-list>``, a warning is printed
    and the input file is copied to ``output_path`` unchanged.

    Args:
        input_path: MusicXML file to read.
        output_path: Destination file; written as UTF-8 with an XML
            declaration.
    """
    print(f"[Step 3] Adding brace grouping to: {input_path}")

    ET.register_namespace('m', 'http://www.musescore.org/ns/mscore')
    ET.register_namespace('xlink', 'http://www.w3.org/1999/xlink')

    tree = ET.parse(input_path)
    root = tree.getroot()
    part_list = root.find('part-list')

    if part_list is None:
        print("[Step 3] Warning: no <part-list> found, skipping brace")
        shutil.copy(input_path, output_path)
        return

    group_start = ET.Element('part-group', number='1', type='start')
    ET.SubElement(group_start, 'group-symbol').text = 'brace'
    # group-barline only accepts "yes", "no" or "Mensurstrich"; "yes" makes
    # barlines run through the staves of the group, as on a grand staff.
    ET.SubElement(group_start, 'group-barline').text = 'yes'
    ET.SubElement(group_start, 'group-time')
    part_list.insert(0, group_start)

    # No <group> child is added to the <score-part> elements: that element
    # has a different meaning in MusicXML (it labels part versions such as
    # "score"/"parts") and appending it after score-instrument/midi elements
    # would break the schema's child order.
    part_list.append(ET.Element('part-group', number='1', type='stop'))

    # ET.indent is only available on Python 3.9+; skip pretty-printing
    # on older interpreters.
    try:
        ET.indent(root)
    except AttributeError:
        pass

    tree.write(output_path, encoding='UTF-8', xml_declaration=True)
    print("[Step 3] Done")
|
||
|
||
|
||
def main():
    """Command-line entry point: run the three pipeline steps on one image.

    Reads the image path from ``sys.argv[1]`` and an optional output path
    from ``sys.argv[2]``. Without an explicit output path the result is
    written next to the image as ``<image stem>_final.musicxml``.

    Prints the module usage text and exits with status 1 when no argument is
    given; also exits with status 1 when the image does not exist or when no
    MusicXML output from homr can be located.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    image_path = sys.argv[1]
    if not os.path.exists(image_path):
        print(f"Error: file not found: {image_path}")
        sys.exit(1)

    # Work out the output file name.
    if len(sys.argv) > 2:
        output_path = sys.argv[2]
    else:
        base = os.path.splitext(os.path.basename(image_path))[0]
        output_path = os.path.join(os.path.dirname(image_path), f"{base}_final.musicxml")

    # Temporary directory for intermediate files.
    tmp_dir = tempfile.mkdtemp(prefix='homr_pipeline_')

    try:
        # Step 1: homr recognition. Use the path it reports when that path
        # is usable.
        raw_xml = step1_homr(image_path)

        if not (isinstance(raw_xml, (str, os.PathLike)) and os.path.exists(raw_xml)):
            # Fall back to the "<stem>.musicxml" sibling of the image.
            # splitext() swaps only the trailing extension; str.replace()
            # would rewrite every occurrence of the extension text in the
            # path and mangles the path entirely when the image has no
            # extension at all.
            raw_xml = os.path.splitext(image_path)[0] + '.musicxml'

        if not os.path.exists(raw_xml):
            print("Error: homr did not produce output file")
            sys.exit(1)

        # Step 2: remove <print> elements.
        no_print_xml = os.path.join(tmp_dir, 'no_print.musicxml')
        step2_remove_print(raw_xml, no_print_xml)

        # Step 3: add the brace group.
        step3_add_brace(no_print_xml, output_path)

        print()
        print("Pipeline complete!")
        print(f"Final output: {output_path}")
        print("Open in MuseScore to verify: 5 measures per line, brace on grand staff")

    finally:
        # Runs on success, on errors and on sys.exit() alike.
        shutil.rmtree(tmp_dir, ignore_errors=True)
|
||
|
||
|
||
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
|