Files
MoFin/venv/lib/python3.12/site-packages/nltk/test/unit/test_aline.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

239 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Test Aline algorithm for aligning phonetic sequences
"""
from nltk.metrics import aline
def test_aline():
result = aline.align("θin", "tenwis")
expected = [[("θ", "t"), ("i", "e"), ("n", "n")]]
assert result == expected
result = aline.align("jo", "ʒə")
expected = [[("j", "ʒ"), ("o", "ə")]]
assert result == expected
result = aline.align("pematesiweni", "pematesewen")
expected = [
[
("p", "p"),
("e", "e"),
("m", "m"),
("a", "a"),
("t", "t"),
("e", "e"),
("s", "s"),
("i", "e"),
("w", "w"),
("e", "e"),
("n", "n"),
]
]
assert result == expected
result = aline.align("tuwθ", "dentis")
expected = [[("t", "t"), ("u", "i"), ("w", "-"), ("θ", "s")]]
assert result == expected
def test_aline_delta():
"""
Test aline for computing the difference between two segments
"""
assert aline.delta("p", "q") == 20.0
assert aline.delta("a", "A") == 0.0
class TestAlineEdgeCases:
def test_identical_strings(self):
"""Aligning identical strings should produce perfect 1-to-1 mapping."""
result = aline.align("pat", "pat")
assert result == [[("p", "p"), ("a", "a"), ("t", "t")]]
def test_invalid_segment_raises(self):
"""Unknown segments should raise ValueError, not KeyError."""
try:
aline.align("p", "!")
assert False, "Expected ValueError"
except ValueError:
pass
try:
aline.align("", "t")
assert False, "Expected ValueError"
except ValueError:
pass
def test_single_char(self):
"""Single characters should align."""
result = aline.align("p", "p")
assert result == [[("p", "p")]]
def test_single_char_different(self):
"""Different single consonants should still align."""
result = aline.align("p", "t")
assert result == [[("p", "t")]]
def test_single_vowel(self):
"""Single vowels should align."""
result = aline.align("a", "e")
assert result == [[("a", "e")]]
# ---------------------------------------------------------
# Feature matrix correctness
# ---------------------------------------------------------
class TestFeatureMatrix:
"""Verify IPA feature assignments match phonetic definitions."""
def test_all_consonants_in_feature_matrix(self):
"""Every consonant in the consonants list should be in feature_matrix."""
for c in aline.consonants:
assert (
c in aline.feature_matrix
), f"Consonant {c!r} missing from feature_matrix"
def test_all_vowels_in_feature_matrix(self):
"""Every vowel (place='vowel' entry) should be a valid key."""
for v in aline.vowels:
assert v in aline.feature_matrix, f"Vowel {v!r} missing from feature_matrix"
def test_no_trailing_spaces_in_consonants(self):
"""No consonant entry should have trailing whitespace."""
for c in aline.consonants:
assert c == c.strip(), f"Consonant {c!r} has trailing whitespace"
def test_no_trailing_spaces_in_vowels(self):
"""No vowel entry should have trailing whitespace."""
for v in aline.vowels:
assert v == v.strip(), f"Vowel {v!r} has trailing whitespace"
def test_no_trailing_spaces_in_feature_matrix_keys(self):
"""No feature_matrix key should have trailing whitespace."""
for k in aline.feature_matrix:
assert k == k.strip(), f"Feature matrix key {k!r} has trailing whitespace"
def test_consonants_not_vowel_place(self):
"""No consonant should have place='vowel'."""
for c in aline.consonants:
assert (
aline.feature_matrix[c]["place"] != "vowel"
), f"Consonant {c!r} has place='vowel'"
def test_vowels_have_vowel_place(self):
"""All vowels in feature_matrix should have place='vowel'."""
for v in aline.vowels:
assert (
aline.feature_matrix[v]["place"] == "vowel"
), f"Vowel {v!r} has place={aline.feature_matrix[v]['place']!r}"
# IPA rounding checks
def test_unrounded_vowels(self):
"""Vowels that are unrounded in IPA should have round='minus'."""
unrounded = ["i", "e", "ɛ", "a", "ɯ", "ɘ", "ɜ", "ɐ", "ɑ", "ʌ", "ɤ"]
for v in unrounded:
if v in aline.feature_matrix:
assert (
aline.feature_matrix[v]["round"] == "minus"
), f"IPA unrounded vowel {v!r} has round={aline.feature_matrix[v]['round']!r}"
def test_rounded_vowels(self):
"""Vowels that are rounded in IPA should have round='plus'."""
rounded = ["o", "u", "y", "ɒ", "ɵ", "ʏ", "ʊ"]
for v in rounded:
if v in aline.feature_matrix:
assert (
aline.feature_matrix[v]["round"] == "plus"
), f"IPA rounded vowel {v!r} has round={aline.feature_matrix[v]['round']!r}"
# IPA height checks
def test_close_mid_vowels(self):
"""Close-mid vowels should have high='mid', not 'high'."""
close_mid = ["ɘ", "ɵ"]
for v in close_mid:
if v in aline.feature_matrix:
assert (
aline.feature_matrix[v]["high"] == "mid"
), f"Close-mid vowel {v!r} has high={aline.feature_matrix[v]['high']!r}"
class TestScoringFunctions:
def test_delta_identical_consonants(self):
"""Identical consonants should have delta = 0."""
assert aline.delta("p", "p") == 0.0
assert aline.delta("t", "t") == 0.0
assert aline.delta("s", "s") == 0.0
def test_delta_identical_vowels(self):
"""Identical vowels should have delta = 0."""
assert aline.delta("a", "a") == 0.0
assert aline.delta("i", "i") == 0.0
def test_delta_similar_consonants(self):
"""Similar consonants (same place) should have small delta."""
# p and b differ only in voicing
d_pb = aline.delta("p", "b")
# p and k differ in place
d_pk = aline.delta("p", "k")
assert d_pb < d_pk
def test_delta_symmetric(self):
"""Delta should be symmetric: delta(a,b) == delta(b,a)."""
assert aline.delta("p", "t") == aline.delta("t", "p")
assert aline.delta("a", "o") == aline.delta("o", "a")
assert aline.delta("s", "ʃ") == aline.delta("ʃ", "s")
def test_sigma_skip_negative(self):
"""sigma_skip should return a negative value (penalty)."""
assert aline.sigma_skip("p") < 0
assert aline.sigma_skip("a") < 0
def test_sigma_sub_identical(self):
"""Substituting identical segments should give max score."""
score_identical = aline.sigma_sub("p", "p")
score_different = aline.sigma_sub("p", "t")
assert score_identical > score_different
def test_sigma_exp_returns_value(self):
"""sigma_exp should return a numeric value without error."""
result = aline.sigma_exp("a", ("p", "b"))
assert isinstance(result, (int, float))
def test_V_vowel_gt_consonant(self):
"""V() for a vowel should be >= V() for a consonant."""
# V(p) for vowel uses C_vwl, for consonant returns 0
v_vowel = aline.V("a")
v_consonant = aline.V("p")
assert v_vowel >= v_consonant
def test_R_returns_list(self):
"""R() should return relevant features for the segment pair."""
# R takes two segments and returns the relevant feature list
r_consonants = aline.R("p", "t")
r_vowels = aline.R("a", "e")
assert isinstance(r_consonants, list)
assert isinstance(r_vowels, list)
assert len(r_consonants) > 0
assert len(r_vowels) > 0
class TestAlignmentProperties:
def test_alignment_not_empty(self):
"""Alignment of non-empty strings should produce non-empty result."""
result = aline.align("pat", "kat")
assert len(result) > 0
assert len(result[0]) > 0
def test_alignment_returns_list_of_lists(self):
"""align() should return a list of alignment lists."""
result = aline.align("pa", "ta")
assert isinstance(result, list)
assert isinstance(result[0], list)
assert isinstance(result[0][0], tuple)