Files
MoFin/venv/lib/python3.12/site-packages/nltk/test/unit/test_brill.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

35 lines
990 B
Python

"""
Unit tests for the Brill tagger (nltk.tag.brill and nltk.tag.brill_trainer).
"""
import unittest
from nltk.corpus import treebank
from nltk.tag import UnigramTagger, brill, brill_trainer
from nltk.tbl import demo
class TestBrill(unittest.TestCase):
    """Test case covering Brill tagger training and the ``nltk.tbl`` demo."""

    def test_pos_template(self):
        """Train with a single ``Pos([-1])`` template and tag a sample sentence.

        A ``UnigramTagger`` built from the first 1000 tagged treebank
        sentences is used as the initial tagger, and the same sentences are
        used to train the Brill tagger. The tagged output for a fixed
        six-word sentence is compared against the expected (word, tag) pairs.
        """
        corpus_slice = treebank.tagged_sents()[:1000]
        baseline = UnigramTagger(corpus_slice)
        rule_templates = [brill.Template(brill.Pos([-1]))]
        rule_trainer = brill_trainer.BrillTaggerTrainer(baseline, rule_templates)
        learned_tagger = rule_trainer.train(corpus_slice)

        # Example from https://github.com/nltk/nltk/issues/769
        words = "This is a foo bar sentence".split()
        predicted = learned_tagger.tag(words)

        # One expected tag per word, in sentence order; "foo" and "sentence"
        # are expected to come back with a tag of None.
        gold_tags = ("DT", "VBZ", "DT", None, "NN", None)
        self.assertEqual(predicted, list(zip(words, gold_tags)))

    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
    def test_brill_demo(self):
        """Call ``demo`` from ``nltk.tbl``; unconditionally skipped."""
        demo()