Files
MoFin/venv/lib/python3.12/site-packages/nltk/test/treetransforms.doctest
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

155 lines
4.7 KiB
Plaintext

.. Copyright (C) 2001-2026 NLTK Project
.. For license information, see LICENSE.TXT
------------------------------------------------
Unit tests for the tree transformation functions
------------------------------------------------
>>> from copy import deepcopy
>>> from nltk.tree import Tree, collapse_unary, chomsky_normal_form, un_chomsky_normal_form
>>> tree_string = "(TOP (S (S (VP (VBN Turned) (ADVP (RB loose)) (PP (IN in) (NP (NP (NNP Shane) (NNP Longman) (POS 's)) (NN trading) (NN room))))) (, ,) (NP (DT the) (NN yuppie) (NNS dealers)) (VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))) (. .)))"
>>> tree = Tree.fromstring(tree_string)
>>> print(tree)
(TOP
(S
(S
(VP
(VBN Turned)
(ADVP (RB loose))
(PP
(IN in)
(NP
(NP (NNP Shane) (NNP Longman) (POS 's))
(NN trading)
(NN room)))))
(, ,)
(NP (DT the) (NN yuppie) (NNS dealers))
(VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
(. .)))
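Make a copy of the original tree and collapse the subtrees that have only one
child. As the two examples below show, part-of-speech nodes and the root are
left uncollapsed unless collapsePOS=True and collapseRoot=True are passed.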
>>> collapsedTree = deepcopy(tree)
>>> collapse_unary(collapsedTree)
>>> print(collapsedTree)
(TOP
(S
(S+VP
(VBN Turned)
(ADVP (RB loose))
(PP
(IN in)
(NP
(NP (NNP Shane) (NNP Longman) (POS 's))
(NN trading)
(NN room))))
(, ,)
(NP (DT the) (NN yuppie) (NNS dealers))
(VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
(. .)))
>>> collapsedTree2 = deepcopy(tree)
>>> collapse_unary(collapsedTree2, collapsePOS=True, collapseRoot=True)
>>> print(collapsedTree2)
(TOP+S
(S+VP
(VBN Turned)
(ADVP+RB loose)
(PP
(IN in)
(NP
(NP (NNP Shane) (NNP Longman) (POS 's))
(NN trading)
(NN room))))
(, ,)
(NP (DT the) (NN yuppie) (NNS dealers))
(VP (AUX do) (NP (NP+RB little) (ADJP+RB right)))
(. .))
Convert the tree to Chomsky Normal Form, i.e., a form in which each subtree
has either two subtree children or a single leaf value. This conversion can
be performed using either left- or right-factoring.
>>> cnfTree = deepcopy(collapsedTree)
>>> chomsky_normal_form(cnfTree, factor='left')
>>> print(cnfTree)
(TOP
(S
(S|<S+VP-,-NP-VP>
(S|<S+VP-,-NP>
(S|<S+VP-,>
(S+VP
(S+VP|<VBN-ADVP> (VBN Turned) (ADVP (RB loose)))
(PP
(IN in)
(NP
(NP|<NP-NN>
(NP
(NP|<NNP-NNP> (NNP Shane) (NNP Longman))
(POS 's))
(NN trading))
(NN room))))
(, ,))
(NP (NP|<DT-NN> (DT the) (NN yuppie)) (NNS dealers)))
(VP (AUX do) (NP (NP (RB little)) (ADJP (RB right)))))
(. .)))
>>> cnfTree = deepcopy(collapsedTree)
>>> chomsky_normal_form(cnfTree, factor='right')
>>> print(cnfTree)
(TOP
(S
(S+VP
(VBN Turned)
(S+VP|<ADVP-PP>
(ADVP (RB loose))
(PP
(IN in)
(NP
(NP (NNP Shane) (NP|<NNP-POS> (NNP Longman) (POS 's)))
(NP|<NN-NN> (NN trading) (NN room))))))
(S|<,-NP-VP-.>
(, ,)
(S|<NP-VP-.>
(NP (DT the) (NP|<NN-NNS> (NN yuppie) (NNS dealers)))
(S|<VP-.>
(VP (AUX do) (NP (NP (RB little)) (ADJP (RB right))))
(. .))))))
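Apply Markov smoothing (here horzMarkov=2 and vertMarkov=1) to make the
artificial node labels a bit more readable: the sibling context recorded in
each artificial label is shortened, and nodes are annotated with their parent
label (the ^<...> suffix). See the treetransforms.py documentation for more
details.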
>>> markovTree = deepcopy(collapsedTree)
>>> chomsky_normal_form(markovTree, horzMarkov=2, vertMarkov=1)
>>> print(markovTree)
(TOP
(S^<TOP>
(S+VP^<S>
(VBN Turned)
(S+VP|<ADVP-PP>^<S>
(ADVP^<S+VP> (RB loose))
(PP^<S+VP>
(IN in)
(NP^<PP>
(NP^<NP>
(NNP Shane)
(NP|<NNP-POS>^<NP> (NNP Longman) (POS 's)))
(NP|<NN-NN>^<PP> (NN trading) (NN room))))))
(S|<,-NP>^<TOP>
(, ,)
(S|<NP-VP>^<TOP>
(NP^<S> (DT the) (NP|<NN-NNS>^<S> (NN yuppie) (NNS dealers)))
(S|<VP-.>^<TOP>
(VP^<S>
(AUX do)
(NP^<VP> (NP^<NP> (RB little)) (ADJP^<NP> (RB right))))
(. .))))))
Convert the transformed tree back to its original form and check that it is
equal to the original tree.
>>> un_chomsky_normal_form(markovTree)
>>> tree == markovTree
True