fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
293 lines
12 KiB
Python
293 lines
12 KiB
Python
import sys
|
|
from unittest import TestCase
|
|
|
|
import simplejson as json
|
|
import simplejson.decoder
|
|
from simplejson.compat import b, PY3
|
|
|
|
class TestScanString(TestCase):
    # Exercises simplejson's string scanner through both of its backends:
    # the pure-Python ``py_scanstring`` and, when it has been built, the C
    # ``c_scanstring``.
    #
    # The bytes type is intentionally not used in most of these tests
    # under Python 3 because the decoder immediately coerces to str before
    # calling scanstring. In Python 2 we are testing the code paths
    # for both unicode and str.
    #
    # The reason this is done is because Python 3 would require
    # entirely different code paths for parsing bytes and str.
    #

    def test_py_scanstring(self):
        # Run the shared assertions against the pure-Python scanner.
        self._test_scanstring(simplejson.decoder.py_scanstring)

    def test_c_scanstring(self):
        # Run the shared assertions against the C scanner.
        c_scanstring = simplejson.decoder.c_scanstring
        if not c_scanstring:
            # Report a skip rather than returning silently, so a missing C
            # extension is not counted as a passing test.
            self.skipTest('simplejson C scanstring is not available')
        self._test_scanstring(c_scanstring)

        self.assertIsInstance(c_scanstring('""', 0)[0], str)

    def _test_scanstring(self, scanstring):
        # Shared assertions for a single scanstring implementation.
        #
        # Each call passes the text, the index just past the opening quote,
        # None, and a flag; the expected result is the decoded string paired
        # with the index just past the closing quote. As the control
        # character checks below show, a true flag makes raw control
        # characters an error while a false flag accepts them.

        # A non-BMP character occupies two code units on narrow (UCS-2)
        # builds and one on wide builds, so the end index differs.
        astral_end = 6 if sys.maxunicode == 65535 else 5
        self.assertEqual(
            scanstring(u'"z\U0001d120x"', 1, None, True),
            (u'z\U0001d120x', astral_end))

        # (document, start index, expected (string, end index)).
        # Most documents are invalid JSON overall; only the quoted string
        # beginning at ``start`` has to be scanned correctly.
        valid_cases = [
            ('"\\u007b"', 1, (u'{', 8)),
            ('"A JSON payload should be an object or array, not a string."',
             1,
             (u'A JSON payload should be an object or array, not a string.',
              60)),
            ('["Unclosed array"', 2, (u'Unclosed array', 17)),
            ('["extra comma",]', 2, (u'extra comma', 14)),
            ('["double extra comma",,]', 2, (u'double extra comma', 21)),
            ('["Comma after the close"],', 2,
             (u'Comma after the close', 24)),
            ('["Extra close"]]', 2, (u'Extra close', 14)),
            ('{"Extra comma": true,}', 2, (u'Extra comma', 14)),
            ('{"Extra value after close": true} "misplaced quoted value"',
             2,
             (u'Extra value after close', 26)),
            ('{"Illegal expression": 1 + 2}', 2,
             (u'Illegal expression', 21)),
            ('{"Illegal invocation": alert()}', 2,
             (u'Illegal invocation', 21)),
            ('{"Numbers cannot have leading zeroes": 013}', 2,
             (u'Numbers cannot have leading zeroes', 37)),
            ('{"Numbers cannot be hex": 0x14}', 2,
             (u'Numbers cannot be hex', 24)),
            ('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21,
             (u'Too deep', 30)),
            ('{"Missing colon" null}', 2, (u'Missing colon', 16)),
            ('{"Double colon":: null}', 2, (u'Double colon', 15)),
            ('{"Comma instead of colon", null}', 2,
             (u'Comma instead of colon', 25)),
            ('["Colon instead of comma": false]', 2,
             (u'Colon instead of comma', 25)),
            ('["Bad value", truth]', 2, (u'Bad value', 12)),
        ]
        for document, start, expected in valid_cases:
            self.assertEqual(
                scanstring(document, start, None, True),
                expected,
                'scanstring(%r, %r)' % (document, start))

        # Every JSON control character, U+0000 through U+001F inclusive,
        # is accepted verbatim when the flag is false and rejected when it
        # is true. The range end is 0x20 so that U+001F itself is covered.
        for c in map(chr, range(0x00, 0x20)):
            self.assertEqual(
                scanstring(c + '"', 0, None, False),
                (c, 2))
            self.assertRaises(
                ValueError,
                scanstring, c + '"', 0, None, True)

        # Inputs that end before a closing quote or in the middle of an
        # escape sequence.
        truncated_inputs = [
            '',
            'a',
            '\\',
            '\\u',
            '\\u0',
            '\\u01',
            '\\u012',
            '\\u0123',
        ]
        for document in truncated_inputs:
            self.assertRaises(ValueError, scanstring, document, 0, None, True)

        if sys.maxunicode > 65535:
            self.assertRaises(ValueError,
                              scanstring, '\\ud834\\u"', 0, None, True)
            self.assertRaises(ValueError,
                              scanstring, '\\ud834\\x0123"', 0, None, True)

        self.assertRaises(json.JSONDecodeError,
                          scanstring, '\\u-123"', 0, None, True)
        # SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
        self.assertRaises(json.JSONDecodeError,
                          scanstring, '\\u EDD"', 0, None, True)

    def test_issue3623(self):
        self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
                          "xxx")
        self.assertRaises(UnicodeDecodeError,
                          json.encoder.encode_basestring_ascii, b("xx\xff"))

    def test_overflow(self):
        # Python 2.5 does not have maxsize, Python 3 does not have maxint
        maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None))
        # A unittest assertion is used instead of a bare ``assert`` so the
        # check still runs under ``python -O``.
        self.assertIsNotNone(maxsize)
        self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
                          maxsize + 1)

    def test_end_out_of_bounds_is_jsondecodeerror(self):
        # Regression: C scanstring used to raise a plain ValueError for
        # out-of-range end indices, while py_scanstring raises
        # JSONDecodeError. User code with `except JSONDecodeError:` missed
        # the C path. Both backends now raise JSONDecodeError with the
        # "Unterminated string starting at" message at pos = end - 1.
        for s, end in (
            (u'"abc"', 100),
            (u'abc', 100),
            (u'', 100),
            (u'abc', -1),
            (u'', -1),
        ):
            with self.assertRaises(json.JSONDecodeError) as cm:
                json.decoder.scanstring(s, end, None, True)
            self.assertEqual(cm.exception.pos, end - 1,
                             'scanstring(%r, %r) pos=%r, expected %r' %
                             (s, end, cm.exception.pos, end - 1))
            self.assertIn('Unterminated string', str(cm.exception))

    def test_surrogates(self):
        scanstring = json.decoder.scanstring

        def assertScan(given, expect, test_utf8=True):
            # Scan ``given`` from just past its opening quote and require
            # that the whole input is consumed and decodes to ``expect``.
            # On Python 2 the UTF-8 encoded form is checked as well unless
            # ``test_utf8`` is false.
            candidates = [given]
            if not PY3 and test_utf8:
                candidates.append(given.encode('utf8'))
            for candidate in candidates:
                (res, count) = scanstring(candidate, 1, None, True)
                self.assertEqual(len(candidate), count)
                self.assertEqual(res, expect)

        assertScan(
            u'"z\\ud834\\u0079x"',
            u'z\ud834yx')
        assertScan(
            u'"z\\ud834\\udd20x"',
            u'z\U0001d120x')
        assertScan(
            u'"z\\ud834\\ud834\\udd20x"',
            u'z\ud834\U0001d120x')
        assertScan(
            u'"z\\ud834x"',
            u'z\ud834x')
        assertScan(
            u'"z\\udd20x"',
            u'z\udd20x')
        assertScan(
            u'"z\ud834x"',
            u'z\ud834x')
        # It may look strange to join strings together, but Python is drunk.
        # https://gist.github.com/etrepum/5538443
        assertScan(
            u'"z\\ud834\udd20x12345"',
            u''.join([u'z\ud834', u'\udd20x12345']))
        assertScan(
            u'"z\ud834\\udd20x"',
            u''.join([u'z\ud834', u'\udd20x']))
        # these have different behavior given UTF8 input, because the surrogate
        # pair may be joined (in maxunicode > 65535 builds)
        assertScan(
            u''.join([u'"z\ud834', u'\udd20x"']),
            u''.join([u'z\ud834', u'\udd20x']),
            test_utf8=False)

        self.assertRaises(ValueError,
                          scanstring, u'"z\\ud83x"', 1, None, True)
        self.assertRaises(ValueError,
                          scanstring, u'"z\\ud834\\udd2x"', 1, None, True)

    def test_escape_error_parity(self):
        # Regression: the C scanstring bounds check was `end >= len` / the
        # surrogate-pair bounds check was `end + 6 < len`. Both were
        # off-by-one, causing C to raise "Invalid \\uXXXX escape sequence"
        # where pure-Python correctly raised "Unterminated string starting
        # at" when a \\uXXXX escape used the last bytes of the buffer. The
        # error-position offset also differed: C reported the position of
        # the 'u' while Python reported the position of the leading '\'.
        # This test asserts exact parity (exception class, position, and
        # message prefix) across a matrix of edge cases.
        if simplejson.decoder.c_scanstring is None:
            # Parity cannot be checked without the C backend; report a skip
            # rather than a silent pass.
            self.skipTest('simplejson C scanstring is not available')

        def get_exc(scanstring, s):
            # Return (pos, message text before the first ':') for the
            # JSONDecodeError raised while scanning ``s`` from index 0, or
            # None when the scan succeeds.
            try:
                scanstring(s, 0, None, True)
            except json.JSONDecodeError as e:
                return (e.pos, str(e).split(':')[0])
            return None

        # Each case is (input, expected), where ``expected`` is the
        # (pos, message_prefix) pair that get_exc() must return:
        # UNTERMINATED for 'Unterminated string starting at' reported at
        # pos -1, or INVALID(pos) for the positional
        # 'Invalid \\uXXXX escape sequence' error.
        UNTERMINATED = (-1, 'Unterminated string starting at')

        def INVALID(pos):
            return (pos, 'Invalid \\uXXXX escape sequence')

        cases = [
            # Not enough room for 4 hex digits after \u.
            (u'\\u', INVALID(0)),
            (u'\\u0', INVALID(0)),
            (u'\\u01', INVALID(0)),
            (u'\\u012', INVALID(0)),
            # 4 non-hex chars after \u -- C used to raise at the 'u'.
            (u'\\uXXXX', INVALID(0)),
            # Exactly 4 hex digits at buffer end -- C used to mis-report
            # 'Invalid \\uXXXX escape' instead of 'Unterminated string'.
            (u'\\u0123', UNTERMINATED),
            # Lone high surrogate with no room for a second escape.
            (u'\\ud834', UNTERMINATED),
            # High surrogate followed by a truncated second escape.
            (u'\\ud834\\u', INVALID(6)),
            (u'\\ud834\\ux', INVALID(6)),
            (u'\\ud834\\udd2', INVALID(6)),
            (u'\\ud834\\udd2x', INVALID(6)),
            # High surrogate followed by a valid low surrogate that ends
            # exactly at the buffer edge -- must combine before the outer
            # loop reports an unterminated string.
            (u'\\ud834\\udd1e', UNTERMINATED),
            (u'prefix\\ud834\\udd1e', UNTERMINATED),
        ]
        for s, expected in cases:
            py = get_exc(simplejson.decoder.py_scanstring, s)
            c = get_exc(simplejson.decoder.c_scanstring, s)
            self.assertEqual(py, expected,
                             'py_scanstring(%r) expected %r, got %r' %
                             (s, expected, py))
            self.assertEqual(c, expected,
                             'c_scanstring(%r) expected %r, got %r' %
                             (s, expected, c))

        # Success paths: valid escape or surrogate pair ending at the
        # closing quote must still parse correctly.
        for scanstring in (simplejson.decoder.py_scanstring,
                           simplejson.decoder.c_scanstring):
            self.assertEqual(
                scanstring(u'\\u0123"', 0, None, True),
                (u'\u0123', 7))
            self.assertEqual(
                scanstring(u'\\ud834\\udd1e"', 0, None, True),
                (u'\U0001d11e', 13))