Files
MoFin/venv/lib/python3.12/site-packages/json_repair/json_repair.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

466 lines
21 KiB
Python

"""
This module will parse the JSON file following the BNF definition:
<json> ::= <primitive> | <container>
<primitive> ::= <number> | <string> | <boolean>
; Where:
; <number> is a valid real number expressed in one of a number of given formats
; <string> is a string of valid characters enclosed in quotes
; <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
<container> ::= <object> | <array>
<array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
<object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
<member> ::= <string> ': ' <json> ; A pair consisting of a name, and a JSON value
If something is wrong (a missing parenthesis or quote, for example) it will use a few simple heuristics to fix the JSON string:
- Add the missing parentheses if the parser believes that the array or object should be closed
- Quote strings or add missing single quotes
- Adjust whitespaces and remove line breaks
All supported use cases are in the unit tests
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Literal, TextIO, overload
from .json_parser import JSONParser
from .schema_repair import SchemaRepairer, load_schema_model, normalize_schema_repair_mode, schema_from_input
from .utils.constants import JSONReturnType
# Typing-only overloads: they let static type checkers narrow the return type of
# `repair_json` from the literal values passed for `return_objects` and `logging`.
# Their `...` bodies carry no logic; the undecorated `repair_json` definition that
# follows these stubs provides the runtime behavior.


# return_objects=False and logging=False -> the repaired JSON as a string.
@overload
def repair_json(
    json_str: str = "",
    return_objects: Literal[False] = False,
    skip_json_loads: bool = False,
    logging: Literal[False] = False,
    json_fd: TextIO | None = None,
    chunk_length: int = 0,
    stream_stable: bool = False,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
    **json_dumps_args: Any,
) -> str: ...


# return_objects=True and logging=False -> the decoded Python object.
@overload
def repair_json(
    json_str: str = "",
    return_objects: Literal[True] = True,
    skip_json_loads: bool = False,
    logging: Literal[False] = False,
    json_fd: TextIO | None = None,
    chunk_length: int = 0,
    stream_stable: bool = False,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
    **json_dumps_args: Any,
) -> JSONReturnType: ...


# logging=True -> a (decoded object, repair log) tuple, whatever `return_objects` is.
@overload
def repair_json(
    json_str: str = "",
    return_objects: bool = False,
    skip_json_loads: bool = False,
    logging: Literal[True] = True,
    json_fd: TextIO | None = None,
    chunk_length: int = 0,
    stream_stable: bool = False,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
    **json_dumps_args: Any,
) -> tuple[JSONReturnType, list[dict[str, str]]]: ...


# Fallback for flags that are plain (non-literal) bools: any of the three shapes above.
@overload
def repair_json(
    json_str: str = "",
    return_objects: bool = False,
    skip_json_loads: bool = False,
    logging: bool = False,
    json_fd: TextIO | None = None,
    chunk_length: int = 0,
    stream_stable: bool = False,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
    **json_dumps_args: Any,
) -> str | JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]: ...
def repair_json(
    json_str: str = "",
    return_objects: bool = False,
    skip_json_loads: bool = False,
    logging: bool = False,
    json_fd: TextIO | None = None,
    chunk_length: int = 0,
    stream_stable: bool = False,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
    **json_dumps_args: Any,
) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
    """
    Given a json formatted string, it will try to decode it and, if it fails, it will try to fix it.

    The arguments are validated first. Unless `skip_json_loads` is True, the input is then decoded with the
    built-in json module (the fast path). The input is handed to `JSONParser` only when that decoding is
    skipped or fails, or when a schema is given and the decoded value cannot be made to satisfy it.

    Args:
        json_str (str, optional): The JSON string to repair. Defaults to an empty string.
        return_objects (bool, optional): If True, return the decoded data structure. Defaults to False.
        skip_json_loads (bool, optional): If True, skip calling the built-in json.loads() function to verify that the json is valid before attempting to repair. Defaults to False.
        logging (bool, optional): If True, return a tuple with the repaired json and a log of all repair actions. Defaults to False. When no repairs were required, the repair log will be an empty list.
        json_fd (Optional[TextIO], optional): File descriptor for JSON input. Do not use! Use `from_file` or `load` instead. Defaults to None.
        chunk_length (int, optional): Size in bytes of the file chunks to read at once. Ignored if `json_fd` is None. Do not use! Use `from_file` or `load` instead. Defaults to 0, which is presumably interpreted by JSONParser as its built-in chunk size (previously documented as 1MB) - TODO confirm.
        stream_stable (bool, optional): For input that is the accumulation of streaming JSON at a certain moment. If True, the repair results are kept stable.
        strict (bool, optional): If True, surface structural problems (duplicate keys, missing separators, empty keys/values, etc.) as ValueError instead of repairing them.
        schema (Any, optional): JSON Schema dict, boolean schema, or pydantic v2 model used to guide repairs and validation for both valid and invalid JSON inputs.
        schema_repair_mode (Literal["standard", "salvage"], optional): Schema repair mode. "standard" keeps default schema behavior; "salvage" enables best-effort schema salvage heuristics for arrays/objects.
        **json_dumps_args: Extra keyword arguments forwarded to json.dumps() when the result is serialized to a string, for example ensure_ascii=False to avoid converting non-latin characters to ascii (for example when using chinese characters). Not used when `logging` or `return_objects` is True, because no string is produced then.

    Returns:
        Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: When `logging` is True, a tuple with the
        decoded object and the repair log (this takes precedence over `return_objects`). Otherwise the decoded
        object when `return_objects` is True, else the repaired JSON serialized with json.dumps(); an empty-string
        result is returned as "" rather than as a pair of quotes.

    Raises:
        ValueError: If `schema_repair_mode` is "salvage" without a schema, if `schema` and `strict` are both
            given, or if a RecursionError occurs while parsing or while applying the schema in the parser path.
    """
    schema_repair_mode = normalize_schema_repair_mode(schema_repair_mode)
    if schema is None and schema_repair_mode == "salvage":
        raise ValueError("schema_repair_mode='salvage' requires schema.")
    # Schema-guided repairs and strict mode are mutually exclusive to avoid conflicting behavior.
    if schema is not None and strict:
        raise ValueError("schema and strict cannot be used together.")

    parser: JSONParser | None = None
    repair_log: list[dict[str, str]] = []
    # For file input the parser is created up front; for string input it is only created
    # further down, if the fast path does not produce a usable value.
    if json_fd is not None:
        parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable, strict)
        if logging:
            # Use the parser's own log list so parser and schema repairer write to one list.
            repair_log = parser.logger

    schema_obj = schema_from_input(schema) if schema is not None else None
    # The repairer only receives a log list when the caller asked for logging.
    repairer = (
        SchemaRepairer(schema_obj, repair_log if logging else None, schema_repair_mode=schema_repair_mode)
        if schema_obj is not None
        else None
    )

    # Fast path for valid JSON: schema-aware mode still applies repair+validation.
    parsed_json: JSONReturnType = None
    is_valid_json = False
    try:
        if not skip_json_loads:
            parsed_json = json.load(json_fd) if json_fd else json.loads(json_str)
            if repairer is not None and schema_obj is not None:
                # Validate here to ensure that we reject values that cannot satisfy the schema and
                # fall back to the more expensive parser+schema repair if needed, instead of just
                # returning the valid but schema-noncompliant JSON.
                try:
                    if repairer.is_valid(parsed_json, schema_obj):
                        is_valid_json = True
                    else:
                        try:
                            # repair_value may mutate containers in place; if validate fails we still
                            # fall back to parser.parse_with_schema, which fully replaces parsed_json.
                            repaired_value = repairer.repair_value(parsed_json, schema_obj, "$")
                            if repairer.is_valid(repaired_value, schema_obj):
                                parsed_json = repaired_value
                                is_valid_json = True
                        except ValueError:
                            # The in-place schema repair failed; leave is_valid_json False so the
                            # parser-based path below gets a chance.
                            pass
                except RecursionError as exc:
                    # NOTE(review): this raise sits inside the outer try, whose handler catches
                    # ValueError, so on this fast path the error is swallowed and execution
                    # continues with the parser-based path below - confirm that is intended.
                    raise ValueError("Input schema nesting exceeds the supported schema recursion depth.") from exc
            else:
                # No schema: successfully decoded JSON is accepted as-is.
                is_valid_json = True
    except (json.JSONDecodeError, TypeError, ValueError):
        # Decoding failed; is_valid_json stays False and the repairing parser takes over.
        pass

    if not is_valid_json:
        if parser is None:
            parser = JSONParser(json_str, json_fd, logging, chunk_length, stream_stable, strict)
            if logging:
                # Point the new parser at the same list that was handed to the schema repairer
                # and that is returned to the caller.
                parser.logger = repair_log
        try:
            if repairer is not None and schema_obj is not None:
                # If schema-guided, we want to attempt repairs even on valid JSON that fails schema validation.
                try:
                    parsed_json = parser.parse_with_schema(repairer, schema_obj)
                    repairer.validate(parsed_json, schema_obj)
                except RecursionError as exc:
                    raise ValueError("Input schema nesting exceeds the supported schema recursion depth.") from exc
            else:
                # Otherwise, we can skip the more expensive schema-aware parsing and just do a normal parse.
                parsed_json = parser.parse()
        except RecursionError as exc:
            raise ValueError("Input nesting exceeds the supported parser recursion depth.") from exc

    # It's useful to return the actual object instead of the json string,
    # it allows this lib to be a replacement of the json library
    if logging:
        # Checked before return_objects: with logging the decoded object is always returned.
        return parsed_json, repair_log
    if return_objects:
        return parsed_json
    # Avoid returning only a pair of quotes if it's an empty string
    if parsed_json == "":
        return ""
    return json.dumps(parsed_json, **json_dumps_args)
def loads(
    json_str: str,
    skip_json_loads: bool = False,
    logging: bool = False,
    stream_stable: bool = False,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]] | str:
    """
    Drop-in counterpart of `json.loads()` that repairs the JSON text while decoding it.

    Every argument is handed unchanged to `repair_json()`, which is always called with `return_objects=True`.

    Args:
        json_str (str): The JSON string to load and repair.
        skip_json_loads (bool, optional): If True, do not try the built-in json.loads() first. Defaults to False.
        logging (bool, optional): If True, the result is a tuple of the repaired object and the log of repair actions. Defaults to False.
        stream_stable (bool, optional): Forwarded as-is to `repair_json()`. Defaults to False.
        strict (bool, optional): If True, structural problems (duplicate keys, missing separators, empty keys/values, etc.) are raised as ValueError instead of being repaired.
        schema (Any, optional): JSON Schema dict, boolean schema, or pydantic v2 model that guides repairs and validation for both valid and invalid JSON inputs.
        schema_repair_mode (Literal["standard", "salvage"], optional): Schema repair mode. "salvage" requires schema.

    Returns:
        Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]], str]: The repaired JSON object, or a tuple of that object and the repair log.
    """
    repaired = repair_json(
        json_str=json_str,
        return_objects=True,
        schema=schema,
        schema_repair_mode=schema_repair_mode,
        strict=strict,
        stream_stable=stream_stable,
        logging=logging,
        skip_json_loads=skip_json_loads,
    )
    return repaired
def load(
    fd: TextIO,
    skip_json_loads: bool = False,
    logging: bool = False,
    chunk_length: int = 0,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
    """
    Drop-in counterpart of `json.load()` that repairs the JSON read from `fd` while decoding it.

    Delegates to `repair_json()` with `json_fd=fd` and `return_objects=True`; the remaining arguments are passed through unchanged.

    Args:
        fd (TextIO): File descriptor for JSON input.
        skip_json_loads (bool, optional): If True, do not try the built-in json decoder first. Defaults to False.
        logging (bool, optional): If True, the result is a tuple of the repaired object and the log of repair actions. Defaults to False.
        chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
        strict (bool, optional): If True, structural problems (duplicate keys, missing separators, empty keys/values, etc.) are raised as ValueError instead of being repaired.
        schema (Any, optional): JSON Schema dict, boolean schema, or pydantic v2 model that guides repairs and validation for both valid and invalid JSON inputs.
        schema_repair_mode (Literal["standard", "salvage"], optional): Schema repair mode. "salvage" requires schema.

    Returns:
        Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object, or a tuple of that object and the repair log.
    """
    repaired = repair_json(
        return_objects=True,
        json_fd=fd,
        chunk_length=chunk_length,
        schema=schema,
        schema_repair_mode=schema_repair_mode,
        strict=strict,
        logging=logging,
        skip_json_loads=skip_json_loads,
    )
    return repaired
def from_file(
    filename: str | Path,
    skip_json_loads: bool = False,
    logging: bool = False,
    chunk_length: int = 0,
    strict: bool = False,
    schema: Any | None = None,
    schema_repair_mode: Literal["standard", "salvage"] = "standard",
) -> JSONReturnType | tuple[JSONReturnType, list[dict[str, str]]]:
    """
    Open `filename` and repair its JSON content by delegating to `load()`.

    The file is opened in text mode with the default arguments of `Path.open()` and is closed again
    before the result is returned.

    Args:
        filename (str | Path): The name of the file containing JSON data to load and repair.
        skip_json_loads (bool, optional): If True, do not try the built-in json decoder first. Defaults to False.
        logging (bool, optional): If True, the result is a tuple of the repaired object and the log of repair actions. Defaults to False.
        chunk_length (int, optional): Size in bytes of the file chunks to read at once. Defaults to 1MB.
        strict (bool, optional): If True, structural problems (duplicate keys, missing separators, empty keys/values, etc.) are raised as ValueError instead of being repaired.
        schema (Any, optional): JSON Schema dict, boolean schema, or pydantic v2 model that guides repairs and validation for both valid and invalid JSON inputs.
        schema_repair_mode (Literal["standard", "salvage"], optional): Schema repair mode. "salvage" requires schema.

    Returns:
        Union[JSONReturnType, Tuple[JSONReturnType, List[Dict[str, str]]]]: The repaired JSON object, or a tuple of that object and the repair log.
    """
    source_path = Path(filename)
    with source_path.open() as handle:
        repaired = load(
            handle,
            skip_json_loads=skip_json_loads,
            logging=logging,
            chunk_length=chunk_length,
            strict=strict,
            schema=schema,
            schema_repair_mode=schema_repair_mode,
        )
    return repaired
def cli(inline_args: list[str] | None = None) -> int:
"""
Command-line interface for repairing and parsing JSON files.
Args:
inline_args (Optional[List[str]]): List of command-line arguments for testing purposes. Defaults to None.
- filename (str): The JSON file to repair. If omitted, the JSON is read from stdin.
- -i, --inline (bool): Replace the file inline instead of returning the output to stdout.
- -o, --output TARGET (str): If specified, the output will be written to TARGET filename instead of stdout.
- --ensure_ascii (bool): Pass ensure_ascii=True to json.dumps(). Will pass False otherwise.
- --indent INDENT (int): Number of spaces for indentation (Default 2).
- --skip-json-loads (bool): Skip initial json.loads validation.
- --schema SCHEMA (str): Path to a JSON Schema file that guides repairs.
- --schema-model MODEL (str): Pydantic v2 model in 'module:ClassName' form that guides repairs.
- --strict (bool): Raise on duplicate keys, missing separators, empty keys/values, and other unrecoverable structures instead of repairing them.
Returns:
int: Exit code of the CLI operation.
Raises:
Exception: Any exception that occurs during file processing.
Example:
>>> cli(['example.json', '--indent', '4'])
>>> cat json.txt | json_repair
"""
parser = argparse.ArgumentParser(description="Repair and parse JSON files.")
# Make the filename argument optional; if omitted, we will read from stdin.
parser.add_argument(
"filename",
nargs="?",
help="The JSON file to repair (if omitted, reads from stdin)",
)
parser.add_argument(
"-i",
"--inline",
action="store_true",
help="Replace the file inline instead of returning the output to stdout",
)
parser.add_argument(
"-o",
"--output",
metavar="TARGET",
help="If specified, the output will be written to TARGET filename instead of stdout",
)
parser.add_argument(
"--ensure_ascii",
action="store_true",
help="Pass ensure_ascii=True to json.dumps()",
)
parser.add_argument(
"--indent",
type=int,
default=2,
help="Number of spaces for indentation (Default 2)",
)
parser.add_argument(
"--skip-json-loads",
action="store_true",
help="Skip initial json.loads validation",
)
parser.add_argument(
"--schema",
metavar="SCHEMA",
help="Path to a JSON Schema file that guides repairs",
)
parser.add_argument(
"--schema-model",
metavar="MODEL",
help="Pydantic v2 model in 'module:ClassName' form that guides repairs",
)
parser.add_argument(
"--strict",
action="store_true",
help="Raise on duplicate keys, missing separators, empty keys/values, and other unrecoverable structures instead of repairing them",
)
parser.add_argument(
"--schema-repair-mode",
choices=["standard", "salvage"],
default="standard",
help="Schema repair mode: 'standard' (default) or 'salvage' (best-effort array/object salvage).",
)
args = parser.parse_args(inline_args)
# Inline mode requires a filename, so error out if none was provided.
if args.inline and not args.filename: # pragma: no cover
print("Error: Inline mode requires a filename", file=sys.stderr)
sys.exit(1)
if args.inline and args.output: # pragma: no cover
print("Error: You cannot pass both --inline and --output", file=sys.stderr)
sys.exit(1)
if args.schema and args.schema_model:
print("Error: You cannot pass both --schema and --schema-model", file=sys.stderr)
sys.exit(1)
if args.strict and (args.schema or args.schema_model):
print("Error: --strict cannot be used with --schema or --schema-model", file=sys.stderr)
sys.exit(1)
if args.schema_repair_mode == "salvage" and not (args.schema or args.schema_model):
print("Error: --schema-repair-mode salvage requires --schema or --schema-model", file=sys.stderr)
sys.exit(1)
ensure_ascii = args.ensure_ascii
try:
schema = None
if args.schema:
with Path(args.schema).open() as fd:
schema = json.load(fd)
elif args.schema_model:
schema = load_schema_model(args.schema_model)
# Use from_file if a filename is provided; otherwise read from stdin.
if args.filename:
result = from_file(
args.filename,
skip_json_loads=args.skip_json_loads,
strict=args.strict,
schema=schema,
schema_repair_mode=args.schema_repair_mode,
)
else:
data = sys.stdin.read()
result = loads(
data,
skip_json_loads=args.skip_json_loads,
strict=args.strict,
schema=schema,
schema_repair_mode=args.schema_repair_mode,
)
if args.inline or args.output:
with Path(args.output or args.filename).open(mode="w") as fd:
json.dump(result, fd, indent=args.indent, ensure_ascii=ensure_ascii)
else:
print(json.dumps(result, indent=args.indent, ensure_ascii=ensure_ascii))
except (OSError, TypeError, ValueError) as e: # pragma: no cover
print(f"Error: {e!s}", file=sys.stderr)
return 1
return 0 # Success
# Script entry point: run the CLI and use its return value as the process exit status.
if __name__ == "__main__":  # pragma: no cover
    exit_status = cli()
    sys.exit(exit_status)