Files
MoFin/venv/lib/python3.12/site-packages/huggingface_hub/cli/buckets.py
T
知微 fa45d8aa5f fix: 小果地址统一node122(兼容LAN+EasyTier)
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
  Privoxy对node122:18003返回500,直连正常
2026-06-30 02:56:35 +08:00

679 lines
21 KiB
Python

# Copyright 2025-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to interact with buckets via the CLI."""
from typing import Annotated
import typer
from huggingface_hub import logging
from huggingface_hub._buckets import (
BUCKET_PREFIX,
BucketFile,
FilterMatcher,
_parse_bucket_uri,
)
from ..hf_api import REPO_REGIONS
from ._cli_utils import (
SearchOpt,
TokenOpt,
get_hf_api,
typer_factory,
)
from ._cp import make_cp
from ._file_listing import format_size, print_file_listing
from ._output import OutputFormat, out
logger = logging.get_logger(__name__)
buckets_cli = typer_factory(help="Commands to interact with buckets.")
@buckets_cli.command(
name="create",
examples=[
"hf buckets create my-bucket",
"hf buckets create user/my-bucket",
"hf buckets create hf://buckets/user/my-bucket",
"hf buckets create user/my-bucket --private",
"hf buckets create user/my-bucket --exist-ok",
"hf buckets create user/my-bucket --region us",
],
)
def create(
bucket_id: Annotated[
str,
typer.Argument(
help="Bucket ID: bucket_name, namespace/bucket_name, or hf://buckets/namespace/bucket_name",
),
],
private: Annotated[
bool,
typer.Option(
"--private",
help="Create a private bucket.",
),
] = False,
region: Annotated[
REPO_REGIONS | None,
typer.Option(
"--region",
help="Cloud region in which to create the bucket. Can be one of 'us' or 'eu'. Requires Team plan or above.",
),
] = None,
exist_ok: Annotated[
bool,
typer.Option(
"--exist-ok",
help="Do not raise an error if the bucket already exists.",
),
] = False,
token: TokenOpt = None,
) -> None:
"""Create a new bucket."""
api = get_hf_api(token=token)
if bucket_id.startswith(BUCKET_PREFIX):
parsed = _parse_bucket_uri(bucket_id)
if parsed.path_in_repo:
raise typer.BadParameter(
f"Cannot specify a prefix for bucket creation: {bucket_id}."
f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name."
)
bucket_id = parsed.id
bucket_url = api.create_bucket(
bucket_id,
private=private if private else None,
region=region,
exist_ok=exist_ok,
)
out.result("Bucket created", uri=bucket_url.uri.to_uri(), url=bucket_url.url)
def _is_bucket_id(argument: str) -> bool:
"""Check if argument is a bucket ID (namespace/name) vs just a namespace."""
if argument.startswith(BUCKET_PREFIX):
path = argument[len(BUCKET_PREFIX) :]
else:
path = argument
return "/" in path
@buckets_cli.command(
name="list | ls",
examples=[
"hf buckets list",
"hf buckets list huggingface",
'hf buckets list --search "my-prefix"',
"hf buckets list user/my-bucket",
"hf buckets list user/my-bucket -R",
"hf buckets list user/my-bucket -h",
"hf buckets list user/my-bucket --tree",
"hf buckets list user/my-bucket --tree -h",
"hf buckets list hf://buckets/user/my-bucket",
"hf buckets list user/my-bucket/sub -R",
],
)
def list_cmd(
argument: Annotated[
str | None,
typer.Argument(
help=(
"Namespace (user or org) to list buckets, or bucket ID"
" (namespace/bucket_name(/prefix) or hf://buckets/...) to list files."
),
),
] = None,
human_readable: Annotated[
bool,
typer.Option(
"--human-readable",
"-h",
help="Show sizes in human readable format.",
),
] = False,
as_tree: Annotated[
bool,
typer.Option(
"--tree",
help="List files in tree format (only for listing files).",
),
] = False,
recursive: Annotated[
bool,
typer.Option(
"--recursive",
"-R",
help="List files recursively (only for listing files).",
),
] = False,
search: SearchOpt = None,
token: TokenOpt = None,
) -> None:
"""List buckets or files in a bucket.
When called with no argument or a namespace, lists buckets.
When called with a bucket ID (namespace/bucket_name), lists files in the bucket.
"""
# Determine mode: listing buckets or listing files
is_file_mode = argument is not None and _is_bucket_id(argument)
if is_file_mode:
if search is not None:
raise typer.BadParameter("Cannot use --search when listing files.")
_list_files(
argument=argument, # type: ignore
human_readable=human_readable,
as_tree=as_tree,
recursive=recursive,
token=token,
)
else:
_list_buckets(
namespace=argument,
search=search,
human_readable=human_readable,
as_tree=as_tree,
recursive=recursive,
token=token,
)
def _list_buckets(
namespace: str | None,
search: str | None,
human_readable: bool,
as_tree: bool,
recursive: bool,
token: str | None,
) -> None:
"""List buckets in a namespace."""
# Validate incompatible flags
if as_tree:
raise typer.BadParameter("Cannot use --tree when listing buckets.")
if recursive:
raise typer.BadParameter("Cannot use --recursive when listing buckets.")
# Handle hf://buckets/namespace format
if namespace is not None and namespace.startswith(BUCKET_PREFIX):
namespace = namespace[len(BUCKET_PREFIX) :]
# Strip trailing slash if any
namespace = namespace.rstrip("/")
api = get_hf_api(token=token)
items = [
{
"id": bucket.id,
"private": bucket.private,
"size": format_size(bucket.size, human_readable) if human_readable else bucket.size,
"total_files": bucket.total_files,
"created_at": bucket.created_at,
}
for bucket in api.list_buckets(namespace=namespace, search=search)
]
out.table(items, alignments={"size": "right"})
def _list_files(
argument: str,
human_readable: bool,
as_tree: bool,
recursive: bool,
token: str | None,
) -> None:
"""List files in a bucket."""
if as_tree and out.mode == OutputFormat.json:
raise typer.BadParameter("Cannot use --tree with --format json.")
api = get_hf_api(token=token)
parsed = _parse_bucket_uri(argument)
items = list(
api.list_bucket_tree(
parsed.id,
prefix=parsed.path_in_repo or None,
recursive=recursive,
)
)
print_file_listing(items, human_readable=human_readable, as_tree=as_tree, recursive=recursive)
@buckets_cli.command(
name="info",
examples=[
"hf buckets info user/my-bucket",
"hf buckets info hf://buckets/user/my-bucket",
],
)
def info(
bucket_id: Annotated[
str,
typer.Argument(
help="Bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name",
),
],
token: TokenOpt = None,
) -> None:
"""Get info about a bucket."""
api = get_hf_api(token=token)
parsed = _parse_bucket_uri(bucket_id)
bucket = api.bucket_info(parsed.id)
out.dict(bucket, id_key="id")
@buckets_cli.command(
name="delete",
examples=[
"hf buckets delete user/my-bucket",
"hf buckets delete hf://buckets/user/my-bucket",
"hf buckets delete user/my-bucket --yes",
"hf buckets delete user/my-bucket --missing-ok",
],
)
def delete(
bucket_id: Annotated[
str,
typer.Argument(
help="Bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name",
),
],
yes: Annotated[
bool,
typer.Option(
"--yes",
"-y",
help="Skip confirmation prompt.",
),
] = False,
missing_ok: Annotated[
bool,
typer.Option(
"--missing-ok",
help="Do not raise an error if the bucket does not exist.",
),
] = False,
token: TokenOpt = None,
) -> None:
"""Delete a bucket.
This deletes the entire bucket and all its contents. Use `hf buckets rm` to remove individual files.
"""
if bucket_id.startswith(BUCKET_PREFIX):
parsed = _parse_bucket_uri(bucket_id)
if parsed.path_in_repo:
raise typer.BadParameter(
f"Cannot specify a prefix for bucket deletion: {bucket_id}."
f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name."
)
bucket_id = parsed.id
elif "/" not in bucket_id:
raise typer.BadParameter(
f"Invalid bucket ID: {bucket_id}."
f" Must be in format namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name."
)
out.confirm(f"Are you sure you want to delete bucket '{bucket_id}'?", yes=yes)
api = get_hf_api(token=token)
api.delete_bucket(bucket_id, missing_ok=missing_ok)
out.result("Bucket deleted", bucket_id=bucket_id)
@buckets_cli.command(
name="remove | rm",
examples=[
"hf buckets remove user/my-bucket/file.txt",
"hf buckets rm hf://buckets/user/my-bucket/file.txt",
"hf buckets rm user/my-bucket/logs/ --recursive",
'hf buckets rm user/my-bucket --recursive --include "*.tmp"',
"hf buckets rm user/my-bucket/data/ --recursive --dry-run",
],
)
def remove(
argument: Annotated[
str,
typer.Argument(
help=(
"Bucket path: namespace/bucket_name/path or hf://buckets/namespace/bucket_name/path."
" With --recursive, namespace/bucket_name is also accepted to target all files."
),
),
],
recursive: Annotated[
bool,
typer.Option(
"--recursive",
"-R",
help="Remove files recursively under the given prefix.",
),
] = False,
yes: Annotated[
bool,
typer.Option(
"--yes",
"-y",
help="Skip confirmation prompt.",
),
] = False,
dry_run: Annotated[
bool,
typer.Option(
"--dry-run",
help="Preview what would be deleted without actually deleting.",
),
] = False,
include: Annotated[
list[str] | None,
typer.Option(
help="Include only files matching pattern (can specify multiple). Requires --recursive.",
),
] = None,
exclude: Annotated[
list[str] | None,
typer.Option(
help="Exclude files matching pattern (can specify multiple). Requires --recursive.",
),
] = None,
token: TokenOpt = None,
) -> None:
"""Remove files from a bucket.
To delete an entire bucket, use `hf buckets delete` instead.
"""
parsed = _parse_bucket_uri(argument)
bucket_id = parsed.id
prefix = parsed.path_in_repo
if prefix == "" and not recursive:
raise typer.BadParameter(
f"No file path specified. To remove files, provide a path"
f" (e.g. '{bucket_id}/FILE') or use --recursive to remove all files."
f" To delete the entire bucket, use `hf buckets delete {bucket_id}`."
)
if (include or exclude) and not recursive:
raise typer.BadParameter("--include and --exclude require --recursive.")
api = get_hf_api(token=token)
if recursive:
status = out.status("Listing files from remote")
all_files: list[BucketFile] = []
for item in api.list_bucket_tree(
bucket_id,
prefix=prefix or None,
recursive=True,
):
if isinstance(item, BucketFile):
all_files.append(item)
status.update(f"Listing files from remote ({len(all_files)} files)")
status.done(f"Listing files from remote ({len(all_files)} files)")
if include or exclude:
matcher = FilterMatcher(include_patterns=include, exclude_patterns=exclude)
matched_files = [f for f in all_files if matcher.matches(f.path)]
else:
matched_files = all_files
file_paths = [f.path for f in matched_files]
total_size = sum(f.size for f in matched_files)
size_str = format_size(total_size, human_readable=True)
if not file_paths:
out.text("No files to remove.")
return
count_label = f"{len(file_paths)} file(s) totaling {size_str}"
if not yes and not dry_run:
out.text("\n".join(f" {path}" for path in file_paths))
out.confirm(f"Remove {count_label} from '{bucket_id}'?", yes=False)
if dry_run:
out.text("\n".join(f"delete: {BUCKET_PREFIX}{bucket_id}/{path}" for path in file_paths))
out.text(f"(dry run) {count_label} would be removed.")
return
api.batch_bucket_files(bucket_id, delete=file_paths)
out.result(
f"Removed {count_label} from '{bucket_id}'",
bucket_id=bucket_id,
files_deleted=len(file_paths),
size=size_str,
)
else:
file_path = prefix
if not file_path:
raise typer.BadParameter("File path cannot be empty.")
if dry_run:
out.text(f"delete: {BUCKET_PREFIX}{bucket_id}/{file_path}")
out.text("(dry run) 1 file would be removed.")
return
out.confirm(f"Remove '{file_path}' from '{bucket_id}'?", yes=yes)
api.batch_bucket_files(bucket_id, delete=[file_path])
out.result("File removed", path=file_path, bucket_id=bucket_id)
@buckets_cli.command(
name="move",
examples=[
"hf buckets move user/old-bucket user/new-bucket",
"hf buckets move user/my-bucket my-org/my-bucket",
"hf buckets move hf://buckets/user/old-bucket hf://buckets/user/new-bucket",
],
)
def move(
from_id: Annotated[
str,
typer.Argument(
help="Source bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name",
),
],
to_id: Annotated[
str,
typer.Argument(
help="Destination bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name",
),
],
token: TokenOpt = None,
) -> None:
"""Move (rename) a bucket to a new name or namespace."""
# Parse from_id
parsed_from = _parse_bucket_uri(from_id)
if parsed_from.path_in_repo:
raise typer.BadParameter(
f"Cannot specify a prefix for bucket move: {from_id}."
f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name."
)
# Parse to_id
parsed_to = _parse_bucket_uri(to_id)
if parsed_to.path_in_repo:
raise typer.BadParameter(
f"Cannot specify a prefix for bucket move: {to_id}."
f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name."
)
api = get_hf_api(token=token)
api.move_bucket(from_id=parsed_from.id, to_id=parsed_to.id)
out.result("Bucket moved", from_id=parsed_from.id, to_id=parsed_to.id)
# =============================================================================
# Sync command
# =============================================================================
@buckets_cli.command(
name="sync",
examples=[
"hf buckets sync ./data hf://buckets/user/my-bucket",
"hf buckets sync hf://buckets/user/my-bucket ./data",
"hf buckets sync ./data hf://buckets/user/my-bucket --delete",
'hf buckets sync hf://buckets/user/my-bucket ./data --include "*.safetensors" --exclude "*.tmp"',
"hf buckets sync ./data hf://buckets/user/my-bucket --plan sync-plan.jsonl",
"hf buckets sync --apply sync-plan.jsonl",
"hf buckets sync ./data hf://buckets/user/my-bucket --dry-run",
"hf buckets sync ./data hf://buckets/user/my-bucket --dry-run | jq .",
],
)
def sync(
source: Annotated[
str | None,
typer.Argument(
help="Source path: local directory or hf://buckets/namespace/bucket_name(/prefix)",
),
] = None,
dest: Annotated[
str | None,
typer.Argument(
help="Destination path: local directory or hf://buckets/namespace/bucket_name(/prefix)",
),
] = None,
delete: Annotated[
bool,
typer.Option(
help="Delete destination files not present in source.",
),
] = False,
ignore_times: Annotated[
bool,
typer.Option(
"--ignore-times",
help="Skip files only based on size, ignoring modification times.",
),
] = False,
ignore_sizes: Annotated[
bool,
typer.Option(
"--ignore-sizes",
help="Skip files only based on modification times, ignoring sizes.",
),
] = False,
plan: Annotated[
str | None,
typer.Option(
help="Save sync plan to JSONL file for review instead of executing.",
),
] = None,
apply: Annotated[
str | None,
typer.Option(
help="Apply a previously saved plan file.",
),
] = None,
dry_run: Annotated[
bool,
typer.Option(
"--dry-run",
help="Print sync plan to stdout as JSONL without executing.",
),
] = False,
include: Annotated[
list[str] | None,
typer.Option(
help="Include files matching pattern (can specify multiple).",
),
] = None,
exclude: Annotated[
list[str] | None,
typer.Option(
help="Exclude files matching pattern (can specify multiple).",
),
] = None,
filter_from: Annotated[
str | None,
typer.Option(
help="Read include/exclude patterns from file.",
),
] = None,
existing: Annotated[
bool,
typer.Option(
"--existing",
help="Skip creating new files on receiver (only update existing files).",
),
] = False,
ignore_existing: Annotated[
bool,
typer.Option(
"--ignore-existing",
help="Skip updating files that exist on receiver (only create new files).",
),
] = False,
verbose: Annotated[
bool,
typer.Option(
"--verbose",
"-v",
help="Show detailed logging with reasoning.",
),
] = False,
token: TokenOpt = None,
) -> None:
"""Sync files between local directory and a bucket."""
api = get_hf_api(token=token)
api.sync_bucket(
source=source,
dest=dest,
delete=delete,
ignore_times=ignore_times,
ignore_sizes=ignore_sizes,
existing=existing,
ignore_existing=ignore_existing,
include=include,
exclude=exclude,
filter_from=filter_from,
plan=plan,
apply=apply,
dry_run=dry_run,
verbose=verbose,
quiet=out.is_quiet(),
)
if plan and not out.is_quiet():
out.hint(f"Run `hf buckets sync --apply {plan}` to execute this plan.")
# =============================================================================
# Cp command
# =============================================================================
# `hf buckets cp` is an alias for the top-level `hf cp` command (see `cli/_cp.py`).
buckets_cli.command(
name="cp",
examples=[
# Download (repo or bucket -> local / stdout)
"hf buckets cp hf://buckets/username/my-bucket/config.json config.json",
"hf buckets cp hf://buckets/username/my-bucket/data.csv data/",
"hf buckets cp hf://buckets/username/my-bucket/config.json -",
# Upload (local / stdin -> bucket)
"hf buckets cp model.safetensors hf://buckets/username/my-bucket/model.safetensors",
"hf buckets cp config.json hf://buckets/username/my-bucket/logs/",
"hf buckets cp - hf://buckets/username/my-bucket/config.json",
# Remote to remote (repo or bucket -> bucket)
"hf buckets cp hf://buckets/username/my-bucket/data.csv hf://buckets/username/dest-bucket/",
"hf buckets cp hf://buckets/username/source-bucket/logs/ hf://buckets/username/dest-bucket/logs/",
],
)(make_cp("buckets"))