# Copyright 2025-present, the HuggingFace Inc. team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Contains commands to interact with buckets via the CLI.""" from typing import Annotated import typer from huggingface_hub import logging from huggingface_hub._buckets import ( BUCKET_PREFIX, BucketFile, FilterMatcher, _parse_bucket_uri, ) from ..hf_api import REPO_REGIONS from ._cli_utils import ( SearchOpt, TokenOpt, get_hf_api, typer_factory, ) from ._cp import make_cp from ._file_listing import format_size, print_file_listing from ._output import OutputFormat, out logger = logging.get_logger(__name__) buckets_cli = typer_factory(help="Commands to interact with buckets.") @buckets_cli.command( name="create", examples=[ "hf buckets create my-bucket", "hf buckets create user/my-bucket", "hf buckets create hf://buckets/user/my-bucket", "hf buckets create user/my-bucket --private", "hf buckets create user/my-bucket --exist-ok", "hf buckets create user/my-bucket --region us", ], ) def create( bucket_id: Annotated[ str, typer.Argument( help="Bucket ID: bucket_name, namespace/bucket_name, or hf://buckets/namespace/bucket_name", ), ], private: Annotated[ bool, typer.Option( "--private", help="Create a private bucket.", ), ] = False, region: Annotated[ REPO_REGIONS | None, typer.Option( "--region", help="Cloud region in which to create the bucket. Can be one of 'us' or 'eu'. Requires Team plan or above.", ), ] = None, exist_ok: Annotated[ bool, typer.Option( "--exist-ok", help="Do not raise an error if the bucket already exists.", ), ] = False, token: TokenOpt = None, ) -> None: """Create a new bucket.""" api = get_hf_api(token=token) if bucket_id.startswith(BUCKET_PREFIX): parsed = _parse_bucket_uri(bucket_id) if parsed.path_in_repo: raise typer.BadParameter( f"Cannot specify a prefix for bucket creation: {bucket_id}." f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name." ) bucket_id = parsed.id bucket_url = api.create_bucket( bucket_id, private=private if private else None, region=region, exist_ok=exist_ok, ) out.result("Bucket created", uri=bucket_url.uri.to_uri(), url=bucket_url.url) def _is_bucket_id(argument: str) -> bool: """Check if argument is a bucket ID (namespace/name) vs just a namespace.""" if argument.startswith(BUCKET_PREFIX): path = argument[len(BUCKET_PREFIX) :] else: path = argument return "/" in path @buckets_cli.command( name="list | ls", examples=[ "hf buckets list", "hf buckets list huggingface", 'hf buckets list --search "my-prefix"', "hf buckets list user/my-bucket", "hf buckets list user/my-bucket -R", "hf buckets list user/my-bucket -h", "hf buckets list user/my-bucket --tree", "hf buckets list user/my-bucket --tree -h", "hf buckets list hf://buckets/user/my-bucket", "hf buckets list user/my-bucket/sub -R", ], ) def list_cmd( argument: Annotated[ str | None, typer.Argument( help=( "Namespace (user or org) to list buckets, or bucket ID" " (namespace/bucket_name(/prefix) or hf://buckets/...) to list files." ), ), ] = None, human_readable: Annotated[ bool, typer.Option( "--human-readable", "-h", help="Show sizes in human readable format.", ), ] = False, as_tree: Annotated[ bool, typer.Option( "--tree", help="List files in tree format (only for listing files).", ), ] = False, recursive: Annotated[ bool, typer.Option( "--recursive", "-R", help="List files recursively (only for listing files).", ), ] = False, search: SearchOpt = None, token: TokenOpt = None, ) -> None: """List buckets or files in a bucket. When called with no argument or a namespace, lists buckets. When called with a bucket ID (namespace/bucket_name), lists files in the bucket. """ # Determine mode: listing buckets or listing files is_file_mode = argument is not None and _is_bucket_id(argument) if is_file_mode: if search is not None: raise typer.BadParameter("Cannot use --search when listing files.") _list_files( argument=argument, # type: ignore human_readable=human_readable, as_tree=as_tree, recursive=recursive, token=token, ) else: _list_buckets( namespace=argument, search=search, human_readable=human_readable, as_tree=as_tree, recursive=recursive, token=token, ) def _list_buckets( namespace: str | None, search: str | None, human_readable: bool, as_tree: bool, recursive: bool, token: str | None, ) -> None: """List buckets in a namespace.""" # Validate incompatible flags if as_tree: raise typer.BadParameter("Cannot use --tree when listing buckets.") if recursive: raise typer.BadParameter("Cannot use --recursive when listing buckets.") # Handle hf://buckets/namespace format if namespace is not None and namespace.startswith(BUCKET_PREFIX): namespace = namespace[len(BUCKET_PREFIX) :] # Strip trailing slash if any namespace = namespace.rstrip("/") api = get_hf_api(token=token) items = [ { "id": bucket.id, "private": bucket.private, "size": format_size(bucket.size, human_readable) if human_readable else bucket.size, "total_files": bucket.total_files, "created_at": bucket.created_at, } for bucket in api.list_buckets(namespace=namespace, search=search) ] out.table(items, alignments={"size": "right"}) def _list_files( argument: str, human_readable: bool, as_tree: bool, recursive: bool, token: str | None, ) -> None: """List files in a bucket.""" if as_tree and out.mode == OutputFormat.json: raise typer.BadParameter("Cannot use --tree with --format json.") api = get_hf_api(token=token) parsed = _parse_bucket_uri(argument) items = list( api.list_bucket_tree( parsed.id, prefix=parsed.path_in_repo or None, recursive=recursive, ) ) print_file_listing(items, human_readable=human_readable, as_tree=as_tree, recursive=recursive) @buckets_cli.command( name="info", examples=[ "hf buckets info user/my-bucket", "hf buckets info hf://buckets/user/my-bucket", ], ) def info( bucket_id: Annotated[ str, typer.Argument( help="Bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name", ), ], token: TokenOpt = None, ) -> None: """Get info about a bucket.""" api = get_hf_api(token=token) parsed = _parse_bucket_uri(bucket_id) bucket = api.bucket_info(parsed.id) out.dict(bucket, id_key="id") @buckets_cli.command( name="delete", examples=[ "hf buckets delete user/my-bucket", "hf buckets delete hf://buckets/user/my-bucket", "hf buckets delete user/my-bucket --yes", "hf buckets delete user/my-bucket --missing-ok", ], ) def delete( bucket_id: Annotated[ str, typer.Argument( help="Bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name", ), ], yes: Annotated[ bool, typer.Option( "--yes", "-y", help="Skip confirmation prompt.", ), ] = False, missing_ok: Annotated[ bool, typer.Option( "--missing-ok", help="Do not raise an error if the bucket does not exist.", ), ] = False, token: TokenOpt = None, ) -> None: """Delete a bucket. This deletes the entire bucket and all its contents. Use `hf buckets rm` to remove individual files. """ if bucket_id.startswith(BUCKET_PREFIX): parsed = _parse_bucket_uri(bucket_id) if parsed.path_in_repo: raise typer.BadParameter( f"Cannot specify a prefix for bucket deletion: {bucket_id}." f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name." ) bucket_id = parsed.id elif "/" not in bucket_id: raise typer.BadParameter( f"Invalid bucket ID: {bucket_id}." f" Must be in format namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name." ) out.confirm(f"Are you sure you want to delete bucket '{bucket_id}'?", yes=yes) api = get_hf_api(token=token) api.delete_bucket(bucket_id, missing_ok=missing_ok) out.result("Bucket deleted", bucket_id=bucket_id) @buckets_cli.command( name="remove | rm", examples=[ "hf buckets remove user/my-bucket/file.txt", "hf buckets rm hf://buckets/user/my-bucket/file.txt", "hf buckets rm user/my-bucket/logs/ --recursive", 'hf buckets rm user/my-bucket --recursive --include "*.tmp"', "hf buckets rm user/my-bucket/data/ --recursive --dry-run", ], ) def remove( argument: Annotated[ str, typer.Argument( help=( "Bucket path: namespace/bucket_name/path or hf://buckets/namespace/bucket_name/path." " With --recursive, namespace/bucket_name is also accepted to target all files." ), ), ], recursive: Annotated[ bool, typer.Option( "--recursive", "-R", help="Remove files recursively under the given prefix.", ), ] = False, yes: Annotated[ bool, typer.Option( "--yes", "-y", help="Skip confirmation prompt.", ), ] = False, dry_run: Annotated[ bool, typer.Option( "--dry-run", help="Preview what would be deleted without actually deleting.", ), ] = False, include: Annotated[ list[str] | None, typer.Option( help="Include only files matching pattern (can specify multiple). Requires --recursive.", ), ] = None, exclude: Annotated[ list[str] | None, typer.Option( help="Exclude files matching pattern (can specify multiple). Requires --recursive.", ), ] = None, token: TokenOpt = None, ) -> None: """Remove files from a bucket. To delete an entire bucket, use `hf buckets delete` instead. """ parsed = _parse_bucket_uri(argument) bucket_id = parsed.id prefix = parsed.path_in_repo if prefix == "" and not recursive: raise typer.BadParameter( f"No file path specified. To remove files, provide a path" f" (e.g. '{bucket_id}/FILE') or use --recursive to remove all files." f" To delete the entire bucket, use `hf buckets delete {bucket_id}`." ) if (include or exclude) and not recursive: raise typer.BadParameter("--include and --exclude require --recursive.") api = get_hf_api(token=token) if recursive: status = out.status("Listing files from remote") all_files: list[BucketFile] = [] for item in api.list_bucket_tree( bucket_id, prefix=prefix or None, recursive=True, ): if isinstance(item, BucketFile): all_files.append(item) status.update(f"Listing files from remote ({len(all_files)} files)") status.done(f"Listing files from remote ({len(all_files)} files)") if include or exclude: matcher = FilterMatcher(include_patterns=include, exclude_patterns=exclude) matched_files = [f for f in all_files if matcher.matches(f.path)] else: matched_files = all_files file_paths = [f.path for f in matched_files] total_size = sum(f.size for f in matched_files) size_str = format_size(total_size, human_readable=True) if not file_paths: out.text("No files to remove.") return count_label = f"{len(file_paths)} file(s) totaling {size_str}" if not yes and not dry_run: out.text("\n".join(f" {path}" for path in file_paths)) out.confirm(f"Remove {count_label} from '{bucket_id}'?", yes=False) if dry_run: out.text("\n".join(f"delete: {BUCKET_PREFIX}{bucket_id}/{path}" for path in file_paths)) out.text(f"(dry run) {count_label} would be removed.") return api.batch_bucket_files(bucket_id, delete=file_paths) out.result( f"Removed {count_label} from '{bucket_id}'", bucket_id=bucket_id, files_deleted=len(file_paths), size=size_str, ) else: file_path = prefix if not file_path: raise typer.BadParameter("File path cannot be empty.") if dry_run: out.text(f"delete: {BUCKET_PREFIX}{bucket_id}/{file_path}") out.text("(dry run) 1 file would be removed.") return out.confirm(f"Remove '{file_path}' from '{bucket_id}'?", yes=yes) api.batch_bucket_files(bucket_id, delete=[file_path]) out.result("File removed", path=file_path, bucket_id=bucket_id) @buckets_cli.command( name="move", examples=[ "hf buckets move user/old-bucket user/new-bucket", "hf buckets move user/my-bucket my-org/my-bucket", "hf buckets move hf://buckets/user/old-bucket hf://buckets/user/new-bucket", ], ) def move( from_id: Annotated[ str, typer.Argument( help="Source bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name", ), ], to_id: Annotated[ str, typer.Argument( help="Destination bucket ID: namespace/bucket_name or hf://buckets/namespace/bucket_name", ), ], token: TokenOpt = None, ) -> None: """Move (rename) a bucket to a new name or namespace.""" # Parse from_id parsed_from = _parse_bucket_uri(from_id) if parsed_from.path_in_repo: raise typer.BadParameter( f"Cannot specify a prefix for bucket move: {from_id}." f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name." ) # Parse to_id parsed_to = _parse_bucket_uri(to_id) if parsed_to.path_in_repo: raise typer.BadParameter( f"Cannot specify a prefix for bucket move: {to_id}." f" Use namespace/bucket_name or {BUCKET_PREFIX}namespace/bucket_name." ) api = get_hf_api(token=token) api.move_bucket(from_id=parsed_from.id, to_id=parsed_to.id) out.result("Bucket moved", from_id=parsed_from.id, to_id=parsed_to.id) # ============================================================================= # Sync command # ============================================================================= @buckets_cli.command( name="sync", examples=[ "hf buckets sync ./data hf://buckets/user/my-bucket", "hf buckets sync hf://buckets/user/my-bucket ./data", "hf buckets sync ./data hf://buckets/user/my-bucket --delete", 'hf buckets sync hf://buckets/user/my-bucket ./data --include "*.safetensors" --exclude "*.tmp"', "hf buckets sync ./data hf://buckets/user/my-bucket --plan sync-plan.jsonl", "hf buckets sync --apply sync-plan.jsonl", "hf buckets sync ./data hf://buckets/user/my-bucket --dry-run", "hf buckets sync ./data hf://buckets/user/my-bucket --dry-run | jq .", ], ) def sync( source: Annotated[ str | None, typer.Argument( help="Source path: local directory or hf://buckets/namespace/bucket_name(/prefix)", ), ] = None, dest: Annotated[ str | None, typer.Argument( help="Destination path: local directory or hf://buckets/namespace/bucket_name(/prefix)", ), ] = None, delete: Annotated[ bool, typer.Option( help="Delete destination files not present in source.", ), ] = False, ignore_times: Annotated[ bool, typer.Option( "--ignore-times", help="Skip files only based on size, ignoring modification times.", ), ] = False, ignore_sizes: Annotated[ bool, typer.Option( "--ignore-sizes", help="Skip files only based on modification times, ignoring sizes.", ), ] = False, plan: Annotated[ str | None, typer.Option( help="Save sync plan to JSONL file for review instead of executing.", ), ] = None, apply: Annotated[ str | None, typer.Option( help="Apply a previously saved plan file.", ), ] = None, dry_run: Annotated[ bool, typer.Option( "--dry-run", help="Print sync plan to stdout as JSONL without executing.", ), ] = False, include: Annotated[ list[str] | None, typer.Option( help="Include files matching pattern (can specify multiple).", ), ] = None, exclude: Annotated[ list[str] | None, typer.Option( help="Exclude files matching pattern (can specify multiple).", ), ] = None, filter_from: Annotated[ str | None, typer.Option( help="Read include/exclude patterns from file.", ), ] = None, existing: Annotated[ bool, typer.Option( "--existing", help="Skip creating new files on receiver (only update existing files).", ), ] = False, ignore_existing: Annotated[ bool, typer.Option( "--ignore-existing", help="Skip updating files that exist on receiver (only create new files).", ), ] = False, verbose: Annotated[ bool, typer.Option( "--verbose", "-v", help="Show detailed logging with reasoning.", ), ] = False, token: TokenOpt = None, ) -> None: """Sync files between local directory and a bucket.""" api = get_hf_api(token=token) api.sync_bucket( source=source, dest=dest, delete=delete, ignore_times=ignore_times, ignore_sizes=ignore_sizes, existing=existing, ignore_existing=ignore_existing, include=include, exclude=exclude, filter_from=filter_from, plan=plan, apply=apply, dry_run=dry_run, verbose=verbose, quiet=out.is_quiet(), ) if plan and not out.is_quiet(): out.hint(f"Run `hf buckets sync --apply {plan}` to execute this plan.") # ============================================================================= # Cp command # ============================================================================= # `hf buckets cp` is an alias for the top-level `hf cp` command (see `cli/_cp.py`). buckets_cli.command( name="cp", examples=[ # Download (repo or bucket -> local / stdout) "hf buckets cp hf://buckets/username/my-bucket/config.json config.json", "hf buckets cp hf://buckets/username/my-bucket/data.csv data/", "hf buckets cp hf://buckets/username/my-bucket/config.json -", # Upload (local / stdin -> bucket) "hf buckets cp model.safetensors hf://buckets/username/my-bucket/model.safetensors", "hf buckets cp config.json hf://buckets/username/my-bucket/logs/", "hf buckets cp - hf://buckets/username/my-bucket/config.json", # Remote to remote (repo or bucket -> bucket) "hf buckets cp hf://buckets/username/my-bucket/data.csv hf://buckets/username/dest-bucket/", "hf buckets cp hf://buckets/username/source-bucket/logs/ hf://buckets/username/dest-bucket/logs/", ], )(make_cp("buckets"))