From 0b2147ac644596b66886f398012351641672ee54 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Thu, 21 Dec 2023 15:43:35 +0100 Subject: [PATCH] Support commit_description parameter in push_to_hub (#6520) Support commit_description param in push_to_hub --- src/datasets/arrow_dataset.py | 8 ++++++++ src/datasets/dataset_dict.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 00f76338816..6ed7de81c12 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -5228,6 +5228,7 @@ def push_to_hub( set_default: Optional[bool] = None, split: Optional[str] = None, commit_message: Optional[str] = None, + commit_description: Optional[str] = None, private: Optional[bool] = False, token: Optional[str] = None, revision: Optional[str] = None, @@ -5258,6 +5259,11 @@ def push_to_hub( The name of the split that will be given to that dataset. Defaults to `self.split`. commit_message (`str`, *optional*): Message to commit while pushing. Will default to `"Upload dataset"`. + commit_description (`str`, *optional*): + Description of the commit that will be created. + Additionally, description of the PR if a PR is created (`create_pr` is True). + + private (`bool`, *optional*, defaults to `False`): Whether the dataset repository should be set to private or not. Only affects repository creation: a repository that already exists will not be affected by that parameter. @@ -5525,6 +5531,7 @@ def push_to_hub( repo_id, operations=additions + deletions, commit_message=commit_message, + commit_description=commit_description, token=token, repo_type="dataset", revision=revision, @@ -5543,6 +5550,7 @@ def push_to_hub( repo_id, operations=operations, commit_message=commit_message + f" (part {i:05d}-of-{num_commits:05d})", + commit_description=commit_description, token=token, repo_type="dataset", revision=revision, diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 3e912d64660..6ca6ec79a83 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -1559,6 +1559,7 @@ def push_to_hub( config_name: str = "default", set_default: Optional[bool] = None, commit_message: Optional[str] = None, + commit_description: Optional[str] = None, private: Optional[bool] = False, token: Optional[str] = None, revision: Optional[str] = None, @@ -1589,6 +1590,11 @@ def push_to_hub( named "default". commit_message (`str`, *optional*): Message to commit while pushing. Will default to `"Upload dataset"`. + commit_description (`str`, *optional*): + Description of the commit that will be created. + Additionally, description of the PR if a PR is created (`create_pr` is True). + + private (`bool`, *optional*): Whether the dataset repository should be set to private or not. Only affects repository creation: a repository that already exists will not be affected by that parameter. @@ -1802,6 +1808,7 @@ def push_to_hub( repo_id, operations=additions + deletions, commit_message=commit_message, + commit_description=commit_description, token=token, repo_type="dataset", revision=revision, @@ -1820,6 +1827,7 @@ def push_to_hub( repo_id, operations=operations, commit_message=commit_message + f" (part {i:05d}-of-{num_commits:05d})", + commit_description=commit_description, token=token, repo_type="dataset", revision=revision,