From fab12c89cd76a0a1af223d9f2e0625f5dc68129f Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 17 Jan 2025 00:10:17 -0500 Subject: [PATCH] chore(dev/release): add changelog generator Fixes #2452. --- .github/workflows/dev_adbc.yml | 72 +++++++ .github/workflows/dev_pr.yml | 2 +- ci/conda_env_dev.txt | 4 +- dev/adbc_dev/__init__.py | 16 ++ dev/adbc_dev/changelog.py | 135 +++++++++++++ dev/adbc_dev/tests/__init__.py | 16 ++ dev/adbc_dev/tests/test_changelog.py | 188 ++++++++++++++++++ .../dev_pr => dev/adbc_dev}/title_check.py | 61 ++++-- dev/release/01-prepare.sh | 12 ++ dev/release/utils-common.sh | 16 +- 10 files changed, 493 insertions(+), 29 deletions(-) create mode 100644 .github/workflows/dev_adbc.yml create mode 100644 dev/adbc_dev/__init__.py create mode 100755 dev/adbc_dev/changelog.py create mode 100644 dev/adbc_dev/tests/__init__.py create mode 100644 dev/adbc_dev/tests/test_changelog.py rename {.github/workflows/dev_pr => dev/adbc_dev}/title_check.py (65%) diff --git a/.github/workflows/dev_adbc.yml b/.github/workflows/dev_adbc.yml new file mode 100644 index 0000000000..817166a12b --- /dev/null +++ b/.github/workflows/dev_adbc.yml @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Dev ADBC + +on: + pull_request: + branches: + - main + paths: + - "dev/**" + - ".github/workflows/dev_adbc.yml" + push: + paths: + - "dev/**" + - ".github/workflows/dev_adbc.yml" + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + contents: read + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + pre-commit: + name: "pre-commit" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Cache Conda + uses: actions/cache@v4 + with: + path: ~/conda_pkgs_dir + key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }} + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + use-only-tar-bz2: false + use-mamba: true + + - name: Install Dependencies + run: | + mamba install -c conda-forge \ + --file ci/conda_env_dev.txt \ + pytest + + - name: Test + run: | + pytest -vv dev/adbc_dev/ diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index 2beba3120b..c1723dcb95 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -63,7 +63,7 @@ jobs: env: PR_TITLE: ${{ github.event.pull_request.title }} run: | - python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout "$PR_TITLE" + python dev/adbc_dev/title_check.py $(pwd)/pr_checkout "$PR_TITLE" # Pings make it into the commit message where they annoy the user every # time the commit gets pushed somewhere diff --git a/ci/conda_env_dev.txt b/ci/conda_env_dev.txt index 0a631e8840..ff4ea5b3fc 100644 --- a/ci/conda_env_dev.txt +++ b/ci/conda_env_dev.txt @@ -15,8 +15,10 @@ # specific language governing permissions and limitations # under the License. -commitizen gh>=2.32.0 jq pre-commit +pygit2 +python +python-dotenv twine diff --git a/dev/adbc_dev/__init__.py b/dev/adbc_dev/__init__.py new file mode 100644 index 0000000000..13a83393a9 --- /dev/null +++ b/dev/adbc_dev/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/dev/adbc_dev/changelog.py b/dev/adbc_dev/changelog.py new file mode 100755 index 0000000000..2eccbcaf8a --- /dev/null +++ b/dev/adbc_dev/changelog.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Generate a changelog from our commit log.""" + +import argparse +import datetime +import sys +from pathlib import Path + +import dotenv +import pygit2 + +from . import title_check + + +def display(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +def get_commit(repo: pygit2.Repository, rev: str) -> pygit2.Oid: + try: + return repo.lookup_reference_dwim(rev).target + except KeyError: + return repo[rev].id + + +def list_commits( + repo: pygit2.Repository, from_rev: str, to_rev: str +) -> list[title_check.Commit]: + root = Path(repo.workdir) + from_commit = get_commit(repo, from_rev) + to_commit = get_commit(repo, to_rev) + walker = repo.walk(to_commit, pygit2.GIT_SORT_TIME) + walker.hide(from_commit) + commits = [] + for commit in walker: + title = commit.message.strip().split("\n")[0] + commits.append(title_check.matches_commit_format(root, title)) + return commits + + +def format_commit(commit: title_check.Commit) -> str: + components = "" + warning = "" + if commit.components: + components = f"**{', '.join(commit.components)}**: " + if commit.breaking_change: + warning = "⚠️ " + return f"{warning}{components}{commit.subject}" + + +def format_section(title: str, commits: list[title_check.Commit]) -> list[str]: + if not commits: + return [] + + lines = [f"### {title}", ""] + commits.sort(key=lambda commit: (commit.components, commit.subject)) + lines.extend(f"- {format_commit(commit)}" for commit in commits) + lines.append("") + return lines + + +def format_changelog( + title: str, release: dict[str, str], commits: list[title_check.Commit] +) -> str: + date = datetime.date.today().strftime("%Y-%m-%d") + lines = [ + f"## {title} ({date})", + "", + "### Versions", + "", + f"- C/C++/GLib/Go/Python/Ruby: {release['VERSION_NATIVE']}", + f"- C#: {release['VERSION_CSHARP']}", + f"- Java: {release['VERSION_JAVA']}", + f"- R: {release['VERSION_R']}", + f"- Rust: {release['VERSION_RUST']}", + "", + ] + + breaking = [commit for commit in commits if commit.breaking_change] + lines.extend(format_section("Breaking Changes", breaking)) + + feat = [commit for commit in commits if commit.category == "feat"] + lines.extend(format_section("New Features", feat)) + + fix = [commit for commit in commits if commit.category == "fix"] + lines.extend(format_section("Bugfixes", fix)) + + docs = [commit for commit in commits if commit.category == "docs"] + lines.extend(format_section("Documentation Improvements", docs)) + + perf = [commit for commit in commits if commit.category == "perf"] + lines.extend(format_section("Performance Improvements", perf)) + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("from_rev", help="The start revision.") + parser.add_argument("to_rev", help="The end revision.") + parser.add_argument("--name", required=True, help="The name of the release.") + + args = parser.parse_args() + + repo_root = Path(__file__).parent.parent.parent.resolve() + release = dotenv.dotenv_values(repo_root / "dev/release/versions.env") + display("Opening repository at", repo_root) + repo = pygit2.Repository(repo_root) + + commits = list_commits(repo, args.from_rev, args.to_rev) + changelog = format_changelog(args.name, release, commits) + print(changelog) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/dev/adbc_dev/tests/__init__.py b/dev/adbc_dev/tests/__init__.py new file mode 100644 index 0000000000..13a83393a9 --- /dev/null +++ b/dev/adbc_dev/tests/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/dev/adbc_dev/tests/test_changelog.py b/dev/adbc_dev/tests/test_changelog.py new file mode 100644 index 0000000000..eea261da0c --- /dev/null +++ b/dev/adbc_dev/tests/test_changelog.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pathlib import Path + +import pygit2 +import pytest + +from .. import changelog, title_check + +root = Path(__file__).parent.parent.parent.parent.resolve() + + +@pytest.fixture(scope="module") +def repo() -> pygit2.Repository: + repo_root = Path(__file__).parent.parent.parent.parent.resolve() + return pygit2.Repository(repo_root) + + +@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES) +def test_title_check_basic(commit_type) -> None: + title = f"{commit_type}: test" + commit = title_check.matches_commit_format(root, title) + assert not commit.failed_validation_reasons + assert commit.category == commit_type + assert commit.components == [] + assert not commit.breaking_change + assert commit.subject == "test" + + +@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES) +def test_title_check_breaking(commit_type) -> None: + title = f"{commit_type}!: test" + commit = title_check.matches_commit_format(root, title) + assert not commit.failed_validation_reasons + assert commit.category == commit_type + assert commit.components == [] + assert commit.breaking_change + assert commit.subject == "test" + + +@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES) +def test_title_check_component(commit_type) -> None: + title = f"{commit_type}(python): test" + commit = title_check.matches_commit_format(root, title) + assert not commit.failed_validation_reasons + assert commit.category == commit_type + assert commit.components == ["python"] + assert not commit.breaking_change + assert commit.subject == "test" + + +@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES) +def test_title_check_multi(commit_type) -> None: + title = f"{commit_type}(c,format,python)!: test" + commit = title_check.matches_commit_format(root, title) + assert not commit.failed_validation_reasons + assert commit.category == commit_type + assert commit.components == ["c", "format", "python"] + assert commit.breaking_change + assert commit.subject == "test" + + title = f"{commit_type}!(c,format,python): test" + commit = title_check.matches_commit_format(root, title) + assert not commit.failed_validation_reasons + assert commit.category == commit_type + assert commit.components == ["c", "format", "python"] + assert commit.breaking_change + assert commit.subject == "test" + + +@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES) +def test_title_check_nested(commit_type) -> None: + title = f"{commit_type}(c/driver,dev/release)!: test" + commit = title_check.matches_commit_format(root, title) + assert not commit.failed_validation_reasons + assert commit.category == commit_type + assert commit.components == ["c/driver", "dev/release"] + assert commit.breaking_change + assert commit.subject == "test" + + +@pytest.mark.parametrize( + "msg", + [ + "feat:", + "unknown: foo", + "feat(): test", + "feat()!: test", + "feat!(c)!: test", + "feat(nonexistent): test", + "feat(c,): test", + "feat(c ): test", + "feat( c): test", + "feat(a#): test", + ], +) +def test_title_check_bad(msg: str) -> None: + commit = title_check.matches_commit_format(root, msg) + assert commit.failed_validation_reasons + + +def test_list_commits(repo: pygit2.Repository) -> None: + # the base rev is not included + commits = changelog.list_commits( + repo, + "2360993884e6f82a6da9080d9fcd0dcf8c362b1d", + "8f6ffe5bd1ee5667b5626f91fc3f928f93ae94cd", + ) + assert len(commits) == 2 + assert ( + commits[0].subject + == "bump com.uber.nullaway:nullaway from 0.12.2 to 0.12.3 in /java (#2417)" + ) + assert ( + commits[1].subject + == "bump golang.org/x/tools from 0.28.0 to 0.29.0 in /go/adbc (#2419)" + ) + + +def test_get_commit(repo: pygit2.Repository) -> None: + assert changelog.get_commit(repo, "HEAD") is not None + assert changelog.get_commit(repo, "main") is not None + assert ( + changelog.get_commit(repo, "2360993884e6f82a6da9080d9fcd0dcf8c362b1d") + is not None + ) + + +def test_format_commit(repo: pygit2.Repository) -> None: + commits = changelog.list_commits( + repo, + "196522bb2f11665c7b6e0d1ed98da174315a19d9", + "63bb903b7ddc7730beaa3d092dc5808632cd4b08", + ) + assert len(commits) == 1 + + formatted = changelog.format_commit(commits[0]) + assert ( + formatted + == "**c**: don't use sketchy cast to test backwards compatibility (#2425)" + ) + + commits = changelog.list_commits( + repo, + "5299ea01ab31b276c27059d82efdbdead22029e9", + "460937c76b923420d07d5bcfd29166c80eb45d80", + ) + assert len(commits) == 1 + + formatted = changelog.format_commit(commits[0]) + assert formatted == ( + "⚠️ **java/driver-manager**: support loading " + "AdbcDrivers from the ServiceLoader (#1475)" + ) + + +def test_format_section(repo: pygit2.Repository) -> None: + assert changelog.format_section("Breaking Changes", []) == [] + + commits = changelog.list_commits( + repo, + "5299ea01ab31b276c27059d82efdbdead22029e9", + "460937c76b923420d07d5bcfd29166c80eb45d80", + ) + assert len(commits) == 1 + + assert changelog.format_section("Breaking Changes", commits) == [ + "### Breaking Changes", + "", + f"- {changelog.format_commit(commits[0])}", + "", + ] diff --git a/.github/workflows/dev_pr/title_check.py b/dev/adbc_dev/title_check.py similarity index 65% rename from .github/workflows/dev_pr/title_check.py rename to dev/adbc_dev/title_check.py index df5bf92cab..c67a067335 100644 --- a/.github/workflows/dev_pr/title_check.py +++ b/dev/adbc_dev/title_check.py @@ -38,31 +38,55 @@ } -def matches_commit_format(root: Path, title: str) -> typing.List[str]: +class Commit(typing.NamedTuple): + category: str + components: list[str] + breaking_change: bool + subject: str + + failed_validation_reasons: list[str] + + +def matches_commit_format(root: Path, title: str) -> list[str]: """Check a title and return a list of reasons why it's invalid.""" if not root.is_dir(): - return [f"Invalid root: must be a directory: {root}"] + return Commit( + category="", + components=[], + breaking_change=False, + subject="", + failed_validation_reasons=[f"Invalid root: must be a directory: {root}"], + ) # Relax the initial regex a bit, do more friendly validation below + # We'll allow a deviation from Conventional Commits (feat!(foo) instead of + # feat(foo)!) since that appears to have snuck in already commit_type = "([a-z]+)" + breaking = "(!?)" scope = r"(?:\(([^\)]*)\))?" - delimiter = "!?:" + delimiter = "(!?):" subject = " (.+)" - commit = re.compile(f"^{commit_type}{scope}{delimiter}{subject}$") + commit = re.compile(f"^{commit_type}{breaking}{scope}{delimiter}{subject}$") valid_component = re.compile(r"^[a-zA-Z0-9_/\-\.]+$") m = commit.match(title) if m is None: - return [ - "Format is incorrect, see https://www.conventionalcommits.org/en/v1.0.0/" - ] + commit_spec = "https://www.conventionalcommits.org/en/v1.0.0/" + return Commit( + category="", + components=[], + breaking_change=False, + subject="", + failed_validation_reasons=[f"Format is incorrect, see {commit_spec}"], + ) reasons = [] commit_type = m.group(1) if commit_type not in COMMIT_TYPES: reasons.append(f"Invalid commit type: {commit_type}") - components = m.group(2) + breaking = m.group(2) + components = m.group(3) if components is not None: if not components.strip(): reasons.append("Invalid components: must not be empty") @@ -84,13 +108,24 @@ def matches_commit_format(root: Path, title: str) -> typing.List[str]: f"or directory in the repo: {component}" ) - subject = m.group(3) + delimiter = m.group(4) + subject = m.group(5) if subject.strip() != subject: reasons.append(f"Invalid subject: must have no trailing space: {subject}") if subject.strip().endswith("."): reasons.append(f"Invalid subject: must not end in a period: {subject}") - return reasons + if bool(breaking) and bool(delimiter): + # feat!(foo)!: subject + reasons.append("Can only provide breaking-change '!' once") + + return Commit( + category=commit_type, + components=components or [], + breaking_change=bool(breaking) or bool(delimiter), + subject=subject, + failed_validation_reasons=reasons, + ) def main(): @@ -102,13 +137,13 @@ def main(): print(f'PR title: "{args.title}"') - reasons = matches_commit_format(args.root, args.title) - if not reasons: + commit = matches_commit_format(args.root, args.title) + if not commit.failed_validation_reasons: print("Title is valid") return 0 print("Title is invalid:") - for reason in reasons: + for reason in commit.failed_validation_reasons: print("-", reason) return 1 diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh index 8767e91d8f..bd3dec805f 100755 --- a/dev/release/01-prepare.sh +++ b/dev/release/01-prepare.sh @@ -57,6 +57,18 @@ main() { echo ; changelog ) >> ${SOURCE_DIR}/../../CHANGELOG.md + + read -p "Please review the changelog. Press ENTER to continue..." ignored + git diff ${SOURCE_DIR}/../../CHANGELOG.md + + echo "Is the changelog correct?" + select yn in "y" "n"; do + case $yn in + y ) echo "Continuing"; break;; + n ) echo "Aborting"; return 1;; + esac + done + git add ${SOURCE_DIR}/../../CHANGELOG.md git commit -m "chore: update CHANGELOG.md for ${RELEASE}" diff --git a/dev/release/utils-common.sh b/dev/release/utils-common.sh index 99c9b66331..97e6d5c07f 100644 --- a/dev/release/utils-common.sh +++ b/dev/release/utils-common.sh @@ -38,20 +38,8 @@ header() { changelog() { # Strip trailing blank line - local -r changelog=$(printf '%s\n' "$(cz ch --dry-run --unreleased-version "ADBC Libraries ${RELEASE}" --start-rev apache-arrow-adbc-${PREVIOUS_RELEASE})") - # Split off header - local -r header=$(echo "${changelog}" | head -n 1) - local -r trailer=$(echo "${changelog}" | tail -n+2) - echo "${header}" - echo - echo "### Versions" - echo - echo "- C/C++/GLib/Go/Python/Ruby: ${VERSION_NATIVE}" - echo "- C#: ${VERSION_CSHARP}" - echo "- Java: ${VERSION_JAVA}" - echo "- R: ${VERSION_R}" - echo "- Rust: ${VERSION_RUST}" - echo "${trailer}" + local -r changelog=$(printf '%s\n' "$(env PYTHONPATH=${SOURCE_TOP_DIR}/dev python -m adbc_dev.changelog --name "ADBC Libraries ${RELEASE}" apache-arrow-adbc-${PREVIOUS_RELEASE} HEAD 2>/dev/null)") + echo "${changelog}" } header "Config"