From fab12c89cd76a0a1af223d9f2e0625f5dc68129f Mon Sep 17 00:00:00 2001
From: David Li
Date: Fri, 17 Jan 2025 00:10:17 -0500
Subject: [PATCH] chore(dev/release): add changelog generator
Fixes #2452.
---
.github/workflows/dev_adbc.yml | 72 +++++++
.github/workflows/dev_pr.yml | 2 +-
ci/conda_env_dev.txt | 4 +-
dev/adbc_dev/__init__.py | 16 ++
dev/adbc_dev/changelog.py | 135 +++++++++++++
dev/adbc_dev/tests/__init__.py | 16 ++
dev/adbc_dev/tests/test_changelog.py | 188 ++++++++++++++++++
.../dev_pr => dev/adbc_dev}/title_check.py | 61 ++++--
dev/release/01-prepare.sh | 12 ++
dev/release/utils-common.sh | 16 +-
10 files changed, 493 insertions(+), 29 deletions(-)
create mode 100644 .github/workflows/dev_adbc.yml
create mode 100644 dev/adbc_dev/__init__.py
create mode 100755 dev/adbc_dev/changelog.py
create mode 100644 dev/adbc_dev/tests/__init__.py
create mode 100644 dev/adbc_dev/tests/test_changelog.py
rename {.github/workflows/dev_pr => dev/adbc_dev}/title_check.py (65%)
diff --git a/.github/workflows/dev_adbc.yml b/.github/workflows/dev_adbc.yml
new file mode 100644
index 0000000000..817166a12b
--- /dev/null
+++ b/.github/workflows/dev_adbc.yml
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Dev ADBC
+
+on:
+ pull_request:
+ branches:
+ - main
+ paths:
+ - "dev/**"
+ - ".github/workflows/dev_adbc.yml"
+ push:
+ paths:
+ - "dev/**"
+ - ".github/workflows/dev_adbc.yml"
+
+concurrency:
+ group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+ cancel-in-progress: true
+
+permissions:
+ contents: read
+
+defaults:
+ run:
+ shell: bash -l -eo pipefail {0}
+
+jobs:
+  pre-commit:
+    name: "pre-commit"
+    runs-on: ubuntu-latest
+    env:
+      # Part of the Conda cache key; increment to reset the cache manually.
+      CACHE_NUMBER: 0
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history: the changelog tests look up specific commits.
+          fetch-depth: 0
+          persist-credentials: false
+
+      # The cache key below reads steps.get-date.outputs.today, so this step
+      # must exist (and run first) or that part of the key is empty.
+      - name: Get Date
+        id: get-date
+        run: |
+          echo "today=$(/bin/date -u '+%Y%m%d')" >> "$GITHUB_OUTPUT"
+
+      - name: Cache Conda
+        uses: actions/cache@v4
+        with:
+          path: ~/conda_pkgs_dir
+          key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }}
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-version: latest
+          use-only-tar-bz2: false
+          use-mamba: true
+
+      - name: Install Dependencies
+        run: |
+          mamba install -c conda-forge \
+            --file ci/conda_env_dev.txt \
+            pytest
+
+      - name: Test
+        run: |
+          pytest -vv dev/adbc_dev/
diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 2beba3120b..c1723dcb95 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -63,7 +63,7 @@ jobs:
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: |
- python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout "$PR_TITLE"
+ python dev/adbc_dev/title_check.py $(pwd)/pr_checkout "$PR_TITLE"
# Pings make it into the commit message where they annoy the user every
# time the commit gets pushed somewhere
diff --git a/ci/conda_env_dev.txt b/ci/conda_env_dev.txt
index 0a631e8840..ff4ea5b3fc 100644
--- a/ci/conda_env_dev.txt
+++ b/ci/conda_env_dev.txt
@@ -15,8 +15,10 @@
# specific language governing permissions and limitations
# under the License.
-commitizen
gh>=2.32.0
jq
pre-commit
+pygit2
+python
+python-dotenv
twine
diff --git a/dev/adbc_dev/__init__.py b/dev/adbc_dev/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/dev/adbc_dev/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/dev/adbc_dev/changelog.py b/dev/adbc_dev/changelog.py
new file mode 100755
index 0000000000..2eccbcaf8a
--- /dev/null
+++ b/dev/adbc_dev/changelog.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Generate a changelog from our commit log."""
+
+import argparse
+import datetime
+import sys
+from pathlib import Path
+
+import dotenv
+import pygit2
+
+from . import title_check
+
+
+def display(*args, **kwargs):
+    """Print to standard error.
+
+    Takes the same positional and keyword arguments as ``print`` except
+    ``file``, which is always ``sys.stderr``.
+    """
+    stream = sys.stderr
+    print(*args, file=stream, **kwargs)
+
+
+def get_commit(repo: pygit2.Repository, rev: str) -> pygit2.Oid:
+    """Resolve a revision string to an object ID.
+
+    ``rev`` is first tried as a reference name via ``lookup_reference_dwim``.
+    If that raises ``KeyError``, ``rev`` is used to index the repository
+    directly (for example, a commit hash) and that object's ID is returned.
+    """
+    try:
+        oid = repo.lookup_reference_dwim(rev).target
+    except KeyError:
+        oid = repo[rev].id
+    return oid
+
+
+def list_commits(
+    repo: pygit2.Repository, from_rev: str, to_rev: str
+) -> list[title_check.Commit]:
+    """Parse the title of every commit walked from ``to_rev``.
+
+    The walk starts at ``to_rev`` and hides ``from_rev``, so the start
+    revision itself is not part of the result. Only the first line of each
+    commit message is parsed; titles that fail validation are still returned,
+    carrying their failure reasons.
+    """
+    workdir = Path(repo.workdir)
+    start = get_commit(repo, from_rev)
+    end = get_commit(repo, to_rev)
+    history = repo.walk(end, pygit2.GIT_SORT_TIME)
+    history.hide(start)
+    return [
+        title_check.matches_commit_format(
+            workdir, entry.message.strip().split("\n")[0]
+        )
+        for entry in history
+    ]
+
+
+def format_commit(commit: title_check.Commit) -> str:
+    """Render one commit as changelog text (without the list bullet).
+
+    The result is the subject, prefixed by the bolded, comma-separated
+    components when there are any, and by a warning sign when the commit is
+    a breaking change.
+    """
+    prefix = "⚠️ " if commit.breaking_change else ""
+    scope = ""
+    if commit.components:
+        scope = f"**{', '.join(commit.components)}**: "
+    return f"{prefix}{scope}{commit.subject}"
+
+
+def format_section(title: str, commits: list[title_check.Commit]) -> list[str]:
+    """Build the Markdown lines for one changelog section.
+
+    Returns an empty list when ``commits`` is empty, so empty sections are
+    omitted entirely. Otherwise returns a ``### title`` heading, a blank
+    line, one ``- `` bullet per commit ordered by (components, subject), and
+    a trailing blank line.
+
+    The ``commits`` argument is not modified.
+    """
+    if not commits:
+        return []
+
+    # Sort a copy rather than in place: the caller's list keeps its order.
+    ordered = sorted(
+        commits, key=lambda commit: (commit.components, commit.subject)
+    )
+    lines = [f"### {title}", ""]
+    lines.extend(f"- {format_commit(commit)}" for commit in ordered)
+    lines.append("")
+    return lines
+
+
+def format_changelog(
+    title: str, release: dict[str, str], commits: list[title_check.Commit]
+) -> str:
+    """Render the complete Markdown changelog entry for one release.
+
+    ``title`` and today's date form the ``## `` heading. ``release`` must
+    provide the keys ``VERSION_NATIVE``, ``VERSION_CSHARP``, ``VERSION_JAVA``,
+    ``VERSION_R`` and ``VERSION_RUST``, which fill the "Versions" list.
+
+    Breaking commits are listed under "Breaking Changes" and again under
+    their own category. Only the ``feat``, ``fix``, ``docs`` and ``perf``
+    categories get a section; commits of any other category appear only if
+    they are breaking.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    lines = [
+        f"## {title} ({today})",
+        "",
+        "### Versions",
+        "",
+        f"- C/C++/GLib/Go/Python/Ruby: {release['VERSION_NATIVE']}",
+        f"- C#: {release['VERSION_CSHARP']}",
+        f"- Java: {release['VERSION_JAVA']}",
+        f"- R: {release['VERSION_R']}",
+        f"- Rust: {release['VERSION_RUST']}",
+        "",
+    ]
+
+    lines.extend(
+        format_section(
+            "Breaking Changes",
+            [commit for commit in commits if commit.breaking_change],
+        )
+    )
+
+    # (category, heading) pairs, in the order the sections are emitted.
+    sections = (
+        ("feat", "New Features"),
+        ("fix", "Bugfixes"),
+        ("docs", "Documentation Improvements"),
+        ("perf", "Performance Improvements"),
+    )
+    for category, heading in sections:
+        matching = [commit for commit in commits if commit.category == category]
+        lines.extend(format_section(heading, matching))
+
+    return "\n".join(lines)
+
+
+def main():
+    """Command-line entry point: print the changelog for a revision range.
+
+    Reads version numbers from ``dev/release/versions.env`` under the
+    repository root (three directories above this file), writes the
+    changelog to stdout and progress messages to stderr, and returns 0.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("from_rev", help="The start revision.")
+    parser.add_argument("to_rev", help="The end revision.")
+    parser.add_argument("--name", required=True, help="The name of the release.")
+    options = parser.parse_args()
+
+    repo_root = Path(__file__).parent.parent.parent.resolve()
+    versions_file = repo_root / "dev/release/versions.env"
+    release = dotenv.dotenv_values(versions_file)
+    display("Opening repository at", repo_root)
+    repo = pygit2.Repository(repo_root)
+
+    commits = list_commits(repo, options.from_rev, options.to_rev)
+    print(format_changelog(options.name, release, commits))
+
+    return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/dev/adbc_dev/tests/__init__.py b/dev/adbc_dev/tests/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/dev/adbc_dev/tests/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/dev/adbc_dev/tests/test_changelog.py b/dev/adbc_dev/tests/test_changelog.py
new file mode 100644
index 0000000000..eea261da0c
--- /dev/null
+++ b/dev/adbc_dev/tests/test_changelog.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import pygit2
+import pytest
+
+from .. import changelog, title_check
+
+root = Path(__file__).parent.parent.parent.parent.resolve()
+
+
+@pytest.fixture(scope="module")
+def repo() -> pygit2.Repository:
+    """Open the repository four directories above this file, once per module."""
+    here = Path(__file__)
+    checkout = here.parent.parent.parent.parent.resolve()
+    return pygit2.Repository(checkout)
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_basic(commit_type) -> None:
+    """A bare ``type: subject`` title is valid, unscoped and non-breaking."""
+    parsed = title_check.matches_commit_format(root, f"{commit_type}: test")
+    assert not parsed.failed_validation_reasons
+    assert parsed.category == commit_type
+    assert parsed.components == []
+    assert not parsed.breaking_change
+    assert parsed.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_breaking(commit_type) -> None:
+    """A ``!`` right after the type marks an unscoped breaking change."""
+    parsed = title_check.matches_commit_format(root, f"{commit_type}!: test")
+    assert not parsed.failed_validation_reasons
+    assert parsed.category == commit_type
+    assert parsed.components == []
+    assert parsed.breaking_change
+    assert parsed.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_component(commit_type) -> None:
+    """A single parenthesized component is reported as a one-element list."""
+    parsed = title_check.matches_commit_format(
+        root, f"{commit_type}(python): test"
+    )
+    assert not parsed.failed_validation_reasons
+    assert parsed.category == commit_type
+    assert parsed.components == ["python"]
+    assert not parsed.breaking_change
+    assert parsed.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_multi(commit_type) -> None:
+    """Several components parse the same with ``!`` after or before the scope."""
+    titles = (
+        f"{commit_type}(c,format,python)!: test",
+        f"{commit_type}!(c,format,python): test",
+    )
+    for title in titles:
+        parsed = title_check.matches_commit_format(root, title)
+        assert not parsed.failed_validation_reasons
+        assert parsed.category == commit_type
+        assert parsed.components == ["c", "format", "python"]
+        assert parsed.breaking_change
+        assert parsed.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_nested(commit_type) -> None:
+    """Components may be nested paths such as ``c/driver``."""
+    parsed = title_check.matches_commit_format(
+        root, f"{commit_type}(c/driver,dev/release)!: test"
+    )
+    assert not parsed.failed_validation_reasons
+    assert parsed.category == commit_type
+    assert parsed.components == ["c/driver", "dev/release"]
+    assert parsed.breaking_change
+    assert parsed.subject == "test"
+
+
+@pytest.mark.parametrize(
+    "msg",
+    [
+        "feat:",
+        "unknown: foo",
+        "feat(): test",
+        "feat()!: test",
+        "feat!(c)!: test",
+        "feat(nonexistent): test",
+        "feat(c,): test",
+        "feat(c ): test",
+        "feat( c): test",
+        "feat(a#): test",
+    ],
+)
+def test_title_check_bad(msg: str) -> None:
+    """Each malformed title yields at least one validation failure."""
+    reasons = title_check.matches_commit_format(root, msg).failed_validation_reasons
+    assert reasons
+
+
+def test_list_commits(repo: pygit2.Repository) -> None:
+    """Listing a known range returns its two commits, excluding the base."""
+    base = "2360993884e6f82a6da9080d9fcd0dcf8c362b1d"
+    tip = "8f6ffe5bd1ee5667b5626f91fc3f928f93ae94cd"
+    # the base rev is not included
+    found = changelog.list_commits(repo, base, tip)
+    assert len(found) == 2
+
+    first, second = found[0], found[1]
+    assert first.subject == (
+        "bump com.uber.nullaway:nullaway from 0.12.2 to 0.12.3 in /java (#2417)"
+    )
+    assert second.subject == (
+        "bump golang.org/x/tools from 0.28.0 to 0.29.0 in /go/adbc (#2419)"
+    )
+
+
+def test_get_commit(repo: pygit2.Repository) -> None:
+    """``HEAD``, a branch name and a full commit hash all resolve."""
+    revisions = ("HEAD", "main", "2360993884e6f82a6da9080d9fcd0dcf8c362b1d")
+    for rev in revisions:
+        assert changelog.get_commit(repo, rev) is not None
+
+
+def test_format_commit(repo: pygit2.Repository) -> None:
+    """Format two known commits: one plain, one breaking change."""
+    plain = changelog.list_commits(
+        repo,
+        "196522bb2f11665c7b6e0d1ed98da174315a19d9",
+        "63bb903b7ddc7730beaa3d092dc5808632cd4b08",
+    )
+    assert len(plain) == 1
+    assert changelog.format_commit(plain[0]) == (
+        "**c**: don't use sketchy cast to test backwards compatibility (#2425)"
+    )
+
+    breaking = changelog.list_commits(
+        repo,
+        "5299ea01ab31b276c27059d82efdbdead22029e9",
+        "460937c76b923420d07d5bcfd29166c80eb45d80",
+    )
+    assert len(breaking) == 1
+    expected = (
+        "⚠️ **java/driver-manager**: support loading "
+        "AdbcDrivers from the ServiceLoader (#1475)"
+    )
+    assert changelog.format_commit(breaking[0]) == expected
+
+
+def test_format_section(repo: pygit2.Repository) -> None:
+    """An empty section renders nothing; one commit renders a full section."""
+    heading = "Breaking Changes"
+    assert changelog.format_section(heading, []) == []
+
+    found = changelog.list_commits(
+        repo,
+        "5299ea01ab31b276c27059d82efdbdead22029e9",
+        "460937c76b923420d07d5bcfd29166c80eb45d80",
+    )
+    assert len(found) == 1
+
+    rendered = changelog.format_section(heading, found)
+    bullet = f"- {changelog.format_commit(found[0])}"
+    assert rendered == ["### Breaking Changes", "", bullet, ""]
diff --git a/.github/workflows/dev_pr/title_check.py b/dev/adbc_dev/title_check.py
similarity index 65%
rename from .github/workflows/dev_pr/title_check.py
rename to dev/adbc_dev/title_check.py
index df5bf92cab..c67a067335 100644
--- a/.github/workflows/dev_pr/title_check.py
+++ b/dev/adbc_dev/title_check.py
@@ -38,31 +38,55 @@
}
-def matches_commit_format(root: Path, title: str) -> typing.List[str]:
+class Commit(typing.NamedTuple):
+ """The parsed form of a commit/PR title, plus any validation failures."""
+
+ # Commit type taken from the start of the title; "" if the title could
+ # not be parsed at all.
+ category: str
+ # Components named in the parenthesized scope; empty if there is none.
+ components: list[str]
+ # True if a breaking-change '!' was given, before or after the scope.
+ breaking_change: bool
+ # Text following the ": " delimiter; "" if the title could not be parsed.
+ subject: str
+
+ # Human-readable reasons the title is invalid; empty for a valid title.
+ failed_validation_reasons: list[str]
+
+
+def matches_commit_format(root: Path, title: str) -> list[str]:
"""Check a title and return a list of reasons why it's invalid."""
if not root.is_dir():
- return [f"Invalid root: must be a directory: {root}"]
+ return Commit(
+ category="",
+ components=[],
+ breaking_change=False,
+ subject="",
+ failed_validation_reasons=[f"Invalid root: must be a directory: {root}"],
+ )
# Relax the initial regex a bit, do more friendly validation below
+ # We'll allow a deviation from Conventional Commits (feat!(foo) instead of
+ # feat(foo)!) since that appears to have snuck in already
commit_type = "([a-z]+)"
+ breaking = "(!?)"
scope = r"(?:\(([^\)]*)\))?"
- delimiter = "!?:"
+ delimiter = "(!?):"
subject = " (.+)"
- commit = re.compile(f"^{commit_type}{scope}{delimiter}{subject}$")
+ commit = re.compile(f"^{commit_type}{breaking}{scope}{delimiter}{subject}$")
valid_component = re.compile(r"^[a-zA-Z0-9_/\-\.]+$")
m = commit.match(title)
if m is None:
- return [
- "Format is incorrect, see https://www.conventionalcommits.org/en/v1.0.0/"
- ]
+ commit_spec = "https://www.conventionalcommits.org/en/v1.0.0/"
+ return Commit(
+ category="",
+ components=[],
+ breaking_change=False,
+ subject="",
+ failed_validation_reasons=[f"Format is incorrect, see {commit_spec}"],
+ )
reasons = []
commit_type = m.group(1)
if commit_type not in COMMIT_TYPES:
reasons.append(f"Invalid commit type: {commit_type}")
- components = m.group(2)
+ breaking = m.group(2)
+ components = m.group(3)
if components is not None:
if not components.strip():
reasons.append("Invalid components: must not be empty")
@@ -84,13 +108,24 @@ def matches_commit_format(root: Path, title: str) -> typing.List[str]:
f"or directory in the repo: {component}"
)
- subject = m.group(3)
+ delimiter = m.group(4)
+ subject = m.group(5)
if subject.strip() != subject:
reasons.append(f"Invalid subject: must have no trailing space: {subject}")
if subject.strip().endswith("."):
reasons.append(f"Invalid subject: must not end in a period: {subject}")
- return reasons
+ if bool(breaking) and bool(delimiter):
+ # feat!(foo)!: subject
+ reasons.append("Can only provide breaking-change '!' once")
+
+ return Commit(
+ category=commit_type,
+ components=components or [],
+ breaking_change=bool(breaking) or bool(delimiter),
+ subject=subject,
+ failed_validation_reasons=reasons,
+ )
def main():
@@ -102,13 +137,13 @@ def main():
print(f'PR title: "{args.title}"')
- reasons = matches_commit_format(args.root, args.title)
- if not reasons:
+ commit = matches_commit_format(args.root, args.title)
+ if not commit.failed_validation_reasons:
print("Title is valid")
return 0
print("Title is invalid:")
- for reason in reasons:
+ for reason in commit.failed_validation_reasons:
print("-", reason)
return 1
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index 8767e91d8f..bd3dec805f 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -57,6 +57,18 @@ main() {
echo ;
changelog
) >> ${SOURCE_DIR}/../../CHANGELOG.md
+
+ read -p "Please review the changelog. Press ENTER to continue..." ignored
+ git diff ${SOURCE_DIR}/../../CHANGELOG.md
+
+ echo "Is the changelog correct?"
+ select yn in "y" "n"; do
+ case $yn in
+ y ) echo "Continuing"; break;;
+ n ) echo "Aborting"; return 1;;
+ esac
+ done
+
git add ${SOURCE_DIR}/../../CHANGELOG.md
git commit -m "chore: update CHANGELOG.md for ${RELEASE}"
diff --git a/dev/release/utils-common.sh b/dev/release/utils-common.sh
index 99c9b66331..97e6d5c07f 100644
--- a/dev/release/utils-common.sh
+++ b/dev/release/utils-common.sh
@@ -38,20 +38,8 @@ header() {
changelog() {
# Strip trailing blank line
- local -r changelog=$(printf '%s\n' "$(cz ch --dry-run --unreleased-version "ADBC Libraries ${RELEASE}" --start-rev apache-arrow-adbc-${PREVIOUS_RELEASE})")
- # Split off header
- local -r header=$(echo "${changelog}" | head -n 1)
- local -r trailer=$(echo "${changelog}" | tail -n+2)
- echo "${header}"
- echo
- echo "### Versions"
- echo
- echo "- C/C++/GLib/Go/Python/Ruby: ${VERSION_NATIVE}"
- echo "- C#: ${VERSION_CSHARP}"
- echo "- Java: ${VERSION_JAVA}"
- echo "- R: ${VERSION_R}"
- echo "- Rust: ${VERSION_RUST}"
- echo "${trailer}"
+ local -r changelog=$(printf '%s\n' "$(env PYTHONPATH=${SOURCE_TOP_DIR}/dev python -m adbc_dev.changelog --name "ADBC Libraries ${RELEASE}" apache-arrow-adbc-${PREVIOUS_RELEASE} HEAD 2>/dev/null)")
+ echo "${changelog}"
}
header "Config"