From f73f16c17cbcc6b84739fac39bc8b558afc4d2c7 Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Mon, 30 Dec 2024 20:58:50 -0500 Subject: [PATCH 01/10] feat: add regex tag versioning per project WIP --- projects.yaml | 380 +++++++++++++++++++++++++++++-------- tools/gen_projects_json.py | 263 ++++++++++++++----------- 2 files changed, 451 insertions(+), 192 deletions(-) diff --git a/projects.yaml b/projects.yaml index 4635451..f5de97a 100644 --- a/projects.yaml +++ b/projects.yaml @@ -26,6 +26,13 @@ projects: reason: "Official dependency management tool from Golang. To quote the first line of the readme, 'dep is safe for production use.'" - name: Apache Kafka gh_url: https://github.com/apache/kafka + tag_regex_subs: + - remove: ^kafka- + - replace: -incubating-candidate-(\d+)$ + with: rc\1 + - remove: -KAFKA-\d+$ + - remove: -cp$ + - remove: -beta\d+-candidate\d+$ - name: Minikube gh_url: https://github.com/kubernetes/minikube reason: Official kubernetes project with a logo, but no major release. @@ -36,10 +43,22 @@ projects: - name: Tor gh_url: https://github.com/torproject/tor url: https://blog.torproject.org/ + tag_regex_subs: + - remove: ^tor- + - remove: ^debian-version- + - remove: -root$ + - remove: "@\\d+$" + - remove: incompat-merged$ + - remove: -cvs-\d+-\d+$ + - remove: -pre\.\d+$ + - remove: -$ - name: Home Assistant gh_url: https://github.com/home-assistant/home-assistant - name: Vala gh_url: https://github.com/GNOME/vala + tag_regex_subs: + - replace: ^VALA_(\d)+_(\d)+_(\d)+$ + with: \1.\2.\3 - name: Onion gh_url: https://github.com/davidmoreno/onion - name: Nim @@ -47,19 +66,31 @@ projects: - name: Windows Terminal url: https://www.microsoft.com/en-ca/p/windows-terminal-preview/9n0dx20hk701 gh_url: https://github.com/microsoft/terminal + tag_regex_subs: + - remove: ^\d{4}\.\d{5}$ - name: Bitcoin gh_url: https://github.com/bitcoin/bitcoin + tag_regex_subs: + - remove: -final$ + - remove: _closest$ + - remove: _notexact$ + - remove: -guixtest1$ + - remove: test1$ - name: Caddy gh_url: https://github.com/caddyserver/caddy - name: Werkzeug gh_url: https://github.com/pallets/werkzeug reason: Depended on by Flask and many other web frameworks. + tag_regex_subs: + - remove: \.x$ - name: Cython gh_url: https://github.com/cython/cython - name: TOML gh_url: https://github.com/toml-lang/toml - name: Flask gh_url: https://github.com/pallets/flask + tag_regex_subs: + - remove: \.x$ - name: datadogpy gh_url: https://github.com/DataDog/datadogpy reason: Part of a paid product. @@ -67,6 +98,8 @@ projects: gh_url: https://github.com/scipy/scipy - name: Pandas gh_url: https://github.com/pandas-dev/pandas + tag_regex_subs: + - remove: _ahl\d+$ - name: Wheel gh_url: https://github.com/pypa/wheel reason: Depended on by production Python deployments everywhere. @@ -76,12 +109,21 @@ projects: gh_url: https://github.com/jakubroztocil/httpie - name: scikit-learn gh_url: https://github.com/scikit-learn/scikit-learn + tag_regex_subs: + - remove: -branching$ - name: certbot gh_url: https://github.com/certbot/certbot + tag_regex_subs: + - remove: -corrected$ - name: sshuttle gh_url: https://github.com/sshuttle/sshuttle + tag_regex_subs: + - remove: ^sshuttle- + - remove: -macos-bin$ - name: Theano gh_url: https://github.com/Theano/Theano + tag_regex_subs: + - remove: ^rel- - name: Bokeh gh_url: https://github.com/bokeh/bokeh - name: Magic Wormhole @@ -98,21 +140,35 @@ projects: gh_url: https://github.com/gohugoio/hugo - name: drone gh_url: https://github.com/drone/drone + tag_regex_subs: + - remove: -debug$ + - remove: -gitspaces-beta$ - name: HashiCorp Terraform gh_url: https://github.com/hashicorp/terraform - name: HashiCorp Nomad gh_url: https://github.com/hashicorp/nomad + tag_regex_subs: + - remove: -changelog$ + - remove: -connect1$ + - remove: ^ent-changelog- - name: HashiCorp Vault gh_url: https://github.com/hashicorp/vault last_zv_release_version: v0.11.6 first_release_date: 2015-04-29 first_release_version: v0.1.0 + tag_regex_subs: + - remove: -rebuild$ + - remove: ^ent-changelog- + - replace: -rc(\d+)\.(\d+)$ + with: -rc\1+\2 - name: xhyve gh_url: https://github.com/mist64/xhyve - name: zeal gh_url: https://github.com/zealdocs/zeal - name: html5lib-python gh_url: https://github.com/html5lib/html5lib-python + tag_regex_subs: + - remove: ish$ - name: MyPy gh_url: https://github.com/python/mypy - name: asn1crypto @@ -122,12 +178,17 @@ projects: url: https://cgit.freedesktop.org/gstreamer/orc gh_url: https://github.com/GStreamer/orc reason: Depended on by Ubuntu and other free desktop operating systems + tag_regex_subs: + - remove: ^orc- - name: Gephi gh_url: https://github.com/gephi/gephi - name: vim-airline gh_url: https://github.com/vim-airline/vim-airline - name: Julia gh_url: https://github.com/JuliaLang/julia + tag_regex_subs: + - replace: -pre\.([a-z]+)$ + with: -\1 - name: Flatpak gh_url: https://github.com/flatpak/flatpak - name: Meson Build System @@ -148,19 +209,40 @@ projects: - name: Pilosa gh_url: https://github.com/pilosa/pilosa url: https://www.pilosa.com/ + tag_regex_subs: + - remove: -community$ + - replace: -alpha\.(\d+)(-pre|\.)(\d+)$ + with: -alpha\1+\3 - name: fail2ban gh_url: https://github.com/fail2ban/fail2ban + tag_regex_subs: + - remove: -smartos$ + - remove: -PROPAGATE_DEBIAN_PATCHES$ + - replace: ^(\d+)_(\d+)_(\d+)$ + with: \1.\2.\3 - name: qtile gh_url: https://github.com/qtile/qtile - name: autokey gh_url: https://github.com/autokey/autokey + tag_regex_subs: + - remove: ^ak- - name: ClamAV Antivirus gh_url: https://github.com/Cisco-Talos/clamav-devel + tag_regex_subs: + - replace: CLAMAV_(\d+)_(\d+)(RC\d+)? + with: \1.\2\3 + - remove: ^clamav- + - remove: -dmgxar$ + - remove: _sf$ + - replace: "@(\\d+)$" + with: +\1 - name: OpenRCT2 url: https://openrct2.io/ gh_url: https://github.com/OpenRCT2/OpenRCT2 - name: bup gh_url: https://github.com/bup/bup + tag_regex_subs: + - remove: ^bup- - name: You-Get gh_url: https://github.com/soimort/you-get - name: Ramda @@ -181,6 +263,16 @@ projects: url: https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools gh_url: https://github.com/microsoft/vscode-cpptools reason: Created by Microsoft and with almost 8 million installs, this is the standard extension you want if working with C or C++ in VS Code. + tag_regex_subs: + - replace: -insiders$ + with: rc1 + - replace: -insiders?(\d+)$ + with: rc\1 + - replace: -debug$ + with: dev + - replace: -prerelease(\d+)$ + with: pre\1 + - remove: ^v\. - name: pywinauto url: http://pywinauto.github.io/ gh_url: https://github.com/pywinauto/pywinauto @@ -189,9 +281,16 @@ projects: gh_url: https://github.com/facebook/react first_release_date: 2013-05-29T19:46:11Z first_release_version: 0.3.0 # A later v0.0.0 tag breaks this + tag_regex_subs: + - replace: -alpha\.(.*)$ + with: -alpha+\1 - name: Rake gh_url: https://github.com/ruby/rake reason: The pioneer of the zero-to-double-digits jump. + tag_regex_subs: + - remove: ^rake- + - remove: ^drake- + - remove: ^comp_tree- - name: Chocolatey url: https://chocolatey.org/ gh_url: https://github.com/chocolatey/choco @@ -206,10 +305,17 @@ projects: - name: Nuitka url: https://nuitka.net/ gh_url: https://github.com/Nuitka/Nuitka + tag_regex_subs: + - replace: ^(\d+\.\d+\.\d+)([a-z])$ + with: \1+\2 - name: StreamEx gh_url: https://github.com/amaembo/streamex + tag_regex_subs: + - remove: ^streamex- - name: 3proxy gh_url: https://github.com/z3APA3A/3proxy + tag_regex_subs: + - remove: ^3proxy- - name: Flow gh_url: https://github.com/facebook/flow - name: GoReleaser @@ -228,6 +334,9 @@ projects: gh_url: https://github.com/digitalbazaar/forge - name: Stellarium gh_url: https://github.com/Stellarium/stellarium + tag_regex_subs: + - replace: ^stellarium-(\d+)-(\d+)-(\d+)$ + with: \1.\2.\3 - name: xonsh gh_url: https://github.com/xonsh/xonsh - name: ccls @@ -237,6 +346,12 @@ projects: - name: Teeworlds url: https://teeworlds.com/ gh_url: https://github.com/teeworlds/teeworlds + tag_regex_subs: + - remove: -start$ + - remove: -release$ + - remove: -endofline$ + - replace: -cp-r(\d+)$ + with: rc\1 - name: Numba url: https://numba.pydata.org gh_url: https://github.com/numba/numba @@ -252,12 +367,25 @@ projects: gh_url: https://github.com/simonw/datasette - name: Tendermint gh_url: https://github.com/tendermint/tendermint/ + tag_regex_subs: + - remove: -autodraft$ + - replace: -dev0-fix0$ + with: dev + - replace: ^dev-(.*) + with: \1dev - name: Cosmos-sdk gh_url: https://github.com/cosmos/cosmos-sdk first_release_date: 2017-03-06 first_release_version: 0.2.0 # https://github.com/tendermint/basecoin/blob/master/CHANGELOG.md#020-march-6-2017 latest_release_date: 2024-12-16 latest_release_version: 0.50.11 + tag_regex_subs: + - remove: -ics + - remove: -lsm + - remove: -circuit$ + - remove: -streaming$ + - remove: -patch$ + - remove: -iris$ - name: LocalStack url: https://localstack.cloud gh_url: https://github.com/localstack/localstack @@ -274,10 +402,16 @@ projects: gh_url: https://github.com/paperjs/paper.js - name: Knex.js gh_url: https://github.com/knex/knex + tag_regex_subs: + - replace: ^(\d+),(\d+),(\d+)$ + with: \1.\2.\3 - name: zoxide gh_url: https://github.com/ajeetdsouza/zoxide - name: OpenRC gh_url: https://github.com/OpenRC/openrc + tag_regex_subs: + - remove: ^openrc- + - remove: ^funtoo-openrc- - name: Notary gh_url: https://github.com/notaryproject/notary - name: GoodbyeDPI @@ -295,6 +429,10 @@ projects: gh_url: https://github.com/stylus/stylus - name: nw.js gh_url: https://github.com/nwjs/nw.js + tag_regex_subs: + - replace: ^nw-v(\d+)\.(\d+),(\d+)$ + with: \1.\2.\3 + - remove: ^nw- - name: Video Speed Controller gh_url: https://github.com/igrigorik/videospeed url: https://chrome.google.com/webstore/detail/video-speed-controller/nffaoalbilbmmfgbnbgppjihopabppdk @@ -327,6 +465,11 @@ projects: - name: graphile-worker url: https://www.graphile.org/ gh_url: https://github.com/graphile/worker + tag_regex_subs: + - replace: -canary\.([a-z\d]+)$ + with: +\1 + - replace: -bridge\.0$ + with: "-0" - name: Monero url: https://getmonero.org gh_url: https://github.com/monero-project/monero @@ -334,14 +477,23 @@ projects: url: https://multimc.org/ gh_url: https://github.com/MultiMC/Launcher reason: Prominent launcher for maintaining multiple instances of MineCraft. + tag_regex_subs: + - remove: -final$ - name: Factor url: https://factorcode.org/ gh_url: https://github.com/factor/factor reason: A concatenative stack-based programming language. + tag_regex_subs: + - remove: ^import- + - remove: ^similar- + - replace: (release|factor)-(\d+)-(\d+)$ + with: \2.\3 - name: Tectonic url: https://tectonic-typesetting.github.io/ gh_url: https://github.com/tectonic-typesetting/tectonic reason: A TeX distributon that has been version 0 since 2016 + tag_regex_subs: + - remove: ^tectonic@ - name: Flipper url: https://www.flippercloud.io/docs gh_url: https://github.com/flippercloud/flipper @@ -352,6 +504,9 @@ projects: - name: Apache Druid url: https://druid.apache.org/ gh_url: https://github.com/apache/druid/ + tag_regex_subs: + - remove: ^druid- + - remove: -incubating - name: seaborn url: https://seaborn.pydata.org/ gh_url: https://github.com/mwaskom/seaborn @@ -393,6 +548,8 @@ projects: - name: Create url: https://createmod.net/ gh_url: https://github.com/Creators-of-Create/Create + tag_regex_subs: + - remove: ^mc\d+\.\d+/ - name: FastAPI gh_url: https://github.com/fastapi/fastapi - name: atlantis @@ -404,6 +561,9 @@ projects: - name: Nushell url: https://www.nushell.sh gh_url: https://github.com/nushell/nushell + tag_regex_subs: + - replace: ^(\d+)_(\d+)_(\d+)$ + with: \1.\2.\3 - name: Thanos url: https://thanos.io gh_url: https://github.com/thanos-io/thanos @@ -414,11 +574,15 @@ projects: gh_url: https://github.com/pyvista/pyvista - name: Apache Thrift gh_url: https://github.com/apache/thrift + tag_regex_subs: + - remove: ^thrift- - name: TypeORM url: http://typeorm.io/ gh_url: https://github.com/typeorm/typeorm - name: Sodium gh_url: https://github.com/CaffeineMC/sodium + tag_regex_subs: + - remove: ^mc\d+\.\d+(\.\d+)?- - name: The Clipboard project url: https://getclipboard.app/ gh_url: https://github.com/Slackadays/Clipboard @@ -426,11 +590,19 @@ projects: - name: Uncrustify url: http://uncrustify.sourceforge.net/ gh_url: https://github.com/uncrustify/uncrustify + tag_regex_subs: + - remove: ^uncrustify- - name: lazygit gh_url: https://github.com/jesseduffield/lazygit + tag_regex_subs: + - replace: ^pre-(release-)?([\d\.]*)(-2)? + with: \2pre\3 - name: OpenBLAS url: http://www.openblas.net/ gh_url: https://github.com/OpenMathLib/OpenBLAS + tag_regex_subs: + - replace: alpha(\d+)\.(\d+)$ + with: alpha\1+\2 - name: libc (Rust) url: https://docs.rs/libc gh_url: https://github.com/rust-lang/libc @@ -438,6 +610,9 @@ projects: - name: Metabase url: https://www.metabase.com/ gh_url: https://github.com/metabase/metabase + tag_regex_subs: # Metabase has really wierd mixed versioning + - remove: ^v2015060\d + - remove: v1.[345]\d\.\d+(\.\d+)? # Hide non-0vers, not sure why they use them... - name: esbuild url: https://esbuild.github.io gh_url: https://github.com/evanw/esbuild @@ -447,6 +622,128 @@ projects: reason: 7+ years of history, over 2M weekly downloads on npm - name: Unmanic gh_url: https://github.com/Unmanic/unmanic + - name: OpenSSL + gh_url: https://github.com/openssl/openssl + emeritus: true + first_release_date: 1998-12-23 + first_release_version: 0.9.1 # release name convention is weird, hardcode counts 2018-03-31 + last_zv_release_version: 0.9.8n + first_nonzv_release_date: 2010-03-29 + tag_regex_subs: + - replace: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) + with: \2.\3.\4+\5 + - replace: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+) + with: \2.\3.\4 + - replace: ^OpenSSL-fips-(\d+)_(\d+) + with: \1.\2 + - remove: ^openssl- + - remove: -auto-reformat$ + - remove: -format$ + - remove: -reformat$ + - name: MAME + gh_url: https://github.com/mamedev/mame + wp_url: https://en.wikipedia.org/wiki/MAME + first_release_version: 0.1 + first_release_date: 1997-02-05 + latest_release_version: 0.272 + latest_release_date: 2024-11-29 + tag_regex_subs: + - replace: ^mame(\d)(\d+)(u\d+)$ + with: \1.\2+\3 + - replace: ^mame(\d)(\d+)$ + with: \1.\2 + - name: Window Maker + url: https://windowmaker.org/ + gh_url: https://github.com/window-maker/wmaker + wp_url: https://en.wikipedia.org/wiki/Window_Maker + first_release_date: 1997-01-01 # exact date unknown + first_release_version: 0.0.3 + latest_release_version: 0.96.0 + latest_release_date: 2023-08-05 + release_count: 94 + tag_regex_subs: + - remove: ^wmaker- + - remove: ^wm- + - remove: ^release- + - remove: -crm$ + - remove: \+$ + - name: ReactOS + url: https://www.reactos.org/ + gh_url: https://github.com/reactos/reactos # Many tags are missing, use manual data below + reason: A free Windows-compatible Operating System + first_release_version: 0.0.7 + first_release_date: 1996-01-23 + latest_release_version: 0.4.14 + latest_release_date: 2021-12-16 + release_count: 59 # Ignore GitHub saying >250 releases, ~80% of them are some kind of weird backup non-releases + tag_regex_subs: + - remove: ^ReactOS- + - remove: -release$ + - name: three.js + url: https://threejs.org/ + gh_url: https://github.com/mrdoob/three.js + first_release_version: 0.1 + first_release_date: 2013-07-03T11:49:48 + latest_release_version: 0.171.0 + latest_release_date: 2024-11-29 + tag_regex_subs: + - remove: /ROME$ + - replace: ^r(\d+)$ + with: 0.\1.0 + - name: google-api-client (ruby) + url: https://rubygems.org/gems/google-api-client/ + gh_url: https://github.com/googleapis/google-api-ruby-client + first_release_version: 0.1.0 + first_release_date: 2010-10-14 + latest_release_version: 0.15.1 # Their versioning is really hard to figure out... + latest_release_date: 2024-07-29 + release_count: 254 + tag_regex_subs: + - remove: ^google-api-client- + - remove: ^google-api-client/ + - remove: ^google-api-ruby-client- + - remove: ^google-api-ruby-client/ + - name: rand + url: https://rust-random.github.io/book/ + gh_url: https://github.com/rust-random/rand + reason: The most downloaded Rust crate + first_release_version: 0.1.1 + first_release_date: 2015-02-03 + latest_release_version: 0.8.5 + latest_release_date: 2022-02-14 + release_count: 68 + - name: suhosin + url: https://suhosin.org/ + gh_url: https://github.com/sektioneins/suhosin + first_release_version: 0.9.1 + first_release_date: 2006-09-16T00:00:00 + latest_release_version: 0.9.38 + latest_release_date: 2015-05-21T00:00:00 + tag_regex_subs: + - remove: ^suhosin- + - name: Pure Data + wp_url: https://en.wikipedia.org/wiki/Pure_Data + gh_url: https://github.com/pure-data/pure-data + url: https://puredata.info/ + first_release_date: 1996-06-01 + first_release_version: 0.1 + latest_release_version: 0.55-2 + latest_release_date: 2024-11-17 + tag_regex_subs: + - replace: test(\d+)\.?([a-z\d])$ + with: dev\1+\2 + - replace: test(\d+)$ + with: dev\1 + - remove: -really$ + - name: cargo-audit + url: https://rustsec.org/ + gh_url: https://github.com/rustsec/rustsec + first_release_date: 2017-02-27 + first_release_version: 0.1.0 + latest_release_date: 2024-10-29 + latest_release_version: 0.21.0 + tag_regex_subs: + - remove: ^cargo-audit/ # Non-GitHub projects below, manually updated - name: ASCEND @@ -492,14 +789,6 @@ projects: first_release_date: 2013-03-02 latest_release_version: 0.3.4 latest_release_date: 2021-12-08 - - name: OpenSSL - gh_url: https://github.com/openssl/openssl - emeritus: true - first_release_date: 1998-12-23 - first_release_version: 0.9.1 # release name convention is weird, hardcode counts 2018-03-31 - last_zv_release_version: 0.9.8n - first_nonzv_release_date: 2010-03-29 - release_count_zv: 51 # technically only counts til 1.0 by date (2010). 0.9.8zh was the last 0ver and came out in 2015. - name: Factorio url: https://factorio.com/ reason: Popular, for-profit game. @@ -524,14 +813,6 @@ projects: first_release_date: 1999-01-08 latest_release_version: 0.82 latest_release_date: 2024-11-27 - - name: MAME - gh_url: https://github.com/mamedev/mame # GitHub releases are mame0272 - wp_url: https://en.wikipedia.org/wiki/MAME - first_release_version: 0.1 - first_release_date: 1997-02-05 - latest_release_version: 0.272 - latest_release_date: 2024-11-29 - release_count: 304 - name: slrn # thanks hynek url: http://slrn.sourceforge.net/ wp_url: https://en.wikipedia.org/wiki/Slrn @@ -551,31 +832,13 @@ projects: release_count_zv: 142 # appx, based on df wiki release history - name: "Cataclysm: Dark Days Ahead" url: https://cataclysmdda.org - gh_url: https://github.com/CleverRaven/Cataclysm-DDA # GitHub is returning tags out of order + repo_url: https://github.com/CleverRaven/Cataclysm-DDA # There are wayyyy to many tags on GitHub and the 0.A release system is hard to translate reason: Immensely popular cross-platform open-source game under continuous development for 6 years. first_release_version: 0.1 first_release_date: 2013-02-26 latest_release_version: 0.H latest_release_date: 2024-11-22 release_count: 24 # https://cataclysmdda.org/releases/ - - name: Window Maker - url: https://windowmaker.org/ - gh_url: https://github.com/window-maker/wmaker # Tags are prefixed - wp_url: https://en.wikipedia.org/wiki/Window_Maker - first_release_date: 1997-01-01 # exact date unknown - first_release_version: 0.0.3 - latest_release_version: 0.96.0 - latest_release_date: 2023-08-05 - release_count: 94 - - name: ReactOS - url: https://www.reactos.org/ - gh_url: https://github.com/reactos/reactos # Many tags are missing, use manual data below - reason: A free Windows-compatible Operating System - first_release_version: 0.0.7 - first_release_date: 1996-01-23 - latest_release_version: 0.4.14 - latest_release_date: 2021-12-16 - release_count: 59 # Ignore GitHub saying >250 releases, ~80% of them are some kind of weird backup non-releases - name: OpenStreetMap API/website url: https://openstreetmap.org/ gh_url: https://github.com/openstreetmap/openstreetmap-website # Doesn't use releases on GitHub @@ -586,45 +849,6 @@ projects: latest_release_version: 0.6 February 2021 # See https://wiki.openstreetmap.org/wiki/API_v0.6#Semantic_versioning latest_release_date: 2021-02-01 release_count: 14 # 0.3 → 0.6 and then the dated changes to 0.6 - - name: three.js - url: https://threejs.org/ - gh_url: https://github.com/mrdoob/three.js - first_release_version: 0.1 # GitHub releases are r1-r130 - first_release_date: 2013-07-03T11:49:48 - latest_release_version: 0.171.0 - latest_release_date: 2024-11-29 - - name: google-api-client (ruby) - url: https://rubygems.org/gems/google-api-client/ - gh_url: https://github.com/googleapis/google-api-ruby-client # GitHub releases are namespace prefixed - first_release_version: 0.1.0 - first_release_date: 2010-10-14 - latest_release_version: 0.15.1 # Their versioning is really hard to figure out... - latest_release_date: 2024-07-29 - release_count: 254 - - name: rand - url: https://rust-random.github.io/book/ - gh_url: https://github.com/rust-random/rand # GitHub releases are rand_distr-0.5.0-beta.0 - reason: The most downloaded Rust crate - first_release_version: 0.1.1 - first_release_date: 2015-02-03 - latest_release_version: 0.8.5 - latest_release_date: 2022-02-14 - release_count: 68 - - name: suhosin - url: https://suhosin.org/ - gh_url: https://github.com/sektioneins/suhosin # Some GitHub releases are suhosin-0.9.35-RC1 - first_release_version: 0.9.1 - first_release_date: 2006-09-16T00:00:00 - latest_release_version: 0.9.38 - latest_release_date: 2015-05-21T00:00:00 - - name: Pure Data - wp_url: https://en.wikipedia.org/wiki/Pure_Data - gh_url: https://github.com/pure-data/pure-data/ # Some GitHub releases are 0.55-0test3a - url: https://puredata.info/ - first_release_date: 1996-06-01 - first_release_version: 0.1 - latest_release_version: 0.55-2 - latest_release_date: 2024-11-17 - name: XeTeX wp_url: https://en.wikipedia.org/wiki/XeTeX url: http://xetex.sourceforge.net/ @@ -659,14 +883,6 @@ projects: first_release_version: 0.0.2 first_nonzv_release_date: 2008-06-17 last_zv_release_version: 0.9.61 - - name: cargo-audit - url: https://rustsec.org/ - # Technically on github, but has a weird monorepo version tagging scheme that'll need code to handle - gh_url: https://github.com/rustsec/rustsec - first_release_date: 2017-02-27 - first_release_version: 0.1.0 - latest_release_date: 2024-10-29 - latest_release_version: 0.21.0 - name: transformers url: https://hackage.haskell.org/package/transformers repo_url: https://hub.darcs.net/ross/transformers diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index 45a51ea..6a38de7 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -12,62 +12,16 @@ import yaml from boltons.urlutils import URL +from hyperlink import parse +from packaging.version import InvalidVersion, Version -PROJECT_ROOT_PATH = Path(__file__).parent.parent -VTAG_RE = re.compile( - r""" - ^ - [^0-9]* - (?P\d+) - \. - [0-9a-zA-Z_.]+ - """, - re.VERBOSE, -) - -# Tags matching these patterns will be completely skipped -SKIP_PATTERNS = [ - r"^ciflow/", # pytorch has loads of this noise - r"^ci/", # pytorch has loads of this noise - r"^nightly", # FreeCol -] - -# Version numbers after these patterns should be extracted -STRIP_PATTERNS = [ - r"^mc[0-9.]+-", # Sodium tags include minecraft version numbers -] - - -def strip_prefix(tag_name: str) -> str: - """Strip any non-numeric prefix from the tag name.""" - _, _, tag_name = tag_name.rpartition("/") - - if "-" in tag_name: - _, _, version = tag_name.partition("-") - if re.search(r"^\d", version): - return version - - match = re.search(r"\d", tag_name) - if match: - return tag_name[match.start() :] - return tag_name - - -def match_vtag(tag_name: str) -> re.Match | None: - """Match version tags using a more general approach.""" - tag_name = strip_prefix(tag_name) - return VTAG_RE.match(tag_name) - - -def version_key(version: str) -> tuple: - """Extract and convert version numbers to tuple for comparison.""" - clean_version = strip_prefix(version) + +def if_version_compatible(version: str) -> bool: try: - return tuple( - int(x) for x in re.split(r"\D+", clean_version) if x and x.isdigit() - ) - except (TypeError, ValueError): - return tuple() + Version(version) + except InvalidVersion: + return False + return True def _get_gh_json(url: str, args: argparse.Namespace) -> dict | list[dict]: @@ -123,37 +77,41 @@ def _get_gh_rel_data(rel_info: dict, args: argparse.Namespace) -> dict: return ret -def _find_dominant_version_pattern(tags: list[dict]) -> list[dict]: - """Find the most common version tag pattern in a project's tags.""" - patterns = {} - for tag in tags: - _, _, tag_name = tag["name"].rpartition("/") - - if any( - re.search(pattern, tag["name"]) or re.search(pattern, tag_name) - for pattern in SKIP_PATTERNS - ): +def parse_tags( + tags_data: list[dict], regex_subs: list[dict] | None = None +) -> tuple[list[dict], list[dict], list[dict]]: + tag_names = set() + parsed_tags_data = [] + failed_tags_data = [] + duplicate_tags_data = [] + + for tag in reversed(tags_data): + tag_name = tag["name"] + if regex_subs: + for sub in regex_subs: + if sub.get("remove"): + tag_name = re.sub(sub["remove"], "", tag_name) + else: + tag_name = re.sub(sub["replace"], sub["with"], tag_name) + if tag_name in tag_names: + tag["sub_name"] = tag_name + duplicate_tags_data.append(tag) continue + else: + tag_names.add(tag_name) - for pattern in STRIP_PATTERNS: - if re.search(pattern, tag_name): - prefix, _, version = tag_name.partition("-") - if re.search(r"^\d", version): - tag_name = version - break - - match = re.search(r"\d", tag_name) - if not match: - continue - prefix = tag_name[: match.start()] - if prefix in patterns: - patterns[prefix].append(tag) + if if_version_compatible(tag_name): + tag["py_version"] = Version(tag_name) + parsed_tags_data.append(tag) else: - patterns[prefix] = [tag] + tag["sub_name"] = tag_name + failed_tags_data.append(tag) - if not patterns: - return [] - return max(patterns.values(), key=len) + return ( + list(reversed(parsed_tags_data)), + list(reversed(failed_tags_data)), + duplicate_tags_data, + ) def get_gh_project_info(info: dict, args: argparse.Namespace) -> dict: @@ -173,14 +131,13 @@ def get_gh_project_info(info: dict, args: argparse.Namespace) -> dict: gh_url.path_parts += ("tags",) tags_data = _get_gh_json(gh_url.to_text(), args) if isinstance(tags_data, dict): - tags_data = [] + return gh_info - main_tags = _find_dominant_version_pattern(tags_data) - vtags_data = [td for td in main_tags if match_vtag(td["name"])] - if not vtags_data: + parsed_tags_data, _, _ = parse_tags(tags_data, info.get("tag_regex_subs")) + if not parsed_tags_data: return gh_info - gh_info["release_count"] = len(vtags_data) + gh_info["release_count"] = len(set(parsed_tags_data)) latest_release = vtags_data[0] latest_release_data = _get_gh_rel_data(latest_release, args) @@ -264,48 +221,134 @@ def fetch_entries(projects: list[dict], args: argparse.Namespace) -> list[dict]: def parse_args(): + def add_options(parser: argparse.ArgumentParser, *, caching: bool = False): + parser.add_argument( + "-u", + "--user", + type=str, + default=os.getenv("GH_USER", ""), + help='GitHub Username for API authentication. Falls back to the "GH_USER" environment variable.', + ) + parser.add_argument( + "-k", + "--token", + type=str, + default=os.getenv("GH_TOKEN", ""), + help='A path to a file containing a GitHub personal access token for API authentication. Falls back to the "GH_TOKEN" environment variable.', + ) + if caching: + parser.add_argument( + "--disable-caching", + action="store_true", + default=os.getenv("ZV_DISABLE_CACHING", "false").lower() + in [ + "true", + "1", + "yes", + ], + help='Flag to disable caching. Falls back to the "ZV_DISABLE_CACHING" environment variable.', + ) + parser = argparse.ArgumentParser( description="Generate or update project.json using projects.yaml." ) + add_options(parser, caching=True) + subparsers = parser.add_subparsers(dest="command", help="Available commands") - parser.add_argument( - "-u", - "--user", - type=str, - default=os.getenv("GH_USER", ""), - help='GitHub Username for API authentication. Falls back to the "GH_USER" environment variable.', + # Generate + generate_parser = subparsers.add_parser( + "generate", help="Generate an updated projects.json file." ) - parser.add_argument( - "-k", - "--token", + add_options(generate_parser, caching=True) + + # Info + info_parser = subparsers.add_parser( + "info", + help="Print automatically pulled info for a GitHub project for debugging.", + ) + info_parser.add_argument( + "name_or_link", type=str, - default=os.getenv("GH_TOKEN", ""), - help='A path to a file containing a GitHub personal access token for API authentication. Falls back to the "GH_TOKEN" environment variable.', + help="The project.yaml exact entry name or GitHub link.", + ) + add_options(info_parser) + + # Tags + tags_parser = subparsers.add_parser( + "tags", help="Print all sorted tags for a GitHub project for debugging." ) - parser.add_argument( - "--disable-caching", - action="store_true", - default=os.getenv("ZV_DISABLE_CACHING", "false").lower() - in [ - "true", - "1", - "yes", - ], - help='Flag to disable caching. Falls back to the "ZV_DISABLE_CACHING" environment variable.', + tags_parser.add_argument( + "name_or_link", + type=str, + help="The project.yaml exact entry name or GitHub link.", ) + add_options(tags_parser) args = parser.parse_args() + + if args.command is None: + args.command = "generate" + if Path(args.token).is_file(): with Path(args.token).open() as f: args.token = f.read().strip() + return args def main(): - start_time = time.time() - args = parse_args() + if args.command == "generate": + generate(args) + elif args.command == "info": + print("Processing", args.name_or_link) + gh_info = get_gh_project_info({"gh_url": args.name_or_link}, args) + print() + pprint(gh_info) + elif args.command == "tags": + print("Processing", args.name_or_link) + + if parse(args.name_or_link).scheme in ("http", "https"): + info = {"gh_url": args.name_or_link} + else: + projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" + with projects_yaml_path.open() as f: + projects = yaml.safe_load(f)["projects"] + matching_info = [p for p in projects if p["name"] == args.name_or_link] + if not matching_info: + print("No matching project found.") + return + info = matching_info[0] + + org, repo = URL(info["gh_url"].rstrip("/")).path_parts[1:] + gh_url = URL("https://api.github.com/repos") + gh_url.path_parts += (org, repo, "tags") + + tags_data = _get_gh_json(gh_url.to_text(), args) + if isinstance(tags_data, dict): + tags_data = [] + + parsed_tags_data, failed_tags_data, duplicate_tag_names = parse_tags( + tags_data, info.get("tag_regex_subs") + ) + print("\nParsed tags:") + for t in parsed_tags_data: + print(f"{t['name']} (parsed as {t['py_version']})") + if not parsed_tags_data: + print("No tags parsed.") + if duplicate_tag_names: + print("\nDuplicate tags:") + for t in duplicate_tag_names: + print(f"{t['name']} (parsed as {t['sub_name']})") + if failed_tags_data: + print("\nFailed tags:") + for t in failed_tags_data: + print(f"{t['name']} (tried {t['sub_name']})") + + +def generate(args: argparse.Namespace): + start_time = time.time() projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" with projects_yaml_path.open() as f: projects = yaml.safe_load(f)["projects"] @@ -339,7 +382,7 @@ def main(): print("Current data already up to date, exiting.") return - pprint(entries) + # pprint(entries) res = { "projects": entries, From c81f8bf8b6bf099f1a98eec4c77f7964ad2baa4f Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Mon, 30 Dec 2024 21:02:19 -0500 Subject: [PATCH 02/10] docs: add docstring to parse_tags --- tools/gen_projects_json.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index 6a38de7..29af8de 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -80,6 +80,23 @@ def _get_gh_rel_data(rel_info: dict, args: argparse.Namespace) -> dict: def parse_tags( tags_data: list[dict], regex_subs: list[dict] | None = None ) -> tuple[list[dict], list[dict], list[dict]]: + """Parse the list of GitHub tags returning the tags with the PEP 440 compatible version objects. + + Parameters + ---------- + tags_data: list[dict] + The list of GitHub tags to parse from the API. + regex_subs: list[dict] | None = None + The list of regex substitutions from projects.yaml to apply to the tag names before parsing. + + Returns: + parsed_tags_data: list[dict] + The list of properly parsed tags with the added "py_version" key. + failed_tags_data: list[dict] + The list of tags that failed to be parsed with the added "sub_name" key for debugging. + duplicate_tags_data: list[dict] + The list of duplicate tags with the added "sub_name" key for debugging. + """ tag_names = set() parsed_tags_data = [] failed_tags_data = [] @@ -137,7 +154,7 @@ def get_gh_project_info(info: dict, args: argparse.Namespace) -> dict: if not parsed_tags_data: return gh_info - gh_info["release_count"] = len(set(parsed_tags_data)) + gh_info["release_count"] = len(parsed_tags_data) latest_release = vtags_data[0] latest_release_data = _get_gh_rel_data(latest_release, args) From 3fd02ca921f16c811c0547b8654eb4e067c9d24e Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Mon, 30 Dec 2024 22:46:23 -0500 Subject: [PATCH 03/10] fix: finish implementing new versioning code --- projects.yaml | 136 ++++++++--------- tools/gen_projects_json.py | 299 ++++++++++++++++++++++++++----------- 2 files changed, 281 insertions(+), 154 deletions(-) diff --git a/projects.yaml b/projects.yaml index f5de97a..fe90dc1 100644 --- a/projects.yaml +++ b/projects.yaml @@ -28,8 +28,8 @@ projects: gh_url: https://github.com/apache/kafka tag_regex_subs: - remove: ^kafka- - - replace: -incubating-candidate-(\d+)$ - with: rc\1 + - search: -incubating-candidate-(\d+)$ + replace: rc\1 - remove: -KAFKA-\d+$ - remove: -cp$ - remove: -beta\d+-candidate\d+$ @@ -57,8 +57,8 @@ projects: - name: Vala gh_url: https://github.com/GNOME/vala tag_regex_subs: - - replace: ^VALA_(\d)+_(\d)+_(\d)+$ - with: \1.\2.\3 + - search: ^VALA_(\d)+_(\d)+_(\d)+$ + replace: \1.\2.\3 - name: Onion gh_url: https://github.com/davidmoreno/onion - name: Nim @@ -159,8 +159,8 @@ projects: tag_regex_subs: - remove: -rebuild$ - remove: ^ent-changelog- - - replace: -rc(\d+)\.(\d+)$ - with: -rc\1+\2 + - search: -rc(\d+)\.(\d+)$ + replace: -rc\1+\2 - name: xhyve gh_url: https://github.com/mist64/xhyve - name: zeal @@ -187,8 +187,8 @@ projects: - name: Julia gh_url: https://github.com/JuliaLang/julia tag_regex_subs: - - replace: -pre\.([a-z]+)$ - with: -\1 + - search: -pre\.([a-z]+)$ + replace: -\1 - name: Flatpak gh_url: https://github.com/flatpak/flatpak - name: Meson Build System @@ -211,15 +211,15 @@ projects: url: https://www.pilosa.com/ tag_regex_subs: - remove: -community$ - - replace: -alpha\.(\d+)(-pre|\.)(\d+)$ - with: -alpha\1+\3 + - search: -alpha\.(\d+)(-pre|\.)(\d+)$ + replace: -alpha\1+\3 - name: fail2ban gh_url: https://github.com/fail2ban/fail2ban tag_regex_subs: - remove: -smartos$ - remove: -PROPAGATE_DEBIAN_PATCHES$ - - replace: ^(\d+)_(\d+)_(\d+)$ - with: \1.\2.\3 + - search: ^(\d+)_(\d+)_(\d+)$ + replace: \1.\2.\3 - name: qtile gh_url: https://github.com/qtile/qtile - name: autokey @@ -229,13 +229,13 @@ projects: - name: ClamAV Antivirus gh_url: https://github.com/Cisco-Talos/clamav-devel tag_regex_subs: - - replace: CLAMAV_(\d+)_(\d+)(RC\d+)? - with: \1.\2\3 + - search: CLAMAV_(\d+)_(\d+)(RC\d+)? + replace: \1.\2\3 - remove: ^clamav- - remove: -dmgxar$ - remove: _sf$ - - replace: "@(\\d+)$" - with: +\1 + - search: "@(\\d+)$" + replace: +\1 - name: OpenRCT2 url: https://openrct2.io/ gh_url: https://github.com/OpenRCT2/OpenRCT2 @@ -264,14 +264,14 @@ projects: gh_url: https://github.com/microsoft/vscode-cpptools reason: Created by Microsoft and with almost 8 million installs, this is the standard extension you want if working with C or C++ in VS Code. tag_regex_subs: - - replace: -insiders$ - with: rc1 - - replace: -insiders?(\d+)$ - with: rc\1 - - replace: -debug$ - with: dev - - replace: -prerelease(\d+)$ - with: pre\1 + - search: -insiders$ + replace: rc1 + - search: -insiders?(\d+)$ + replace: rc\1 + - search: -debug$ + replace: dev + - search: -prerelease(\d+)$ + replace: pre\1 - remove: ^v\. - name: pywinauto url: http://pywinauto.github.io/ @@ -282,8 +282,8 @@ projects: first_release_date: 2013-05-29T19:46:11Z first_release_version: 0.3.0 # A later v0.0.0 tag breaks this tag_regex_subs: - - replace: -alpha\.(.*)$ - with: -alpha+\1 + - search: -alpha\.(.*)$ + replace: -alpha+\1 - name: Rake gh_url: https://github.com/ruby/rake reason: The pioneer of the zero-to-double-digits jump. @@ -306,8 +306,8 @@ projects: url: https://nuitka.net/ gh_url: https://github.com/Nuitka/Nuitka tag_regex_subs: - - replace: ^(\d+\.\d+\.\d+)([a-z])$ - with: \1+\2 + - search: ^(\d+\.\d+\.\d+)([a-z])$ + replace: \1+\2 - name: StreamEx gh_url: https://github.com/amaembo/streamex tag_regex_subs: @@ -335,8 +335,8 @@ projects: - name: Stellarium gh_url: https://github.com/Stellarium/stellarium tag_regex_subs: - - replace: ^stellarium-(\d+)-(\d+)-(\d+)$ - with: \1.\2.\3 + - search: ^stellarium-(\d+)-(\d+)-(\d+)$ + replace: \1.\2.\3 - name: xonsh gh_url: https://github.com/xonsh/xonsh - name: ccls @@ -350,8 +350,8 @@ projects: - remove: -start$ - remove: -release$ - remove: -endofline$ - - replace: -cp-r(\d+)$ - with: rc\1 + - search: -cp-r(\d+)$ + replace: rc\1 - name: Numba url: https://numba.pydata.org gh_url: https://github.com/numba/numba @@ -369,10 +369,10 @@ projects: gh_url: https://github.com/tendermint/tendermint/ tag_regex_subs: - remove: -autodraft$ - - replace: -dev0-fix0$ - with: dev - - replace: ^dev-(.*) - with: \1dev + - search: -dev0-fix0$ + replace: dev + - search: ^dev-(.*) + replace: \1dev - name: Cosmos-sdk gh_url: https://github.com/cosmos/cosmos-sdk first_release_date: 2017-03-06 @@ -403,8 +403,8 @@ projects: - name: Knex.js gh_url: https://github.com/knex/knex tag_regex_subs: - - replace: ^(\d+),(\d+),(\d+)$ - with: \1.\2.\3 + - search: ^(\d+),(\d+),(\d+)$ + replace: \1.\2.\3 - name: zoxide gh_url: https://github.com/ajeetdsouza/zoxide - name: OpenRC @@ -430,8 +430,8 @@ projects: - name: nw.js gh_url: https://github.com/nwjs/nw.js tag_regex_subs: - - replace: ^nw-v(\d+)\.(\d+),(\d+)$ - with: \1.\2.\3 + - search: ^nw-v(\d+)\.(\d+),(\d+)$ + replace: \1.\2.\3 - remove: ^nw- - name: Video Speed Controller gh_url: https://github.com/igrigorik/videospeed @@ -466,10 +466,10 @@ projects: url: https://www.graphile.org/ gh_url: https://github.com/graphile/worker tag_regex_subs: - - replace: -canary\.([a-z\d]+)$ - with: +\1 - - replace: -bridge\.0$ - with: "-0" + - search: -canary\.([a-z\d]+)$ + replace: +\1 + - search: -bridge\.0$ + replace: "-0" - name: Monero url: https://getmonero.org gh_url: https://github.com/monero-project/monero @@ -486,8 +486,8 @@ projects: tag_regex_subs: - remove: ^import- - remove: ^similar- - - replace: (release|factor)-(\d+)-(\d+)$ - with: \2.\3 + - search: (release|factor)-(\d+)-(\d+)$ + replace: \2.\3 - name: Tectonic url: https://tectonic-typesetting.github.io/ gh_url: https://github.com/tectonic-typesetting/tectonic @@ -562,8 +562,8 @@ projects: url: https://www.nushell.sh gh_url: https://github.com/nushell/nushell tag_regex_subs: - - replace: ^(\d+)_(\d+)_(\d+)$ - with: \1.\2.\3 + - search: ^(\d+)_(\d+)_(\d+)$ + replace: \1.\2.\3 - name: Thanos url: https://thanos.io gh_url: https://github.com/thanos-io/thanos @@ -595,14 +595,14 @@ projects: - name: lazygit gh_url: https://github.com/jesseduffield/lazygit tag_regex_subs: - - replace: ^pre-(release-)?([\d\.]*)(-2)? - with: \2pre\3 + - search: ^pre-(release-)?([\d\.]*)(-2)? + replace: \2pre\3 - name: OpenBLAS url: http://www.openblas.net/ gh_url: https://github.com/OpenMathLib/OpenBLAS tag_regex_subs: - - replace: alpha(\d+)\.(\d+)$ - with: alpha\1+\2 + - search: alpha(\d+)\.(\d+)$ + replace: alpha\1+\2 - name: libc (Rust) url: https://docs.rs/libc gh_url: https://github.com/rust-lang/libc @@ -630,12 +630,12 @@ projects: last_zv_release_version: 0.9.8n first_nonzv_release_date: 2010-03-29 tag_regex_subs: - - replace: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) - with: \2.\3.\4+\5 - - replace: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+) - with: \2.\3.\4 - - replace: ^OpenSSL-fips-(\d+)_(\d+) - with: \1.\2 + - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) + replace: \2.\3.\4+\5 + - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+) + replace: \2.\3.\4 + - search: ^OpenSSL-fips-(\d+)_(\d+) + replace: \1.\2 - remove: ^openssl- - remove: -auto-reformat$ - remove: -format$ @@ -648,10 +648,10 @@ projects: latest_release_version: 0.272 latest_release_date: 2024-11-29 tag_regex_subs: - - replace: ^mame(\d)(\d+)(u\d+)$ - with: \1.\2+\3 - - replace: ^mame(\d)(\d+)$ - with: \1.\2 + - search: ^mame(\d)(\d+)(u\d+)$ + replace: \1.\2+\3 + - search: ^mame(\d)(\d+)$ + replace: \1.\2 - name: Window Maker url: https://windowmaker.org/ gh_url: https://github.com/window-maker/wmaker @@ -688,8 +688,8 @@ projects: latest_release_date: 2024-11-29 tag_regex_subs: - remove: /ROME$ - - replace: ^r(\d+)$ - with: 0.\1.0 + - search: ^r(\d+)$ + replace: 0.\1.0 - name: google-api-client (ruby) url: https://rubygems.org/gems/google-api-client/ gh_url: https://github.com/googleapis/google-api-ruby-client @@ -730,10 +730,10 @@ projects: latest_release_version: 0.55-2 latest_release_date: 2024-11-17 tag_regex_subs: - - replace: test(\d+)\.?([a-z\d])$ - with: dev\1+\2 - - replace: test(\d+)$ - with: dev\1 + - search: test(\d+)\.?([a-z\d])$ + replace: dev\1+\2 + - search: test(\d+)$ + replace: dev\1 - remove: -really$ - name: cargo-audit url: https://rustsec.org/ diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index 29af8de..eac9924 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -9,6 +9,7 @@ import urllib.request from pathlib import Path from pprint import pprint +from typing import TypedDict, cast import yaml from boltons.urlutils import URL @@ -16,7 +17,128 @@ from packaging.version import InvalidVersion, Version -def if_version_compatible(version: str) -> bool: +class RegexSubstituionDict(TypedDict): + remove: str + """The regex pattern to remove from the tag name.""" + search: str + """The regex pattern to search for in the tag name to replace with `replace`.""" + replace: str + """The string to replace the `search` pattern with.""" + + +class ProjectsInputEntryDict(TypedDict): + name: str + """The name of the project.""" + url: str + """The project's home page.""" + gh_url: str + """The project's GitHub repository link.""" + repo_url: str + """The project's non-GitHub repository link.""" + wp_url: str + """The project's Wikipedia link.""" + emeritus: bool + """`true` if the project is no longer ZeroVer""" + reason: str + """The reason this project was added to the 0ver website listing.""" + tag_regex_subs: list[RegexSubstituionDict] + """The list of regex substitutions to apply to the tag names before parsing.""" + star_count: int + """The number of stars the project has.""" + release_count: int + """The number of releases the project has had.""" + release_count_zv: int + """The number of releases the project has before it left 0ver.""" + latest_release_date: datetime.datetime | datetime.date + """The date of the latest release.""" + latest_release_version: str | Version + """The version of the latest release.""" + first_release_date: datetime.datetime | datetime.date + """The date of the first release.""" + first_release_version: str | Version + """The version of the first release.""" + first_nonzv_release_date: datetime.datetime | datetime.date + """The date of the first non-0ver release.""" + first_nonzv_release_version: str | Version + """The version of the first non-0ver release.""" + last_zv_release_version: str | Version + """The last 0ver release before the project left ZeroVer.""" + + +class ProjectsOutputEntryDict(ProjectsInputEntryDict): + is_zerover: bool + """Whether the project is still ZeroVer.""" + + +class GitHubTagCommitDict(TypedDict): + sha: str + url: str + + +class GitHubTagDict(TypedDict): + name: str + """The name of the tag.""" + zipball_url: str + tarball_url: str + commit: GitHubTagCommitDict + node_id: str + + +class GitHubParsedTagDict(GitHubTagDict): + version: Version + """The parsed PEP 440 compatible version object.""" + + +class GitHubDebugTagDict(GitHubTagDict): + sub_name: str + """The tag name after applying regex substitutions.""" + + +class GitHubDetailedTagDict(TypedDict): + tag: str + """The name of the tag.""" + version: Version + """The parsed PEP 440 compatible version object.""" + api_commit_url: str + """The API URL of the commit.""" + date: datetime.datetime + """The date of the commit.""" + link: str + """The URL of the commit.""" + + +class GitHubInfoDict(TypedDict): + star_count: int + """The number of stars the project has.""" + release_count: int + """The number of releases the project has had.""" + release_count_zv: int + """The number of releases the project has before it left 0ver.""" + latest_release_date: datetime.datetime | datetime.date + """The date of the latest release.""" + latest_release_version: str | Version + """The version of the latest release.""" + first_release_date: datetime.datetime | datetime.date + """The date of the first release.""" + first_release_version: str | Version + """The version of the first release.""" + first_nonzv_release_date: datetime.datetime | datetime.date + """The date of the first non-0ver release.""" + first_nonzv_release_version: str | Version + """The version of the first non-0ver release.""" + last_zv_release_version: str | Version + """The last 0ver release before the project left ZeroVer.""" + is_zerover: bool + """Whether the project is still ZeroVer.""" + + +def json_default(obj): + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + raise TypeError(f"{obj} is not serializable") + + +def is_version_compatible(version: str) -> bool: try: Version(version) except InvalidVersion: @@ -63,44 +185,45 @@ def _get_gh_json(url: str, args: argparse.Namespace) -> dict | list[dict]: return ret -def _get_gh_rel_data(rel_info: dict, args: argparse.Namespace) -> dict: - ret = {} - ret["tag"] = rel_info["name"] - ret["version"] = None - if match_vtag(ret["tag"]): - ret["version"] = strip_prefix(ret["tag"]) - ret["api_commit_url"] = rel_info["commit"]["url"] - rel_data = _get_gh_json(ret["api_commit_url"], args) - if isinstance(rel_data, dict): - ret["date"] = rel_data["commit"]["author"]["date"] - ret["link"] = rel_data["html_url"] - return ret +def _get_gh_rel_data( + rel_info: GitHubParsedTagDict, args: argparse.Namespace +) -> GitHubDetailedTagDict: + rel_data: dict = _get_gh_json(rel_info["commit"]["url"], args) # type: ignore + return { + "tag": rel_info["name"], + "version": rel_info["version"], + "api_commit_url": rel_info["commit"]["url"], + "date": rel_data["commit"]["author"]["date"], + "link": rel_data["html_url"], + } def parse_tags( - tags_data: list[dict], regex_subs: list[dict] | None = None -) -> tuple[list[dict], list[dict], list[dict]]: + tags_data: list[GitHubTagDict], regex_subs: list[RegexSubstituionDict] | None = None +) -> tuple[ + list[GitHubParsedTagDict], list[GitHubDebugTagDict], list[GitHubDebugTagDict] +]: """Parse the list of GitHub tags returning the tags with the PEP 440 compatible version objects. Parameters ---------- tags_data: list[dict] The list of GitHub tags to parse from the API. - regex_subs: list[dict] | None = None + regex_subs: list[RegexSubstituionDict] | None = None The list of regex substitutions from projects.yaml to apply to the tag names before parsing. Returns: parsed_tags_data: list[dict] - The list of properly parsed tags with the added "py_version" key. + The list of properly parsed tags with the added "version" key. failed_tags_data: list[dict] The list of tags that failed to be parsed with the added "sub_name" key for debugging. duplicate_tags_data: list[dict] The list of duplicate tags with the added "sub_name" key for debugging. """ - tag_names = set() - parsed_tags_data = [] - failed_tags_data = [] - duplicate_tags_data = [] + tag_names: set[str] = set() + parsed_tags_data: list[GitHubParsedTagDict] = [] + failed_tags_data: list[GitHubDebugTagDict] = [] + duplicate_tags_data: list[GitHubDebugTagDict] = [] for tag in reversed(tags_data): tag_name = tag["name"] @@ -109,20 +232,17 @@ def parse_tags( if sub.get("remove"): tag_name = re.sub(sub["remove"], "", tag_name) else: - tag_name = re.sub(sub["replace"], sub["with"], tag_name) + tag_name = re.sub(sub["search"], sub["replace"], tag_name) if tag_name in tag_names: - tag["sub_name"] = tag_name - duplicate_tags_data.append(tag) + duplicate_tags_data.append({**tag, "sub_name": tag_name}) continue else: tag_names.add(tag_name) - if if_version_compatible(tag_name): - tag["py_version"] = Version(tag_name) - parsed_tags_data.append(tag) + if is_version_compatible(tag_name): + parsed_tags_data.append({**tag, "version": Version(tag_name)}) else: - tag["sub_name"] = tag_name - failed_tags_data.append(tag) + failed_tags_data.append({**tag, "sub_name": tag_name}) return ( list(reversed(parsed_tags_data)), @@ -131,8 +251,10 @@ def parse_tags( ) -def get_gh_project_info(info: dict, args: argparse.Namespace) -> dict: - gh_info = {} +def get_gh_project_info( + info: ProjectsInputEntryDict, args: argparse.Namespace +) -> GitHubInfoDict: + gh_info: GitHubInfoDict = {} # type: ignore url = info.get("gh_url") if url is None: return gh_info @@ -146,89 +268,86 @@ def get_gh_project_info(info: dict, args: argparse.Namespace) -> dict: gh_info["star_count"] = project_data["stargazers_count"] gh_url.path_parts += ("tags",) - tags_data = _get_gh_json(gh_url.to_text(), args) - if isinstance(tags_data, dict): - return gh_info - + tags_data: list[GitHubTagDict] = _get_gh_json(gh_url.to_text(), args) # type: ignore parsed_tags_data, _, _ = parse_tags(tags_data, info.get("tag_regex_subs")) if not parsed_tags_data: return gh_info gh_info["release_count"] = len(parsed_tags_data) - latest_release = vtags_data[0] - latest_release_data = _get_gh_rel_data(latest_release, args) - for k, v in latest_release_data.items(): - gh_info[f"latest_release_{k}"] = v + # Latest release + if "latest_release_date" not in info or "latest_release_version" not in info: + latest_release = parsed_tags_data[0] + latest_release_data = _get_gh_rel_data(latest_release, args) + for k, v in latest_release_data.items(): + gh_info[f"latest_release_{k}"] = v + else: + info["latest_release_version"] = Version(info["latest_release_version"]) # type: ignore + # TODO: ensure latest_release_version is Version() compatible in the check_projects_json.py script - vtags_data.sort(key=lambda x: version_key(x["name"]), reverse=True) + # Sort after grabbing the latest release + # TODO: check if this is needed + # parsed_tags_data.sort(key=lambda x: x["version"], reverse=True) - first_release_version = info.get("first_release_version") + # First release first_release = None - if first_release_version is None: - first_release = [ - v - for v in vtags_data - if version_key(v["name"]) < version_key(latest_release["name"]) - ][-1] - else: - first_releases = [v for v in vtags_data if v["name"] == first_release_version] + if "first_release_version" in info: + first_releases = [ + v for v in parsed_tags_data if v["name"] == info["first_release_version"] + ] if first_releases: first_release = first_releases[0] + else: + first_release = parsed_tags_data[-1] if first_release: first_release_data = _get_gh_rel_data(first_release, args) for k, v in first_release_data.items(): gh_info[f"first_release_{k}"] = v + # ZeroVer releases zv_releases = [] - for rel in vtags_data: - match = match_vtag(rel["name"]) - if match and match.group("major") == "0": + for rel in parsed_tags_data: + if rel["version"].major == 0: zv_releases.append(rel) gh_info["release_count_zv"] = len(zv_releases) print( f' .. {gh_info["release_count"]} releases, {gh_info["release_count_zv"]} 0ver' ) - is_zerover = latest_release in zv_releases - - gh_info["is_zerover"] = is_zerover - - if is_zerover: + gh_info["is_zerover"] = gh_info["latest_release_version"].major == 0 # type: ignore + if gh_info["is_zerover"]: return gh_info + # Last ZeroVer release last_zv_release = zv_releases[0] - first_nonzv_release = vtags_data[vtags_data.index(last_zv_release) - 1] - first_nonzv_release_data = _get_gh_rel_data(first_nonzv_release, args) - gh_info["last_zv_release_version"] = last_zv_release["name"] + + # First non-ZeroVer release + first_nonzv_release = parsed_tags_data[parsed_tags_data.index(last_zv_release) - 1] + first_nonzv_release_data = _get_gh_rel_data(first_nonzv_release, args) for k, v in first_nonzv_release_data.items(): gh_info[f"first_nonzv_release_{k}"] = v return gh_info -def json_default(obj): - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - raise TypeError(f"{obj} is not serializable") - - -def fetch_entries(projects: list[dict], args: argparse.Namespace) -> list[dict]: - entries = [] +def fetch_entries( + projects: list[ProjectsInputEntryDict], args: argparse.Namespace +) -> list[ProjectsOutputEntryDict]: + entries: list[ProjectsOutputEntryDict] = [] for p in projects: print("Processing", p["name"]) - info = dict(p) + info: ProjectsOutputEntryDict = cast(ProjectsOutputEntryDict, p) if info.get("skip"): continue info["url"] = info.get("url", info.get("gh_url")) if info.get("gh_url"): - gh_info = get_gh_project_info(info, args) + gh_info = get_gh_project_info(p, args) # Only add new data, preserve any manual information - info.update({k: v for k, v in gh_info.items() if k not in info}) + info.update({k: v for k, v in gh_info.items() if k not in info}) # type: ignore info["is_zerover"] = info.get("is_zerover", not info.get("emeritus", False)) @@ -313,13 +432,31 @@ def add_options(parser: argparse.ArgumentParser, *, caching: bool = False): return args +def get_entry_from_name(name: str) -> ProjectsInputEntryDict: + projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" + with projects_yaml_path.open() as f: + projects = yaml.safe_load(f)["projects"] + matching_info = [p for p in projects if p["name"] == name] + if not matching_info: + print("No matching project found.") + sys.exit(1) + return matching_info[0] + + def main(): args = parse_args() if args.command == "generate": generate(args) elif args.command == "info": print("Processing", args.name_or_link) - gh_info = get_gh_project_info({"gh_url": args.name_or_link}, args) + + if parse(args.name_or_link).scheme in ("http", "https"): + info = {"gh_url": args.name_or_link} + else: + info = get_entry_from_name(args.name_or_link) + + gh_info = get_gh_project_info(info, args) # type: ignore + print() pprint(gh_info) elif args.command == "tags": @@ -328,30 +465,20 @@ def main(): if parse(args.name_or_link).scheme in ("http", "https"): info = {"gh_url": args.name_or_link} else: - projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" - with projects_yaml_path.open() as f: - projects = yaml.safe_load(f)["projects"] - matching_info = [p for p in projects if p["name"] == args.name_or_link] - if not matching_info: - print("No matching project found.") - return - info = matching_info[0] + info = get_entry_from_name(args.name_or_link) org, repo = URL(info["gh_url"].rstrip("/")).path_parts[1:] gh_url = URL("https://api.github.com/repos") gh_url.path_parts += (org, repo, "tags") - tags_data = _get_gh_json(gh_url.to_text(), args) - if isinstance(tags_data, dict): - tags_data = [] - + tags_data: list[GitHubTagDict] = _get_gh_json(gh_url.to_text(), args) # type: ignore parsed_tags_data, failed_tags_data, duplicate_tag_names = parse_tags( tags_data, info.get("tag_regex_subs") ) print("\nParsed tags:") for t in parsed_tags_data: - print(f"{t['name']} (parsed as {t['py_version']})") + print(f"{t['name']} (parsed as {t['version']})") if not parsed_tags_data: print("No tags parsed.") if duplicate_tag_names: @@ -377,7 +504,7 @@ def generate(args: argparse.Namespace): try: with projects_json_path.open() as f: cur_data = json.load(f) - cur_projects = cur_data["projects"] + cur_projects: list[ProjectsInputEntryDict] = cur_data["projects"] cur_gen_date = datetime.datetime.fromisoformat(cur_data["gen_date"]) except (IOError, KeyError): cur_projects = [] From 056dee2fade2fe9fe70bc733be1dadffec3608f5 Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Tue, 31 Dec 2024 10:34:51 -0500 Subject: [PATCH 04/10] feat: switch to classes and GraphQL --- requirements.in | 1 + tools/gen_projects_json.py | 826 ++++++++++++++++++++----------------- 2 files changed, 437 insertions(+), 390 deletions(-) diff --git a/requirements.in b/requirements.in index a934d14..832e6ab 100644 --- a/requirements.in +++ b/requirements.in @@ -2,4 +2,5 @@ boltons chert hyperlink pyyaml +requests schema diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index eac9924..5ec45c4 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -1,16 +1,15 @@ import argparse -import base64 import datetime import json import os import re import sys import time -import urllib.request from pathlib import Path from pprint import pprint from typing import TypedDict, cast +import requests import yaml from boltons.urlutils import URL from hyperlink import parse @@ -26,110 +25,140 @@ class RegexSubstituionDict(TypedDict): """The string to replace the `search` pattern with.""" -class ProjectsInputEntryDict(TypedDict): - name: str - """The name of the project.""" - url: str - """The project's home page.""" - gh_url: str - """The project's GitHub repository link.""" - repo_url: str - """The project's non-GitHub repository link.""" - wp_url: str - """The project's Wikipedia link.""" - emeritus: bool - """`true` if the project is no longer ZeroVer""" - reason: str - """The reason this project was added to the 0ver website listing.""" - tag_regex_subs: list[RegexSubstituionDict] - """The list of regex substitutions to apply to the tag names before parsing.""" - star_count: int - """The number of stars the project has.""" - release_count: int - """The number of releases the project has had.""" - release_count_zv: int - """The number of releases the project has before it left 0ver.""" - latest_release_date: datetime.datetime | datetime.date - """The date of the latest release.""" - latest_release_version: str | Version - """The version of the latest release.""" - first_release_date: datetime.datetime | datetime.date - """The date of the first release.""" - first_release_version: str | Version - """The version of the first release.""" - first_nonzv_release_date: datetime.datetime | datetime.date - """The date of the first non-0ver release.""" - first_nonzv_release_version: str | Version - """The version of the first non-0ver release.""" - last_zv_release_version: str | Version - """The last 0ver release before the project left ZeroVer.""" - - -class ProjectsOutputEntryDict(ProjectsInputEntryDict): - is_zerover: bool - """Whether the project is still ZeroVer.""" - - -class GitHubTagCommitDict(TypedDict): - sha: str - url: str - - -class GitHubTagDict(TypedDict): - name: str - """The name of the tag.""" - zipball_url: str - tarball_url: str - commit: GitHubTagCommitDict - node_id: str - - -class GitHubParsedTagDict(GitHubTagDict): - version: Version - """The parsed PEP 440 compatible version object.""" - - -class GitHubDebugTagDict(GitHubTagDict): - sub_name: str - """The tag name after applying regex substitutions.""" - - -class GitHubDetailedTagDict(TypedDict): - tag: str - """The name of the tag.""" - version: Version - """The parsed PEP 440 compatible version object.""" - api_commit_url: str - """The API URL of the commit.""" - date: datetime.datetime - """The date of the commit.""" - link: str - """The URL of the commit.""" - - -class GitHubInfoDict(TypedDict): - star_count: int - """The number of stars the project has.""" - release_count: int - """The number of releases the project has had.""" - release_count_zv: int - """The number of releases the project has before it left 0ver.""" - latest_release_date: datetime.datetime | datetime.date - """The date of the latest release.""" - latest_release_version: str | Version - """The version of the latest release.""" - first_release_date: datetime.datetime | datetime.date - """The date of the first release.""" - first_release_version: str | Version - """The version of the first release.""" - first_nonzv_release_date: datetime.datetime | datetime.date - """The date of the first non-0ver release.""" - first_nonzv_release_version: str | Version - """The version of the first non-0ver release.""" - last_zv_release_version: str | Version - """The last 0ver release before the project left ZeroVer.""" - is_zerover: bool - """Whether the project is still ZeroVer.""" +class GitHubTag: + def __init__(self, name: str, commit_url: str, committed_date: datetime.datetime): + self.name = name + self.processed_name = name + self.commit_url = commit_url + self.committed_date = committed_date + self.version: Version | None = None + + def is_version_compatible(self) -> bool: + try: + Version(self.processed_name) + except InvalidVersion: + return False + return True + + def process_name(self, regex_subs: list[RegexSubstituionDict] | None = None): + for sub in regex_subs or []: + if sub.get("remove"): + self.processed_name = re.sub(sub["remove"], "", self.processed_name) + else: + self.processed_name = re.sub( + sub["search"], sub["replace"], self.processed_name + ) + + def parse_version(self): + self.version = Version(self.processed_name) + + +class GitHubAPI: + def __init__(self, user: str, token: str, org: str, repo: str): + self.user = user + self.token = token + self.headers = {"Authorization": f"Bearer {self.token}"} + self.org = org + self.repo = repo + + def get_repo_info(self) -> dict: + query = """ + query($owner: String!, $repo: String!) { + rateLimit { + remaining + } + repository(owner: $owner, name: $repo) { + stargazerCount + } + } + """ + variables = {"owner": self.org, "repo": self.repo} + response = requests.post( + "https://api.github.com/graphql", + json={"query": query, "variables": variables}, + headers=self.headers, + ) + data = response.json() + + print(f" (( {data["data"]["rateLimit"]["remaining"]} requests remaining") + + return {"star_count": data["data"]["repository"]["stargazerCount"]} + + def fetch_tags(self) -> list[GitHubTag]: + query = """ + query($owner: String!, $repo: String!, $cursor: String) { + rateLimit { + remaining + } + repository(owner: $owner, name: $repo) { + refs(refPrefix: "refs/tags/", first: 100, after: $cursor, orderBy: {field: TAG_COMMIT_DATE, direction: DESC}) { + edges { + node { + name + target { + commitUrl + ... on Commit { + committedDate + } + ... on Tag { + target { + ... on Commit { + committedDate + } + ... on Tag { + target { + ... on Commit { + committedDate + } + } + } + } + } + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + } + """ + cursor = None + all_tags = [] + + while True: + variables = {"owner": self.org, "repo": self.repo, "cursor": cursor} + response = requests.post( + "https://api.github.com/graphql", + json={"query": query, "variables": variables}, + headers=self.headers, + ) + data = response.json() + + refs = data["data"]["repository"]["refs"] + all_tags.extend(refs["edges"]) + + if refs["pageInfo"]["hasNextPage"]: + cursor = refs["pageInfo"]["endCursor"] + else: + break + + print(f" (( {data["data"]["rateLimit"]["remaining"]} requests remaining") + + return [ + GitHubTag( + name=t["node"]["name"], + commit_url=t["node"]["target"]["commitUrl"], + committed_date=datetime.datetime.fromisoformat( + t["node"]["target"].get("committedDate") + or t["node"]["target"]["target"].get("committedDate") + or t["node"]["target"]["target"]["target"]["committedDate"] + ), + ) + for t in all_tags + ] def json_default(obj): @@ -138,222 +167,326 @@ def json_default(obj): raise TypeError(f"{obj} is not serializable") -def is_version_compatible(version: str) -> bool: - try: - Version(version) - except InvalidVersion: - return False - return True - - -def _get_gh_json(url: str, args: argparse.Namespace) -> dict | list[dict]: - """ - Get paginated results from GitHub, possibly authorized based on command - line arguments or environment variables. - """ - req = urllib.request.Request(url) - if args.user and args.token: - auth_str = f"{args.user}:{args.token}" - auth_bytes = auth_str.encode("ascii") - auth_header_val = f'Basic {base64.b64encode(auth_bytes).decode("ascii")}' - req.add_header("Authorization", auth_header_val) - - resp = urllib.request.urlopen(req) - body = resp.read() - res = json.loads(body) - rate_rem = int(resp.info().get("x-ratelimit-remaining", "-1")) - - if not isinstance(res, list) or not res: - print(f" (( {rate_rem} requests remaining") - return res - - page = 2 - ret = res - while res: - paged_url = f"{url}?page={page}" - req = urllib.request.Request(paged_url) - if args.user and args.token: - req.add_header("Authorization", auth_header_val) - resp = urllib.request.urlopen(req) - body = resp.read() - res = json.loads(body) - ret.extend(res) - page += 1 - - rate_rem = int(resp.info().get("x-ratelimit-remaining", "-1")) - print(f" (( {rate_rem} requests remaining") - return ret - - -def _get_gh_rel_data( - rel_info: GitHubParsedTagDict, args: argparse.Namespace -) -> GitHubDetailedTagDict: - rel_data: dict = _get_gh_json(rel_info["commit"]["url"], args) # type: ignore - return { - "tag": rel_info["name"], - "version": rel_info["version"], - "api_commit_url": rel_info["commit"]["url"], - "date": rel_data["commit"]["author"]["date"], - "link": rel_data["html_url"], - } +class ProjectsEntry: + def __init__( + self, + name: str, + url: str | None = None, + gh_url: str | None = None, + repo_url: str | None = None, + wp_url: str | None = None, + emeritus: bool | None = None, + reason: str | None = None, + tag_regex_subs: list[RegexSubstituionDict] | None = None, + star_count: int | None = None, + release_count: int | None = None, + release_count_zv: int | None = None, + latest_release_date: datetime.datetime | datetime.date | None = None, + latest_release_version: str | Version | None = None, + first_release_date: datetime.datetime | datetime.date | None = None, + first_release_version: str | Version | None = None, + first_nonzv_release_date: datetime.datetime | datetime.date | None = None, + first_nonzv_release_version: str | Version | None = None, + last_zv_release_version: str | Version | None = None, + ): + self.name: str = name + """The name of the project.""" + self.url: str | None = url + """The project's home page.""" + self.gh_url: str | None = gh_url + """The project's GitHub repository link.""" + self.repo_url: str | None = repo_url + """The project's non-GitHub repository link.""" + self.wp_url: str | None = wp_url + """The project's Wikipedia link.""" + self.emeritus: bool | None = emeritus + """`true` if the project is no longer ZeroVer""" + self.is_zerover: bool = bool(self.emeritus) # TODO: combine with emeritus + """Whether the project is still ZeroVer.""" + self.reason: str | None = reason + """The reason this project was added to the 0ver website listing.""" + self.tag_regex_subs: list[RegexSubstituionDict] | None = tag_regex_subs + """The list of regex substitutions to apply to the tag names before parsing.""" + self.star_count: int | None = star_count + """The number of stars the project has.""" + self.release_count: int | None = release_count + """The number of releases the project has had.""" + self.release_count_zv: int | None = release_count_zv + """The number of releases the project has before it left 0ver.""" + self.latest_release_date: datetime.datetime | datetime.date | None = ( + latest_release_date + ) + """The date of the latest release.""" + self.latest_release_version: Version | None = ( + Version(latest_release_version) + if isinstance(latest_release_version, str) + else latest_release_version + ) + """The version of the latest release.""" + self.latest_release_tag: str | None = None + """The tag name of the latest release.""" + self.latest_release_link: str | None = None + """The URL of the latest release commit.""" + self.first_release_date: datetime.datetime | datetime.date | None = ( + first_release_date + ) + """The date of the first release.""" + self.first_release_version: Version | None = ( + Version(first_release_version) + if isinstance(first_release_version, str) + else first_release_version + ) + self.first_release_tag: str | None = None + """The tag name of the first release.""" + self.first_release_link: str | None = None + """The URL of the first release commit.""" + """The version of the first release.""" + self.first_nonzv_release_date: datetime.datetime | datetime.date | None = ( + first_nonzv_release_date + ) + """The date of the first non-0ver release.""" + self.first_nonzv_release_version: Version | None = ( + Version(first_nonzv_release_version) + if isinstance(first_nonzv_release_version, str) + else first_nonzv_release_version + ) + """The version of the first non-0ver release.""" + self.first_nonzv_release_tag: str | None = None + """The tag name of the first non-0ver release.""" + self.first_nonzv_release_link: str | None = None + """The URL of the first non-0ver release commit.""" + self.last_zv_release_version: Version | None = ( + Version(last_zv_release_version) + if isinstance(last_zv_release_version, str) + else last_zv_release_version + ) + """The last 0ver release before the project left ZeroVer.""" + + @classmethod + def from_dict(cls, info: dict): + return cls(**info) + + def to_dict(self) -> dict: + hide = ["tag_regex_subs"] + return { + k: v for k, v in self.__dict__.items() if v is not None and k not in hide + } + + +class Entry: + def __init__(self, info: dict, args: argparse.Namespace): + self.info = ProjectsEntry.from_dict(info) + if self.info.gh_url: + self.gh_org = self.info.gh_url.split("/")[3] + self.gh_repo = self.info.gh_url.split("/")[4] + self.api = GitHubAPI(args.user, args.token, self.gh_org, self.gh_repo) + else: + self.gh_org = None + self.gh_repo = None + self.api = None + self.tags: list[GitHubTag] = [] + self.failed_tags: list[GitHubTag] = [] + self.duplicate_tags: list[GitHubTag] = [] + def update_gh_project_info(self): + if self.api is None: + return -def parse_tags( - tags_data: list[GitHubTagDict], regex_subs: list[RegexSubstituionDict] | None = None -) -> tuple[ - list[GitHubParsedTagDict], list[GitHubDebugTagDict], list[GitHubDebugTagDict] -]: - """Parse the list of GitHub tags returning the tags with the PEP 440 compatible version objects. - - Parameters - ---------- - tags_data: list[dict] - The list of GitHub tags to parse from the API. - regex_subs: list[RegexSubstituionDict] | None = None - The list of regex substitutions from projects.yaml to apply to the tag names before parsing. - - Returns: - parsed_tags_data: list[dict] - The list of properly parsed tags with the added "version" key. - failed_tags_data: list[dict] - The list of tags that failed to be parsed with the added "sub_name" key for debugging. - duplicate_tags_data: list[dict] - The list of duplicate tags with the added "sub_name" key for debugging. - """ - tag_names: set[str] = set() - parsed_tags_data: list[GitHubParsedTagDict] = [] - failed_tags_data: list[GitHubDebugTagDict] = [] - duplicate_tags_data: list[GitHubDebugTagDict] = [] - - for tag in reversed(tags_data): - tag_name = tag["name"] - if regex_subs: - for sub in regex_subs: - if sub.get("remove"): - tag_name = re.sub(sub["remove"], "", tag_name) - else: - tag_name = re.sub(sub["search"], sub["replace"], tag_name) - if tag_name in tag_names: - duplicate_tags_data.append({**tag, "sub_name": tag_name}) - continue - else: - tag_names.add(tag_name) + repo_info = self.api.get_repo_info() + self.info.star_count = repo_info["star_count"] + + self.get_tags() + if not self.tags: + return - if is_version_compatible(tag_name): - parsed_tags_data.append({**tag, "version": Version(tag_name)}) + self.info.release_count = len(self.tags) + + # Latest release + # TODO: ensure latest_release_version is Version() compatible in the check_projects_json.py script + if not self.info.latest_release_version: + latest_release = self.tags[0] + self.info.latest_release_tag = latest_release.name + self.info.latest_release_link = latest_release.commit_url + self.info.latest_release_date = latest_release.committed_date + self.info.latest_release_version = latest_release.version + + # First release + first_release = None + if not self.info.first_release_version: + first_releases = [ + v for v in self.tags if v.version == self.info.first_release_version + ] + if first_releases: + first_release = first_releases[0] else: - failed_tags_data.append({**tag, "sub_name": tag_name}) + first_release = self.tags[-1] + if first_release: + self.info.first_release_tag = first_release.name + self.info.first_release_link = first_release.commit_url + self.info.first_release_date = first_release.committed_date + self.info.first_release_version = first_release.version + + # ZeroVer releases + zv_releases = [t for t in self.tags if t.version and t.version.major == 0] + self.info.release_count_zv = len(zv_releases) + print( + f" .. {self.info.release_count} releases, {self.info.release_count_zv} 0ver" + ) - return ( - list(reversed(parsed_tags_data)), - list(reversed(failed_tags_data)), - duplicate_tags_data, - ) + self.info.is_zerover = ( + self.info.latest_release_version is not None + and self.info.latest_release_version.major == 0 + ) + if self.info.is_zerover: + return + + # Last ZeroVer release + if not self.info.last_zv_release_version: + last_zv_release = zv_releases[0] + self.info.last_zv_release_version = last_zv_release.version + + # First non-ZeroVer release + if not self.info.first_nonzv_release_version: + nonzv_releases = [ + t for t in self.tags if t.version and t.version.major != 0 + ] + first_nonzv_release = nonzv_releases[-1] + self.info.first_nonzv_release_tag = first_nonzv_release.name + self.info.first_nonzv_release_link = first_nonzv_release.commit_url + self.info.first_nonzv_release_date = first_nonzv_release.committed_date + self.info.first_nonzv_release_version = first_nonzv_release.version + + def get_tags(self): + if not self.api: + return + + tags_data = self.api.fetch_tags() + + tag_names = set() + self.tags = [] + self.failed_tags = [] + self.duplicate_tags = [] + for tag in reversed(tags_data): + tag.process_name(self.info.tag_regex_subs) + if tag.processed_name in tag_names: + self.duplicate_tags.append(tag) + continue + else: + tag_names.add(tag.processed_name) + + if tag.is_version_compatible(): + tag.parse_version() + self.tags.append(tag) + else: + self.failed_tags.append(tag) + + self.tags = list(reversed(self.tags)) + self.duplicate_tags = list(reversed(self.duplicate_tags)) + self.failed_tags = list(reversed(self.failed_tags)) -def get_gh_project_info( - info: ProjectsInputEntryDict, args: argparse.Namespace -) -> GitHubInfoDict: - gh_info: GitHubInfoDict = {} # type: ignore - url = info.get("gh_url") - if url is None: - return gh_info - - org, repo = URL(url.rstrip("/")).path_parts[1:] - gh_url = URL("https://api.github.com/repos") - gh_url.path_parts += (org, repo) - - project_data = _get_gh_json(gh_url.to_text(), args) - if isinstance(project_data, dict): - gh_info["star_count"] = project_data["stargazers_count"] - - gh_url.path_parts += ("tags",) - tags_data: list[GitHubTagDict] = _get_gh_json(gh_url.to_text(), args) # type: ignore - parsed_tags_data, _, _ = parse_tags(tags_data, info.get("tag_regex_subs")) - if not parsed_tags_data: - return gh_info - - gh_info["release_count"] = len(parsed_tags_data) - - # Latest release - if "latest_release_date" not in info or "latest_release_version" not in info: - latest_release = parsed_tags_data[0] - latest_release_data = _get_gh_rel_data(latest_release, args) - for k, v in latest_release_data.items(): - gh_info[f"latest_release_{k}"] = v - else: - info["latest_release_version"] = Version(info["latest_release_version"]) # type: ignore - # TODO: ensure latest_release_version is Version() compatible in the check_projects_json.py script +def generate(args: argparse.Namespace): + start_time = time.time() + projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" + with projects_yaml_path.open() as f: + projects: list[dict] = yaml.safe_load(f)["projects"] - # Sort after grabbing the latest release - # TODO: check if this is needed - # parsed_tags_data.sort(key=lambda x: x["version"], reverse=True) + if not projects: + return - # First release - first_release = None - if "first_release_version" in info: - first_releases = [ - v for v in parsed_tags_data if v["name"] == info["first_release_version"] - ] - if first_releases: - first_release = first_releases[0] + projects_json_path = Path(__file__).parent.parent / "projects.json" + try: + with projects_json_path.open() as f: + cur_data = json.load(f) + cur_projects: list[dict] = cur_data["projects"] + cur_gen_date = datetime.datetime.fromisoformat(cur_data["gen_date"]) + except (IOError, KeyError): + cur_projects = [] + cur_gen_date = None + + if cur_gen_date: + fetch_outdated = ( + datetime.datetime.now() - cur_gen_date.replace(tzinfo=None) + ) > datetime.timedelta(seconds=3600) else: - first_release = parsed_tags_data[-1] - if first_release: - first_release_data = _get_gh_rel_data(first_release, args) - for k, v in first_release_data.items(): - gh_info[f"first_release_{k}"] = v - - # ZeroVer releases - zv_releases = [] - for rel in parsed_tags_data: - if rel["version"].major == 0: - zv_releases.append(rel) - gh_info["release_count_zv"] = len(zv_releases) - print( - f' .. {gh_info["release_count"]} releases, {gh_info["release_count_zv"]} 0ver' - ) + fetch_outdated = True + + cur_names = sorted([c["name"] for c in cur_projects]) + new_names = sorted([n["name"] for n in projects]) - gh_info["is_zerover"] = gh_info["latest_release_version"].major == 0 # type: ignore - if gh_info["is_zerover"]: - return gh_info + if fetch_outdated or cur_names != new_names or args.disable_caching: + entries: list[dict] = [] - # Last ZeroVer release - last_zv_release = zv_releases[0] - gh_info["last_zv_release_version"] = last_zv_release["name"] + for p in projects: + print("Processing", p["name"]) + if p.get("skip"): + continue - # First non-ZeroVer release - first_nonzv_release = parsed_tags_data[parsed_tags_data.index(last_zv_release) - 1] - first_nonzv_release_data = _get_gh_rel_data(first_nonzv_release, args) - for k, v in first_nonzv_release_data.items(): - gh_info[f"first_nonzv_release_{k}"] = v + entry = Entry(p, args) + if not entry.info.url and entry.info.gh_url: + entry.info.url = entry.info.gh_url - return gh_info + if entry.info.gh_url: + entry.update_gh_project_info() + entries.append(entry.info.to_dict()) -def fetch_entries( - projects: list[ProjectsInputEntryDict], args: argparse.Namespace -) -> list[ProjectsOutputEntryDict]: - entries: list[ProjectsOutputEntryDict] = [] + entries = sorted(entries, key=lambda e: e["name"]) + else: + print("Current data already up to date, exiting.") + return - for p in projects: - print("Processing", p["name"]) - info: ProjectsOutputEntryDict = cast(ProjectsOutputEntryDict, p) - if info.get("skip"): - continue + pprint(entries) - info["url"] = info.get("url", info.get("gh_url")) + res = { + "projects": entries, + "gen_date": datetime.datetime.now(datetime.timezone.utc).isoformat(), + "gen_duration": time.time() - start_time, + } + + with projects_json_path.open("w") as f: + json.dump(res, f, indent=2, sort_keys=True, default=json_default) - if info.get("gh_url"): - gh_info = get_gh_project_info(p, args) - # Only add new data, preserve any manual information - info.update({k: v for k, v in gh_info.items() if k not in info}) # type: ignore - info["is_zerover"] = info.get("is_zerover", not info.get("emeritus", False)) +def info(args: argparse.Namespace): + print("Processing", args.name_or_link) - entries.append(info) + if parse(args.name_or_link).scheme in ("http", "https"): + info = {"gh_url": args.name_or_link} + else: + info = get_entry_from_name(args.name_or_link) - return sorted(entries, key=lambda e: e["name"]) + entry = Entry(info, args) + entry.update_gh_project_info() + + print() + pprint(entry.info.to_dict()) + + +def tags(args: argparse.Namespace): + print("Processing", args.name_or_link) + + if parse(args.name_or_link).scheme in ("http", "https"): + info = {"gh_url": args.name_or_link} + else: + info = get_entry_from_name(args.name_or_link) + + entry = Entry(info, args) + entry.get_tags() + + print("\nParsed tags:") + for t in entry.tags: + print(f"{t.name} (parsed as {t.version})") + if not entry.tags: + print("No tags parsed.") + if entry.duplicate_tags: + print("\nDuplicate tags:") + for t in entry.duplicate_tags: + print(f"{t.name} (parsed as {t.processed_name})") + if entry.failed_tags: + print("\nFailed tags:") + for t in entry.failed_tags: + print(f"{t.name} (tried {t.processed_name})") def parse_args(): @@ -432,7 +565,7 @@ def add_options(parser: argparse.ArgumentParser, *, caching: bool = False): return args -def get_entry_from_name(name: str) -> ProjectsInputEntryDict: +def get_entry_from_name(name: str) -> dict: projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" with projects_yaml_path.open() as f: projects = yaml.safe_load(f)["projects"] @@ -448,96 +581,9 @@ def main(): if args.command == "generate": generate(args) elif args.command == "info": - print("Processing", args.name_or_link) - - if parse(args.name_or_link).scheme in ("http", "https"): - info = {"gh_url": args.name_or_link} - else: - info = get_entry_from_name(args.name_or_link) - - gh_info = get_gh_project_info(info, args) # type: ignore - - print() - pprint(gh_info) + info(args) elif args.command == "tags": - print("Processing", args.name_or_link) - - if parse(args.name_or_link).scheme in ("http", "https"): - info = {"gh_url": args.name_or_link} - else: - info = get_entry_from_name(args.name_or_link) - - org, repo = URL(info["gh_url"].rstrip("/")).path_parts[1:] - gh_url = URL("https://api.github.com/repos") - gh_url.path_parts += (org, repo, "tags") - - tags_data: list[GitHubTagDict] = _get_gh_json(gh_url.to_text(), args) # type: ignore - parsed_tags_data, failed_tags_data, duplicate_tag_names = parse_tags( - tags_data, info.get("tag_regex_subs") - ) - - print("\nParsed tags:") - for t in parsed_tags_data: - print(f"{t['name']} (parsed as {t['version']})") - if not parsed_tags_data: - print("No tags parsed.") - if duplicate_tag_names: - print("\nDuplicate tags:") - for t in duplicate_tag_names: - print(f"{t['name']} (parsed as {t['sub_name']})") - if failed_tags_data: - print("\nFailed tags:") - for t in failed_tags_data: - print(f"{t['name']} (tried {t['sub_name']})") - - -def generate(args: argparse.Namespace): - start_time = time.time() - projects_yaml_path = Path(__file__).parent.parent / "projects.yaml" - with projects_yaml_path.open() as f: - projects = yaml.safe_load(f)["projects"] - - if not projects: - return - - projects_json_path = Path(__file__).parent.parent / "projects.json" - try: - with projects_json_path.open() as f: - cur_data = json.load(f) - cur_projects: list[ProjectsInputEntryDict] = cur_data["projects"] - cur_gen_date = datetime.datetime.fromisoformat(cur_data["gen_date"]) - except (IOError, KeyError): - cur_projects = [] - cur_gen_date = None - - if cur_gen_date: - fetch_outdated = ( - datetime.datetime.now() - cur_gen_date.replace(tzinfo=None) - ) > datetime.timedelta(seconds=3600) - else: - fetch_outdated = True - - cur_names = sorted([c["name"] for c in cur_projects]) - new_names = sorted([n["name"] for n in projects]) - - if fetch_outdated or cur_names != new_names or args.disable_caching: - entries = fetch_entries(projects, args) - else: - print("Current data already up to date, exiting.") - return - - # pprint(entries) - - res = { - "projects": entries, - "gen_date": datetime.datetime.now(datetime.timezone.utc).isoformat(), - "gen_duration": time.time() - start_time, - } - - with projects_json_path.open("w") as f: - json.dump(res, f, indent=2, sort_keys=True, default=json_default) - - sys.exit(0) + tags(args) if __name__ == "__main__": From 92a7928cc5088286eac506aac3d509d9dfe1943e Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Tue, 31 Dec 2024 11:15:42 -0500 Subject: [PATCH 05/10] fix: update projects.yaml --- projects.yaml | 68 ++++++++++++++++---------------------- tools/gen_projects_json.py | 6 ++-- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/projects.yaml b/projects.yaml index fe90dc1..6d8c266 100644 --- a/projects.yaml +++ b/projects.yaml @@ -30,9 +30,6 @@ projects: - remove: ^kafka- - search: -incubating-candidate-(\d+)$ replace: rc\1 - - remove: -KAFKA-\d+$ - - remove: -cp$ - - remove: -beta\d+-candidate\d+$ - name: Minikube gh_url: https://github.com/kubernetes/minikube reason: Official kubernetes project with a logo, but no major release. @@ -47,11 +44,11 @@ projects: - remove: ^tor- - remove: ^debian-version- - remove: -root$ - - remove: "@\\d+$" - - remove: incompat-merged$ - - remove: -cvs-\d+-\d+$ + - search: "@(\\d+)$" + replace: +\1 + - search: -cvs-(\d+-\d+)$ + replace: +\1 - remove: -pre\.\d+$ - - remove: -$ - name: Home Assistant gh_url: https://github.com/home-assistant/home-assistant - name: Vala @@ -98,8 +95,6 @@ projects: gh_url: https://github.com/scipy/scipy - name: Pandas gh_url: https://github.com/pandas-dev/pandas - tag_regex_subs: - - remove: _ahl\d+$ - name: Wheel gh_url: https://github.com/pypa/wheel reason: Depended on by production Python deployments everywhere. @@ -114,12 +109,12 @@ projects: - name: certbot gh_url: https://github.com/certbot/certbot tag_regex_subs: - - remove: -corrected$ + - search: -corrected$ + replace: "-1" - name: sshuttle gh_url: https://github.com/sshuttle/sshuttle tag_regex_subs: - remove: ^sshuttle- - - remove: -macos-bin$ - name: Theano gh_url: https://github.com/Theano/Theano tag_regex_subs: @@ -141,15 +136,15 @@ projects: - name: drone gh_url: https://github.com/drone/drone tag_regex_subs: - - remove: -debug$ - - remove: -gitspaces-beta$ + - search: -debug$ + replace: -dev + - search: -gitspaces-beta$ + replace: -beta - name: HashiCorp Terraform gh_url: https://github.com/hashicorp/terraform - name: HashiCorp Nomad gh_url: https://github.com/hashicorp/nomad tag_regex_subs: - - remove: -changelog$ - - remove: -connect1$ - remove: ^ent-changelog- - name: HashiCorp Vault gh_url: https://github.com/hashicorp/vault @@ -157,7 +152,8 @@ projects: first_release_date: 2015-04-29 first_release_version: v0.1.0 tag_regex_subs: - - remove: -rebuild$ + - search: -rebuild$ + replace: "-1" - remove: ^ent-changelog- - search: -rc(\d+)\.(\d+)$ replace: -rc\1+\2 @@ -210,7 +206,6 @@ projects: gh_url: https://github.com/pilosa/pilosa url: https://www.pilosa.com/ tag_regex_subs: - - remove: -community$ - search: -alpha\.(\d+)(-pre|\.)(\d+)$ replace: -alpha\1+\3 - name: fail2ban @@ -232,8 +227,6 @@ projects: - search: CLAMAV_(\d+)_(\d+)(RC\d+)? replace: \1.\2\3 - remove: ^clamav- - - remove: -dmgxar$ - - remove: _sf$ - search: "@(\\d+)$" replace: +\1 - name: OpenRCT2 @@ -266,7 +259,7 @@ projects: tag_regex_subs: - search: -insiders$ replace: rc1 - - search: -insiders?(\d+)$ + - search: -insiders(\d+)$ replace: rc\1 - search: -debug$ replace: dev @@ -368,7 +361,6 @@ projects: - name: Tendermint gh_url: https://github.com/tendermint/tendermint/ tag_regex_subs: - - remove: -autodraft$ - search: -dev0-fix0$ replace: dev - search: ^dev-(.*) @@ -380,12 +372,9 @@ projects: latest_release_date: 2024-12-16 latest_release_version: 0.50.11 tag_regex_subs: - - remove: -ics - - remove: -lsm - remove: -circuit$ - - remove: -streaming$ - - remove: -patch$ - - remove: -iris$ + - search: -patch$ + replace: "-1" - name: LocalStack url: https://localstack.cloud gh_url: https://github.com/localstack/localstack @@ -411,7 +400,6 @@ projects: gh_url: https://github.com/OpenRC/openrc tag_regex_subs: - remove: ^openrc- - - remove: ^funtoo-openrc- - name: Notary gh_url: https://github.com/notaryproject/notary - name: GoodbyeDPI @@ -478,7 +466,8 @@ projects: gh_url: https://github.com/MultiMC/Launcher reason: Prominent launcher for maintaining multiple instances of MineCraft. tag_regex_subs: - - remove: -final$ + - search: -final$ + replace: "-1" - name: Factor url: https://factorcode.org/ gh_url: https://github.com/factor/factor @@ -611,8 +600,8 @@ projects: url: https://www.metabase.com/ gh_url: https://github.com/metabase/metabase tag_regex_subs: # Metabase has really wierd mixed versioning - - remove: ^v2015060\d - - remove: v1.[345]\d\.\d+(\.\d+)? # Hide non-0vers, not sure why they use them... + - remove: ^v2015060\d-alpha + - remove: v1.[345]\d\.\d+(\.\d+)?.* # Hide non-0vers, not sure why they use them... - name: esbuild url: https://esbuild.github.io gh_url: https://github.com/evanw/esbuild @@ -627,19 +616,20 @@ projects: emeritus: true first_release_date: 1998-12-23 first_release_version: 0.9.1 # release name convention is weird, hardcode counts 2018-03-31 - last_zv_release_version: 0.9.8n - first_nonzv_release_date: 2010-03-29 tag_regex_subs: - - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) + - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) replace: \2.\3.\4+\5 - - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-|OpenSSL-engine-)(\d+)_(\d+)_(\d+) + - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-)(\d+)_(\d+)_(\d+) replace: \2.\3.\4 - search: ^OpenSSL-fips-(\d+)_(\d+) replace: \1.\2 - remove: ^openssl- - - remove: -auto-reformat$ - - remove: -format$ - - remove: -reformat$ + - search: -auto-reformat$ + replace: "-1" + - search: -format$ + replace: "-2" + - search: -reformat$ + replace: "-3" - name: MAME gh_url: https://github.com/mamedev/mame wp_url: https://en.wikipedia.org/wiki/MAME @@ -666,7 +656,6 @@ projects: - remove: ^wm- - remove: ^release- - remove: -crm$ - - remove: \+$ - name: ReactOS url: https://www.reactos.org/ gh_url: https://github.com/reactos/reactos # Many tags are missing, use manual data below @@ -734,7 +723,6 @@ projects: replace: dev\1+\2 - search: test(\d+)$ replace: dev\1 - - remove: -really$ - name: cargo-audit url: https://rustsec.org/ gh_url: https://github.com/rustsec/rustsec @@ -846,7 +834,7 @@ projects: reason: "Open map data used almost anywhere there's a non-Google map." first_release_version: 0.3 first_release_date: 2004-08-09 - latest_release_version: 0.6 February 2021 # See https://wiki.openstreetmap.org/wiki/API_v0.6#Semantic_versioning + latest_release_version: 0.6 latest_release_date: 2021-02-01 release_count: 14 # 0.3 → 0.6 and then the dated changes to 0.6 - name: XeTeX diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index 5ec45c4..b393377 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -312,7 +312,7 @@ def update_gh_project_info(self): # First release first_release = None - if not self.info.first_release_version: + if self.info.first_release_version: first_releases = [ v for v in self.tags if v.version == self.info.first_release_version ] @@ -368,7 +368,7 @@ def get_tags(self): self.duplicate_tags = [] for tag in reversed(tags_data): tag.process_name(self.info.tag_regex_subs) - if tag.processed_name in tag_names: + if tag.processed_name and tag.processed_name in tag_names: self.duplicate_tags.append(tag) continue else: @@ -382,6 +382,8 @@ def get_tags(self): self.tags = list(reversed(self.tags)) self.duplicate_tags = list(reversed(self.duplicate_tags)) + if self.duplicate_tags: + print(self.info.name, [t.name for t in self.duplicate_tags]) self.failed_tags = list(reversed(self.failed_tags)) From 0eb5c80988f87c2357d589eab5cfb70ef6fdba62 Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Tue, 31 Dec 2024 11:44:01 -0500 Subject: [PATCH 06/10] fix: remove unneeding manual data --- projects.yaml | 82 +++++++++----------------------------- tools/gen_projects_json.py | 1 + 2 files changed, 19 insertions(+), 64 deletions(-) diff --git a/projects.yaml b/projects.yaml index 6d8c266..5fe15aa 100644 --- a/projects.yaml +++ b/projects.yaml @@ -148,9 +148,6 @@ projects: - remove: ^ent-changelog- - name: HashiCorp Vault gh_url: https://github.com/hashicorp/vault - last_zv_release_version: v0.11.6 - first_release_date: 2015-04-29 - first_release_version: v0.1.0 tag_regex_subs: - search: -rebuild$ replace: "-1" @@ -272,8 +269,6 @@ projects: - name: React url: https://reactjs.org gh_url: https://github.com/facebook/react - first_release_date: 2013-05-29T19:46:11Z - first_release_version: 0.3.0 # A later v0.0.0 tag breaks this tag_regex_subs: - search: -alpha\.(.*)$ replace: -alpha+\1 @@ -369,8 +364,6 @@ projects: gh_url: https://github.com/cosmos/cosmos-sdk first_release_date: 2017-03-06 first_release_version: 0.2.0 # https://github.com/tendermint/basecoin/blob/master/CHANGELOG.md#020-march-6-2017 - latest_release_date: 2024-12-16 - latest_release_version: 0.50.11 tag_regex_subs: - remove: -circuit$ - search: -patch$ @@ -556,8 +549,6 @@ projects: - name: Thanos url: https://thanos.io gh_url: https://github.com/thanos-io/thanos - first_release_date: 2018-05-18 - first_release_version: 0.1.0-rc.0 # https://github.com/thanos-io/thanos/releases/tag/v0.1.0-rc.0 - name: PyVista url: https://docs.pyvista.org/ gh_url: https://github.com/pyvista/pyvista @@ -614,8 +605,6 @@ projects: - name: OpenSSL gh_url: https://github.com/openssl/openssl emeritus: true - first_release_date: 1998-12-23 - first_release_version: 0.9.1 # release name convention is weird, hardcode counts 2018-03-31 tag_regex_subs: - search: ^(OpenSSL_|SSLeay_|OpenSSL-fips-|OpenSSL-fips-)(\d+)_(\d+)_(\d+)([a-z]{1,2}) replace: \2.\3.\4+\5 @@ -633,10 +622,8 @@ projects: - name: MAME gh_url: https://github.com/mamedev/mame wp_url: https://en.wikipedia.org/wiki/MAME - first_release_version: 0.1 first_release_date: 1997-02-05 - latest_release_version: 0.272 - latest_release_date: 2024-11-29 + first_release_version: 0.1 tag_regex_subs: - search: ^mame(\d)(\d+)(u\d+)$ replace: \1.\2+\3 @@ -646,11 +633,8 @@ projects: url: https://windowmaker.org/ gh_url: https://github.com/window-maker/wmaker wp_url: https://en.wikipedia.org/wiki/Window_Maker - first_release_date: 1997-01-01 # exact date unknown + first_release_date: 1997-01-01 # Exact date unknown first_release_version: 0.0.3 - latest_release_version: 0.96.0 - latest_release_date: 2023-08-05 - release_count: 94 tag_regex_subs: - remove: ^wmaker- - remove: ^wm- @@ -660,21 +644,12 @@ projects: url: https://www.reactos.org/ gh_url: https://github.com/reactos/reactos # Many tags are missing, use manual data below reason: A free Windows-compatible Operating System - first_release_version: 0.0.7 - first_release_date: 1996-01-23 - latest_release_version: 0.4.14 - latest_release_date: 2021-12-16 - release_count: 59 # Ignore GitHub saying >250 releases, ~80% of them are some kind of weird backup non-releases tag_regex_subs: - remove: ^ReactOS- - remove: -release$ - name: three.js url: https://threejs.org/ gh_url: https://github.com/mrdoob/three.js - first_release_version: 0.1 - first_release_date: 2013-07-03T11:49:48 - latest_release_version: 0.171.0 - latest_release_date: 2024-11-29 tag_regex_subs: - remove: /ROME$ - search: ^r(\d+)$ @@ -682,11 +657,7 @@ projects: - name: google-api-client (ruby) url: https://rubygems.org/gems/google-api-client/ gh_url: https://github.com/googleapis/google-api-ruby-client - first_release_version: 0.1.0 - first_release_date: 2010-10-14 - latest_release_version: 0.15.1 # Their versioning is really hard to figure out... - latest_release_date: 2024-07-29 - release_count: 254 + # Their versioning is really hard to figure out... tag_regex_subs: - remove: ^google-api-client- - remove: ^google-api-client/ @@ -696,18 +667,11 @@ projects: url: https://rust-random.github.io/book/ gh_url: https://github.com/rust-random/rand reason: The most downloaded Rust crate - first_release_version: 0.1.1 - first_release_date: 2015-02-03 - latest_release_version: 0.8.5 - latest_release_date: 2022-02-14 - release_count: 68 - name: suhosin url: https://suhosin.org/ gh_url: https://github.com/sektioneins/suhosin - first_release_version: 0.9.1 first_release_date: 2006-09-16T00:00:00 - latest_release_version: 0.9.38 - latest_release_date: 2015-05-21T00:00:00 + first_release_version: 0.9.1 tag_regex_subs: - remove: ^suhosin- - name: Pure Data @@ -716,8 +680,6 @@ projects: url: https://puredata.info/ first_release_date: 1996-06-01 first_release_version: 0.1 - latest_release_version: 0.55-2 - latest_release_date: 2024-11-17 tag_regex_subs: - search: test(\d+)\.?([a-z\d])$ replace: dev\1+\2 @@ -726,12 +688,18 @@ projects: - name: cargo-audit url: https://rustsec.org/ gh_url: https://github.com/rustsec/rustsec - first_release_date: 2017-02-27 - first_release_version: 0.1.0 - latest_release_date: 2024-10-29 - latest_release_version: 0.21.0 tag_regex_subs: - remove: ^cargo-audit/ + - name: pg (Ruby) + url: https://rubygems.org/gems/pg + gh_url: https://github.com/ged/ruby-pg/tags + reason: Leading library for connecting Ruby to Postgres + first_release_date: 2008-01-26 + first_release_version: 0.7.9.2008.01.28 + - name: distlib + url: https://distlib.readthedocs.io + gh_url: https://github.com/pypa/distlib + reason: Depended on by PyPA/Pip # Non-GitHub projects below, manually updated - name: ASCEND @@ -744,6 +712,7 @@ projects: latest_release_version: 0.9.8 - name: Dash url: https://git.kernel.org/pub/scm/utils/dash/dash.git + repo_url: https://git.kernel.org/pub/scm/utils/dash/dash.git first_release_date: 1997-06-19T09:29:16 first_release_version: 0.3.1 latest_release_date: 2022-12-11T06:33:43 @@ -758,7 +727,7 @@ projects: latest_release_date: 2024-04-23 - name: Inkscape url: https://inkscape.org/ - repo_url: https://launchpad.net/inkscape + repo_url: https://gitlab.com/inkscape/inkscape emeritus: true first_release_date: 2000-09-01 first_release_version: 0.16 # sodipodi cvs import according to changelog @@ -770,13 +739,6 @@ projects: first_release_date: 2006-05-22 latest_release_date: 2022-08-22 latest_release_version: 0.9.14.2 - - name: distlib - url: https://bitbucket.org/pypa/distlib - reason: Depended on by PyPA/Pip - first_release_version: 0.1.0 - first_release_date: 2013-03-02 - latest_release_version: 0.3.4 - latest_release_date: 2021-12-08 - name: Factorio url: https://factorio.com/ reason: Popular, for-profit game. @@ -785,17 +747,9 @@ projects: first_release_version: 0.1.0 first_nonzv_release_date: 2020-08-14 last_zv_release_version: 0.18.47 - - name: pg (Ruby) - url: https://rubygems.org/gems/pg - reason: Leading library for connecting Ruby to Postgres - emeritus: true - first_release_version: 0.7.9.2008.01.28 - first_release_date: 2008-01-26 - last_zv_release_version: 0.21.0 - first_nonzv_release_date: 2017-06-13 - release_count_zv: 123 - name: PuTTY - url: https://www.chiark.greenend.org.uk/~sgtatham/putty/ + url: https://www.chiark.greenend.org.uk/~sgtatham/putty + repo_url: https://git.tartarus.org/?p=simon/putty.git reason: Probably the most popular SSH client in Windows history first_release_version: 0.43beta first_release_date: 1999-01-08 diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index b393377..f6a82da 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -296,6 +296,7 @@ def update_gh_project_info(self): self.info.star_count = repo_info["star_count"] self.get_tags() + # TODO: Do pre releases, release candidates, post release fixes, dev releases, etc. count as releases? if not self.tags: return From 16f49f82f9f28e19f807f6400f97b2ede372acd7 Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Tue, 31 Dec 2024 11:49:31 -0500 Subject: [PATCH 07/10] feat: add star counts for gitlab repos --- projects.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects.yaml b/projects.yaml index 5fe15aa..abda394 100644 --- a/projects.yaml +++ b/projects.yaml @@ -733,6 +733,7 @@ projects: first_release_version: 0.16 # sodipodi cvs import according to changelog first_nonzv_release_date: 2020-05-01 last_zv_release_version: 0.92.5 + star_count: 3482 - name: Compiz url: https://www.compiz-fusion.org/ repo_url: https://launchpad.net/compiz @@ -808,6 +809,7 @@ projects: last_zv_release_version: 0.3.85 first_nonzv_release_date: 2023-11-26 release_count_zv: 119 + star_count: 410 - name: gettext url: https://www.gnu.org/software/gettext/ repo_url: https://savannah.gnu.org/projects/gettext/ @@ -825,6 +827,8 @@ projects: first_release_version: 0.0.2 first_nonzv_release_date: 2008-06-17 last_zv_release_version: 0.9.61 + release_count_zv: 298 + star_count: 118 - name: transformers url: https://hackage.haskell.org/package/transformers repo_url: https://hub.darcs.net/ross/transformers From 3b6530a4a792c6f0a844f6b59f9cbe71d4987baf Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Tue, 31 Dec 2024 12:20:08 -0500 Subject: [PATCH 08/10] fix: resolve todos --- projects.yaml | 4 ++-- tools/check_projects_yaml.py | 42 +++++++++++++++++++++++++++--------- tools/gen_projects_json.py | 11 ++++------ 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/projects.yaml b/projects.yaml index abda394..537847a 100644 --- a/projects.yaml +++ b/projects.yaml @@ -762,7 +762,7 @@ projects: emeritus: true first_release_date: 1994-08-13 # https://sourceforge.net/p/slrn/mailman/message/6405527/ first_release_version: 0.1.0.0 - last_zv_release_version: 0.9.9p1 + last_zv_release_version: 0.9.9-1 first_nonzv_release_date: 2012-12-21 release_count_zv: 71 # changelog_text.count('\nChanges since 0') - name: Dwarf Fortress @@ -779,7 +779,7 @@ projects: reason: Immensely popular cross-platform open-source game under continuous development for 6 years. first_release_version: 0.1 first_release_date: 2013-02-26 - latest_release_version: 0.H + # latest_release_version: 0.H # TODO: decide what to do here latest_release_date: 2024-11-22 release_count: 24 # https://cataclysmdda.org/releases/ - name: OpenStreetMap API/website diff --git a/tools/check_projects_yaml.py b/tools/check_projects_yaml.py index 36fca82..a692e11 100644 --- a/tools/check_projects_yaml.py +++ b/tools/check_projects_yaml.py @@ -5,6 +5,7 @@ import yaml from boltons.iterutils import redundant from hyperlink import parse +from packaging.version import InvalidVersion, Version from schema import Optional, Or, Schema @@ -14,6 +15,14 @@ def check_url(url_str: str): return True +def check_version(version_str: str | float): + try: + Version(str(version_str)) + except InvalidVersion: + return False + return True + + OPTIONAL = { Optional("gh_url"): check_url, Optional("repo_url"): str, @@ -22,6 +31,17 @@ def check_url(url_str: str): Optional("reason"): str, Optional("star_count"): int, } +REGEX = [ + Or( + { + "remove": str, + }, # type: ignore + { + "search": str, + "replace": str, + }, # type: ignore + ), +] IN_SCHEMA = Schema( { "projects": [ @@ -32,16 +52,17 @@ def check_url(url_str: str): "name": str, "gh_url": check_url, Optional("emeritus"): False, + Optional("tag_regex_subs"): REGEX, Optional("url"): check_url, # Overrides gh_url for the hyperlink Optional("release_count"): int, Optional("latest_release_date"): Or( datetime.date, datetime.datetime ), - Optional("latest_release_version"): Or(float, str), + Optional("latest_release_version"): check_version, Optional("first_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, }, # type: ignore # Emeritus GitHub projects { @@ -49,17 +70,18 @@ def check_url(url_str: str): "name": str, "gh_url": check_url, "emeritus": True, + Optional("tag_regex_subs"): REGEX, Optional("url"): check_url, # Overrides gh_url for the hyperlink Optional("release_count_zv"): int, Optional("first_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, Optional("first_nonzv_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_nonzv_release_version"): Or(float, str), - Optional("last_zv_release_version"): Or(float, str), + Optional("first_nonzv_release_version"): check_version, + Optional("last_zv_release_version"): check_version, }, # type: ignore # Non-GitHub projects { @@ -72,11 +94,11 @@ def check_url(url_str: str): Optional("latest_release_date"): Or( datetime.date, datetime.datetime ), - Optional("latest_release_version"): Or(float, str), + Optional("latest_release_version"): check_version, Optional("first_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, }, # type: ignore # Emeritus Non-GitHub projects { @@ -86,12 +108,12 @@ def check_url(url_str: str): "emeritus": True, Optional("release_count_zv"): int, "first_release_date": Or(datetime.date, datetime.datetime), - Optional("first_release_version"): Or(float, str), + Optional("first_release_version"): check_version, Optional("first_nonzv_release_date"): Or( datetime.date, datetime.datetime ), - Optional("first_nonzv_release_version"): Or(float, str), - Optional("last_zv_release_version"): Or(float, str), + Optional("first_nonzv_release_version"): check_version, + Optional("last_zv_release_version"): check_version, }, # type: ignore ) ], diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index f6a82da..dde36bb 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -7,11 +7,10 @@ import time from pathlib import Path from pprint import pprint -from typing import TypedDict, cast +from typing import TypedDict import requests import yaml -from boltons.urlutils import URL from hyperlink import parse from packaging.version import InvalidVersion, Version @@ -164,6 +163,8 @@ def fetch_tags(self) -> list[GitHubTag]: def json_default(obj): if isinstance(obj, (datetime.datetime, datetime.date)): return obj.isoformat() + if isinstance(obj, Version): + return str(obj) raise TypeError(f"{obj} is not serializable") @@ -199,9 +200,7 @@ def __init__( """The project's non-GitHub repository link.""" self.wp_url: str | None = wp_url """The project's Wikipedia link.""" - self.emeritus: bool | None = emeritus - """`true` if the project is no longer ZeroVer""" - self.is_zerover: bool = bool(self.emeritus) # TODO: combine with emeritus + self.is_zerover: bool = bool(emeritus) """Whether the project is still ZeroVer.""" self.reason: str | None = reason """The reason this project was added to the 0ver website listing.""" @@ -296,14 +295,12 @@ def update_gh_project_info(self): self.info.star_count = repo_info["star_count"] self.get_tags() - # TODO: Do pre releases, release candidates, post release fixes, dev releases, etc. count as releases? if not self.tags: return self.info.release_count = len(self.tags) # Latest release - # TODO: ensure latest_release_version is Version() compatible in the check_projects_json.py script if not self.info.latest_release_version: latest_release = self.tags[0] self.info.latest_release_tag = latest_release.name From 7a7208e34cd5eab7250141a7d0462b9129277bc7 Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Tue, 31 Dec 2024 12:56:22 -0500 Subject: [PATCH 09/10] chore: update requirements --- requirements.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3f4b9f1..7e64add 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,10 @@ boltons==24.1.0 # chert # face # lithoxyl +certifi==2024.12.14 + # via requests +charset-normalizer==3.4.1 + # via requests chert==21.0.0 # via -r requirements.in face==24.0.0 @@ -23,7 +27,9 @@ hyperlink==21.0.0 # -r requirements.in # chert idna==3.10 - # via hyperlink + # via + # hyperlink + # requests lithoxyl==21.0.0 # via chert markdown==3.7 @@ -34,11 +40,15 @@ pyyaml==6.0.2 # via # -r requirements.in # chert +requests==2.32.3 + # via -r requirements.in schema==0.7.7 # via -r requirements.in six==1.17.0 # via # html5lib # python-dateutil +urllib3==2.3.0 + # via requests webencodings==0.5.1 # via html5lib From 347a316c18b605e19fd926c7860bf2af5e61e1d8 Mon Sep 17 00:00:00 2001 From: Matthew Vine Date: Fri, 3 Jan 2025 19:09:16 -0500 Subject: [PATCH 10/10] feat: dont include pre/post/dev releases --- tools/gen_projects_json.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/gen_projects_json.py b/tools/gen_projects_json.py index dde36bb..06b341a 100644 --- a/tools/gen_projects_json.py +++ b/tools/gen_projects_json.py @@ -374,7 +374,14 @@ def get_tags(self): if tag.is_version_compatible(): tag.parse_version() - self.tags.append(tag) + if tag.version and ( + tag.version.is_devrelease + or tag.version.is_postrelease + or tag.version.is_prerelease + ): + self.duplicate_tags.append(tag) + else: + self.tags.append(tag) else: self.failed_tags.append(tag)