From dd14c84477791fb03583a6d412cdedaff88c4e1e Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 21:49:47 +0000 Subject: [PATCH 1/8] add check_html --- .github/scripts/check_html.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/scripts/check_html.py diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py new file mode 100644 index 0000000000000..97bf82d677fc2 --- /dev/null +++ b/.github/scripts/check_html.py @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from bs4 import BeautifulSoup +import glob + +def is_valid_HTML_tag(html_string_to_check: str) -> bool: + soup = BeautifulSoup(html_string_to_check, 'html.parser') + return html_string_to_check == str(soup) + +if __name__ == "__main__": + htmls = glob.glob(r'*.html') + for html in htmls: + with open(html, "r") as fp: + content = fp.read() + if not is_valid_HTML_tag(content): + exit(1) From 8de5596dee33b74ecf9489c39f2d77e2387ef015 Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 22:14:18 +0000 Subject: [PATCH 2/8] add to flow --- .github/scripts/check_html.py | 2 +- .github/workflows/build.yml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py index 97bf82d677fc2..1ea33012bc96b 100644 --- a/.github/scripts/check_html.py +++ b/.github/scripts/check_html.py @@ -22,7 +22,7 @@ def is_valid_HTML_tag(html_string_to_check: str) -> bool: return html_string_to_check == str(soup) if __name__ == "__main__": - htmls = glob.glob(r'*.html') + htmls = glob(pathname="**/*.html", recursive=True) for html in htmls: with open(html, "r") as fp: content = fp.read() diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e1992a4059281..d521bcc7fa647 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -143,6 +143,11 @@ jobs: find ./site-docs/generated -type f -exec grep -L "." {} \; >&2 exit 1 fi + - name: Check html content + run: | + if [ python .github/scripts/check_html.py -ne 0]; then + exit 1 + fi test: needs: [validate, load-catalog] From d978421f08dd189ad0917bbd56c54f48a7c302c3 Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 22:21:31 +0000 Subject: [PATCH 3/8] Revert "MINOR: add missing
<li> to upgrade.html (#18817)" This reverts commit c1a813b7400ff38bef887f1e1a9d2ef801afb5cb. --- docs/upgrade.html | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/upgrade.html b/docs/upgrade.html index 3c42da282145e..1ff685c95cd80 100644 --- a/docs/upgrade.html +++ b/docs/upgrade.html @@ -339,7 +339,6 @@
    Notable changes in 4 This allows to not only collect the metric of the internally used clients of a Kafka Streams appliction via a broker-side plugin, but also to collect the metrics of the Kafka Streams runtime itself.
  • -
  • The default value of 'num.recovery.threads.per.data.dir' has been changed from 1 to 2. The impact of this is faster recovery post unclean shutdown at the expense of extra IO cycles. See KIP-1030 From 10739f836020e1d4b1d50b5a7cd85ef6cbaa45cf Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 22:40:23 +0000 Subject: [PATCH 4/8] test --- .github/scripts/check_html.py | 2 ++ .github/workflows/build.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py index 1ea33012bc96b..1dcfb01aa4c33 100644 --- a/.github/scripts/check_html.py +++ b/.github/scripts/check_html.py @@ -23,8 +23,10 @@ def is_valid_HTML_tag(html_string_to_check: str) -> bool: if __name__ == "__main__": htmls = glob(pathname="**/*.html", recursive=True) + print("kkkk html", htmls) for html in htmls: with open(html, "r") as fp: content = fp.read() + print("kkk check content") if not is_valid_HTML_tag(content): exit(1) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d521bcc7fa647..0de7c2b4291da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -144,7 +144,9 @@ jobs: exit 1 fi - name: Check html content + if: always() run: | + echo "execute Check html content" if [ python .github/scripts/check_html.py -ne 0]; then exit 1 fi From ebfc724c19fc3931af0110a89b14f51535ff2eca Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 23:08:26 +0000 Subject: [PATCH 5/8] fix shell --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0de7c2b4291da..ef69994f7caad 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -147,7 +147,7 @@ jobs: if: always() run: | echo "execute Check html content" - if [ python .github/scripts/check_html.py -ne 0]; then + if [ "$(python .github/scripts/check_html.py)" -ne 0 ]; then exit 1 fi From 
e9766cc9061075d208f821fd604584e3297e85aa Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 23:19:13 +0000 Subject: [PATCH 6/8] add dependency --- .github/scripts/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt index d59455f79dac6..91f84eecbc4d8 100644 --- a/.github/scripts/requirements.txt +++ b/.github/scripts/requirements.txt @@ -15,3 +15,4 @@ PyYAML~=6.0 pytz==2024.2 requests==2.32.3 +beautifulsoup4==4.10.0 From e38945a6c4da28f3038de904ae0074d029aaec0c Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 23:27:23 +0000 Subject: [PATCH 7/8] fix glob --- .github/scripts/check_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py index 1dcfb01aa4c33..ca2e244fb9bfa 100644 --- a/.github/scripts/check_html.py +++ b/.github/scripts/check_html.py @@ -15,7 +15,7 @@ from bs4 import BeautifulSoup -import glob +from glob import glob def is_valid_HTML_tag(html_string_to_check: str) -> bool: soup = BeautifulSoup(html_string_to_check, 'html.parser') From 2c8067fa0f39a1b6ea771103d7121138750bba2d Mon Sep 17 00:00:00 2001 From: TaiJu Wu Date: Mon, 10 Feb 2025 23:33:57 +0000 Subject: [PATCH 8/8] print html content --- .github/scripts/check_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py index ca2e244fb9bfa..6f8f62e62f6ae 100644 --- a/.github/scripts/check_html.py +++ b/.github/scripts/check_html.py @@ -27,6 +27,6 @@ def is_valid_HTML_tag(html_string_to_check: str) -> bool: for html in htmls: with open(html, "r") as fp: content = fp.read() - print("kkk check content") + print("kkk check content", str(content)) if not is_valid_HTML_tag(content): exit(1)