From dd14c84477791fb03583a6d412cdedaff88c4e1e Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 21:49:47 +0000
Subject: [PATCH 1/8] add check_html

---
 .github/scripts/check_html.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .github/scripts/check_html.py
diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py
new file mode 100644
index 0000000000000..97bf82d677fc2
--- /dev/null
+++ b/.github/scripts/check_html.py
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from bs4 import BeautifulSoup
+import glob
+
+def is_valid_HTML_tag(html_string_to_check: str) -> bool:
+    soup = BeautifulSoup(html_string_to_check, 'html.parser')
+    return html_string_to_check == str(soup)
+
+if __name__ == "__main__":
+    htmls = glob.glob(r'*.html')
+    for html in htmls:
+        with open(html, "r") as fp:
+            content = fp.read()
+            if not is_valid_HTML_tag(content):
+                exit(1)

From 8de5596dee33b74ecf9489c39f2d77e2387ef015 Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 22:14:18 +0000
Subject: [PATCH 2/8] add html check step to build workflow

---
 .github/scripts/check_html.py | 2 +-
 .github/workflows/build.yml   | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py
index 97bf82d677fc2..1ea33012bc96b 100644
--- a/.github/scripts/check_html.py
+++ b/.github/scripts/check_html.py
@@ -22,7 +22,7 @@ def is_valid_HTML_tag(html_string_to_check: str) -> bool:
     return html_string_to_check == str(soup)
 
 if __name__ == "__main__":
-    htmls = glob.glob(r'*.html')
+    htmls = glob(pathname="**/*.html", recursive=True)
     for html in htmls:
         with open(html, "r") as fp:
             content = fp.read()
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e1992a4059281..d521bcc7fa647 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -143,6 +143,11 @@ jobs:
             find ./site-docs/generated -type f -exec grep -L "." {} \; >&2
             exit 1
           fi
+      - name: Check html content
+        run: |
+          if [ python .github/scripts/check_html.py -ne 0]; then
+            exit 1
+          fi
 
   test:
     needs: [validate, load-catalog]

From d978421f08dd189ad0917bbd56c54f48a7c302c3 Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 22:21:31 +0000
Subject: [PATCH 3/8] Revert "MINOR: add missing <li> to upgrade.html (#18817)"

This reverts commit c1a813b7400ff38bef887f1e1a9d2ef801afb5cb.
---
 docs/upgrade.html | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/upgrade.html b/docs/upgrade.html
index 3c42da282145e..1ff685c95cd80 100644
--- a/docs/upgrade.html
+++ b/docs/upgrade.html
@@ -339,7 +339,6 @@ <h5><a id="upgrade_400_notable" href="#upgrade_400_notable">Notable changes in 4
 		    This allows to not only collect the metric of the internally used clients of a Kafka Streams appliction via a broker-side plugin,
 		    but also to collect the <a href="/{{version}}/documentation/#kafka_streams_monitoring">metrics</a> of the Kafka Streams runtime itself.
             </li>
-                <li>
                     The default value of 'num.recovery.threads.per.data.dir' has been changed from 1 to 2. The impact of this is faster
                     recovery post unclean shutdown at the expense of extra IO cycles.
                     See <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-1030%3A+Change+constraints+and+default+values+for+various+configurations">KIP-1030</a> 

From 10739f836020e1d4b1d50b5a7cd85ef6cbaa45cf Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 22:40:23 +0000
Subject: [PATCH 4/8] add debug output and always run the html check step

---
 .github/scripts/check_html.py | 2 ++
 .github/workflows/build.yml   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py
index 1ea33012bc96b..1dcfb01aa4c33 100644
--- a/.github/scripts/check_html.py
+++ b/.github/scripts/check_html.py
@@ -23,8 +23,10 @@ def is_valid_HTML_tag(html_string_to_check: str) -> bool:
 
 if __name__ == "__main__":
     htmls = glob(pathname="**/*.html", recursive=True)
+    print("kkkk html", htmls)
     for html in htmls:
         with open(html, "r") as fp:
             content = fp.read()
+            print("kkk check content")
             if not is_valid_HTML_tag(content):
                 exit(1)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d521bcc7fa647..0de7c2b4291da 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -144,7 +144,9 @@ jobs:
             exit 1
           fi
       - name: Check html content
+        if: always()
         run: |
+          echo "execute Check html content"
           if [ python .github/scripts/check_html.py -ne 0]; then
             exit 1
           fi

From ebfc724c19fc3931af0110a89b14f51535ff2eca Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 23:08:26 +0000
Subject: [PATCH 5/8] fix shell

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 0de7c2b4291da..ef69994f7caad 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -147,7 +147,7 @@ jobs:
         if: always()
         run: |
           echo "execute Check html content"
-          if [ python .github/scripts/check_html.py -ne 0]; then
+          if ! python .github/scripts/check_html.py; then # test the exit status, not stdout
             exit 1
           fi
 

From e9766cc9061075d208f821fd604584e3297e85aa Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 23:19:13 +0000
Subject: [PATCH 6/8] add beautifulsoup4 dependency

---
 .github/scripts/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
index d59455f79dac6..91f84eecbc4d8 100644
--- a/.github/scripts/requirements.txt
+++ b/.github/scripts/requirements.txt
@@ -15,3 +15,4 @@
 PyYAML~=6.0
 pytz==2024.2
 requests==2.32.3
+beautifulsoup4==4.10.0

From e38945a6c4da28f3038de904ae0074d029aaec0c Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 23:27:23 +0000
Subject: [PATCH 7/8] fix glob import

---
 .github/scripts/check_html.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py
index 1dcfb01aa4c33..ca2e244fb9bfa 100644
--- a/.github/scripts/check_html.py
+++ b/.github/scripts/check_html.py
@@ -15,7 +15,7 @@
 
 
 from bs4 import BeautifulSoup
-import glob
+from glob import glob
 
 def is_valid_HTML_tag(html_string_to_check: str) -> bool:
     soup = BeautifulSoup(html_string_to_check, 'html.parser')

From 2c8067fa0f39a1b6ea771103d7121138750bba2d Mon Sep 17 00:00:00 2001
From: TaiJu Wu <tjwu1217@gmail.com>
Date: Mon, 10 Feb 2025 23:33:57 +0000
Subject: [PATCH 8/8] print html content

---
 .github/scripts/check_html.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py
index ca2e244fb9bfa..6f8f62e62f6ae 100644
--- a/.github/scripts/check_html.py
+++ b/.github/scripts/check_html.py
@@ -27,6 +27,6 @@ def is_valid_HTML_tag(html_string_to_check: str) -> bool:
     for html in htmls:
         with open(html, "r") as fp:
             content = fp.read()
-            print("kkk check content")
+            print("kkk check content", str(content))
             if not is_valid_HTML_tag(content):
                 exit(1)