TaiJuWu · TaiJuWu · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/.github/scripts/check_html.py b/.github/scripts/check_html.py
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from bs4 import BeautifulSoup
+from glob import glob
+
+def is_valid_HTML_tag(html_string_to_check: str) -> bool:
+    soup = BeautifulSoup(html_string_to_check, 'html.parser')
+    return html_string_to_check == str(soup)
+
+if __name__ == "__main__":
+    htmls = glob(pathname="**/*.html", recursive=True)
+    print("kkkk html", htmls)
+    for html in htmls:
+        with open(html, "r") as fp:
+            content = fp.read()
+            print("kkk check content", str(content))
+            if not is_valid_HTML_tag(content):
+                exit(1)
diff --git a/.github/scripts/requirements.txt b/.github/scripts/requirements.txt
@@ -15,3 +15,4 @@
 PyYAML~=6.0
 pytz==2024.2
 requests==2.32.3
+beautifulsoup4==4.10.0
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -143,6 +143,13 @@ jobs:
             find ./site-docs/generated -type f -exec grep -L "." {} \; >&2
             exit 1
           fi
+      - name: Check html content
+        if: always()
+        run: |
+          echo "execute Check html content"
+          # Fail the step when the checker exits non-zero. Test the exit status
+          # directly: "$(...)" would capture the script's stdout, not its status.
+          python .github/scripts/check_html.py || exit 1
 
   test:
     needs: [validate, load-catalog]

diff --git a/docs/upgrade.html b/docs/upgrade.html
@@ -339,7 +339,6 @@ <h5><a id="upgrade_400_notable" href="#upgrade_400_notable">Notable changes in 4
 		    This allows to not only collect the metric of the internally used clients of a Kafka Streams appliction via a broker-side plugin,
 		    but also to collect the <a href="/{{version}}/documentation/#kafka_streams_monitoring">metrics</a> of the Kafka Streams runtime itself.
             </li>
-                <li>
                     The default value of 'num.recovery.threads.per.data.dir' has been changed from 1 to 2. The impact of this is faster
                     recovery post unclean shutdown at the expense of extra IO cycles.
                     See <a href="https://cwiki.apache.org/confluence/display/KAFKA/KIP-1030%3A+Change+constraints+and+default+values+for+various+configurations">KIP-1030</a>