diff --git a/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_configuration.py b/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_configuration.py
index 0e8e358484..669fd9275c 100644
--- a/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_configuration.py
+++ b/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_configuration.py
@@ -13,7 +13,7 @@ def add_definitions(config_builder: ConfigurationBuilder):
     """
     config_builder.add(
         key=TROUBLESHOOT_UTILITIES_TO_USE,
-        default_value=None,
+        default_value="",
         description="""
             An unordered comma-separated list of strings, indicating what troubleshooting utilities
             are to be used. E.g., "utility1,utility2".
diff --git a/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_utilities/healthcheck_server.py b/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_utilities/healthcheck_server.py
index b793273ea3..4d0b1bec0b 100644
--- a/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_utilities/healthcheck_server.py
+++ b/projects/vdk-plugins/vdk-jobs-troubleshooting/src/vdk/plugin/jobs_troubleshoot/troubleshoot_utilities/healthcheck_server.py
@@ -1,6 +1,7 @@
 # Copyright 2021 VMware, Inc.
 # SPDX-License-Identifier: Apache-2.0
 import logging
+import socket
 from http.server import HTTPServer
 from threading import Thread
 from typing import Any
@@ -21,25 +22,71 @@ def __init__(self, port: int, handler: Any = None):
             port (int): The port number on which the server will listen for requests.
             handler (Any, optional): The request handler class. Defaults to SimpleHTTPRequestHandler.
         """
-        if handler:
-            self._server = HTTPServer(("", port), handler)
-            self._thread = Thread(target=self._server.serve_forever)
-            log.error(f"Troubleshooting utility server started on port {port}.")
-        else:
-            log.error(
-                "Troubleshooting utility handler not specified. Will not start the server."
+        # Set up front so start() and stop() can detect a server that was never created.
+        self._server = None
+        self._thread = None
+        try:
+            if handler:
+                port = self.find_open_port(port)
+                self._server = HTTPServer(("", port), handler)
+                self._thread = Thread(target=self._server.serve_forever)
+                log.info(f"Troubleshooting utility server started on port {port}.")
+            else:
+                log.error(
+                    "Troubleshooting utility handler not specified. Will not start the server."
+                )
+        except Exception as e:
+            log.error(
+                "Error during troubleshooting server initialization", exc_info=e
             )
 
     def start(self):
         """
         Starts the server.
         """
-        self._thread.start()
+        if self._thread is None:
+            log.warning("Troubleshooting server was not initialized. Nothing to start.")
+            return
+        try:
+            self._thread.start()
+        except Exception as e:
+            log.error("Unable to start troubleshooting server", exc_info=e)
 
     def stop(self):
         """
         Stops the server.
         """
-        self._server.shutdown()
-        self._server.server_close()
-        self._thread.join()
+        if self._server is None:
+            return
+        try:
+            # shutdown() deadlocks unless serve_forever() is running in another thread,
+            # so it is only called when the serving thread is alive.
+            if self._thread.is_alive():
+                self._server.shutdown()
+                self._thread.join()
+            self._server.server_close()
+        except Exception as e:
+            log.error("Unable to stop troubleshooting server", exc_info=e)
+
+    @staticmethod
+    def find_open_port(start_port: int):
+        """
+        Finds the first port, starting at start_port, that can currently be bound.
+
+        Args:
+            start_port (int): The first port number to try.
+        Returns:
+            int: The first port in [start_port, 65535] on which bind() succeeds.
+        Raises:
+            OSError: If no port in that range can be bound.
+        """
+        for port in range(start_port, 65536):
+            # The context manager closes the probe socket even when bind() fails.
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                try:
+                    s.bind(("", port))
+                except OSError:
+                    # Port is already in use. Try the next one
+                    continue
+                return port
+        raise OSError(f"No open port found in range {start_port}-65535.")
diff --git a/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/jobs/request-thread-dump/request-thread-dump.py b/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/jobs/request-thread-dump/request-thread-dump.py
index 013aecee36..dcba8139ed 100644
--- a/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/jobs/request-thread-dump/request-thread-dump.py
+++ b/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/jobs/request-thread-dump/request-thread-dump.py
@@ -7,12 +7,23 @@
 
 
 def run(job_input: IJobInput):
-    response = requests.get("http://localhost:8783/threads")
-    print(response.status_code)
-    print(response.text)
+    # The troubleshooting server moves to the next free port when its default
+    # one is taken, so probe upwards from the default port.
+    for target_port in range(8783, 65536):
+        try:
+            response = requests.get(
+                f"http://localhost:{target_port}/threads", timeout=5
+            )
+        except requests.exceptions.RequestException:
+            # Nothing answered on this port. Try the next one
+            continue
+        if response.status_code != 200:
+            # A different server owns this port. Try the next one
+            continue
+        print(response.status_code)
+        print(response.text)
 
-    if response.status_code != 200:
-        raise Exception("unexpected response code from server")
-
-    if "Thread:MainThread" not in response.text:
-        raise Exception("unexpected output from server")
+        if "Thread:MainThread" not in response.text:
+            raise Exception("unexpected output from server")
+        return
+    raise Exception("unable to connect to server")
diff --git a/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/test_thread_dump.py b/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/test_thread_dump.py
index 669750bb38..0c4b2ce266 100644
--- a/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/test_thread_dump.py
+++ b/projects/vdk-plugins/vdk-jobs-troubleshooting/tests/functional/test_thread_dump.py
@@ -4,6 +4,9 @@
 # SPDX-License-Identifier: Apache-2.0
 import os
 import pathlib
+from http.server import HTTPServer
+from http.server import SimpleHTTPRequestHandler
+from threading import Thread
 from unittest import mock
 
 from click.testing import Result
@@ -19,7 +22,7 @@ def job_path(job_name: str):
     )
 
 
-def test_http_ingestion():
+def test_thread_dump():
     with mock.patch.dict(
         os.environ,
         {
@@ -33,3 +36,31 @@ def test_http_ingestion():
         result: Result = runner.invoke(["run", job_path("request-thread-dump")])
         cli_assert_equal(0, result)
         assert "Dumping threads stacks" in result.stdout
+
+
+def test_thread_dump_used_port():
+    port = 8783
+    # Occupy the default port so the plugin has to fall back to another one.
+    server = HTTPServer(("", port), SimpleHTTPRequestHandler)
+    thread = Thread(target=server.serve_forever)
+    # Started outside the try block: server.shutdown() in the finally clause would
+    # block forever if serve_forever() had never been running.
+    thread.start()
+
+    try:
+        with mock.patch.dict(
+            os.environ,
+            {
+                "VDK_TROUBLESHOOT_UTILITIES_TO_USE": "thread-dump",
+                "VDK_PORT_TO_USE": f"{port}",
+            },
+        ):
+            runner = CliEntryBasedTestRunner(jobs_troubleshoot_plugin)
+
+            result: Result = runner.invoke(["run", job_path("request-thread-dump")])
+            cli_assert_equal(0, result)
+            assert "Dumping threads stacks" in result.stdout
+    finally:
+        server.shutdown()
+        server.server_close()
+        thread.join()