From f528d0da75dc0aa2bc0815155c447a1310f5783c Mon Sep 17 00:00:00 2001 From: Sean Yang Date: Thu, 20 Feb 2025 17:48:59 -0800 Subject: [PATCH] Add predefined log modes (#3221) - add predefined log modes ("default", "concise", and "verbose") to simulator and dynamic log config admin commands config argument - move simulator log_config.json to fuel/utils - update documentation - also applied to 2.5 branch (https://github.com/NVIDIA/NVFlare/pull/3237) ### Types of changes - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [ ] New tests added to cover the changes. - [ ] Quick tests passed locally by running `./runtest.sh`. - [ ] In-line docstrings updated. - [ ] Documentation updated. --- .../configurations/logging_configuration.rst | 27 ++++-- examples/tutorials/logging.ipynb | 54 ++++++++++- .../simulator => fuel/utils}/log_config.json | 0 nvflare/fuel/utils/log_utils.py | 92 +++++++++++++------ nvflare/job_config/api.py | 2 +- .../private/fed/app/simulator/simulator.py | 9 +- .../fed/app/simulator/simulator_runner.py | 41 +++++---- .../fed/app/simulator/simulator_worker.py | 12 +-- nvflare/private/fed/client/admin_commands.py | 7 +- nvflare/private/fed/client/sys_cmd.py | 4 +- nvflare/private/fed/server/server_commands.py | 7 +- nvflare/private/fed/server/sys_cmd.py | 4 +- 12 files changed, 188 insertions(+), 71 deletions(-) rename nvflare/{private/fed/app/simulator => fuel/utils}/log_config.json (100%) diff --git a/docs/user_guide/configurations/logging_configuration.rst b/docs/user_guide/configurations/logging_configuration.rst index 8c86ba4703..cc1e9818a9 100644 --- a/docs/user_guide/configurations/logging_configuration.rst +++ b/docs/user_guide/configurations/logging_configuration.rst @@ -20,7 +20,8 @@ Logging Configuration and Features Default Logging Configuration ============================= -The default logging configuration json file **log_config.json.default** is divided into 3 main sections: formatters, handlers, and loggers. +The default logging configuration json file (**log_config.json.default**, ``default``) is divided into 3 main sections: formatters, handlers, and loggers. +This file can be found at :github_nvflare_link:`log_config.json `. See the `configuration dictionary schema <(https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema)>`_ for more details. .. code-block:: json @@ -255,7 +256,7 @@ The following log file handlers are pre-configured: - jsonFileHandler with jsonFormatter to write json formatted logs to ``log.json`` - FLFileHandler with baseFormatter and FLFilter to write fl training and custom logs to ``log_fl.txt`` - +.. _loggers: Loggers ======= @@ -288,7 +289,7 @@ Modifying Logging Configurations Simulator log configuration =========================== -Users can specify a log configuration file in the simulator command with the ``-l`` simulator argument: +Users can specify a log configuration in the simulator command with the ``-l`` simulator argument: .. code-block:: shell @@ -301,6 +302,13 @@ Or using the ``log_config`` argument of the Job API simulator run: job.simulator_run("/tmp/nvflare/hello-numpy-sag", log_config="log_config.json") +The log config argument be one of the following: + +- path to a log configuration json file (``/path/to/my_log_config.json``) +- preconfigured log mode (``default``, ``concise``, ``verbose``) +- log level name or number (``debug``, ``info``, ``warning``, ``error``, ``critical``, ``30``) + + POC log configurations ====================== If you search the POC workspace, you will find the following: @@ -342,16 +350,23 @@ We also recommend using the :ref:`Dynamic Logging Configuration Commands ...``, or ``all`` - **config**: log configuration - path to a json log configuration file (``/path/to/my_log_config.json``) + - predefined log mode (``default``, ``concise``, ``verbose``) - log level name/number (``debug``, ``INFO``, ``30``) - - read the current log configuration file (``reload``) + - read the current log configuration file from the workspace (``reload``) -To configure the target site logging (does not affect jobs): +To configure the target site logging (does not affect currently running jobs): .. code-block:: shell diff --git a/examples/tutorials/logging.ipynb b/examples/tutorials/logging.ipynb index 35d07f86e9..3a4d14a45b 100644 --- a/examples/tutorials/logging.ipynb +++ b/examples/tutorials/logging.ipynb @@ -23,7 +23,12 @@ "source": [ "## Simulator Logging\n", "\n", - "To get started, let's run the **hello-numpy-sag** job in the simulator and take a look at the default logging output:" + "To get started, let's run the **hello-numpy-sag** job in the simulator and take a look at the default logging output.\n", + "\n", + "The simulator `--log_config` (`-l`) argument can be used to set the log config mode ('concise', 'full', 'verbose'), filepath to a log config json file, or level (info, debug, error, etc.).\n", + "For this example we first show the `\"full\"` mode to compare it with our custom configuration.\n", + "\n", + "Note: this argument is defaulted to `\"concise\"` in the simulator mode, however feel free to experiment with the other modes or levels." ] }, { @@ -34,18 +39,18 @@ "outputs": [], "source": [ "!mkdir -p hello-numpy-sag-workspace\n", - "!nvflare simulator -w hello-numpy-sag-workspace -n 2 -t 2 ../hello-world/hello-numpy-sag/jobs/hello-numpy-sag" + "!nvflare simulator -w hello-numpy-sag-workspace -n 2 -t 2 -l full ../hello-world/hello-numpy-sag/jobs/hello-numpy-sag " ] }, { "cell_type": "markdown", - "id": "80ee3335", + "id": "3bb2c22c", "metadata": {}, "source": [ "Notice how the output contains lots of logs from both the FLARE system, as well as the training workflow.\n", "Additionally, the different level of logs (eg. INFO, WARNING, ERROR) have different console colors.\n", "\n", - "We can view the default configuration used in this run and the generated log files in the workspace:" + "We now will cover custom log configurations using the log config json file option. First lets look at the default configuration used in this run and the generated log files in the workspace:" ] }, { @@ -269,7 +274,7 @@ "id": "d3b43521", "metadata": {}, "source": [ - "Compare this to the original output from the first command, and note the differences in the log output.\n", + "Compare this to the original output from the first command, and note the differences in the log output. Note that this concise format can also be achieved using the simulator \"concise\" mode, however this helps cover how to customize the logs using the file.\n", "\n", "In addition to the consoleHandler, all the other formatters, filters, handlers, and loggers can all also be customized just as easily.\n", "\n", @@ -303,6 +308,7 @@ "\n", "- **target**: server, client ..., or all\n", "- **config**: log configuration\n", + " - log mode (concise, full, verbose)\n", " - path to a json log configuration file (/path/to/my_log_config.json)\n", " - log level name/number (debug, INFO, 30)\n", " - read the current log configuration file (reload)\n", @@ -368,6 +374,44 @@ "source": [ "!tree /tmp/nvflare/poc" ] + }, + { + "cell_type": "markdown", + "id": "b50c4865", + "metadata": {}, + "source": [ + "## Defining Loggers in the Hierarchy\n", + "\n", + "When defining new loggers, we provide several functions to help adhere to the FLARE package logger hierarchy. For example say we have the following module at `my_package.my_module`:\n", + "\n", + "- [get_obj_logger](https://nvflare.readthedocs.io/en/main/apidocs/nvflare.fuel.utils.log_utils.html#nvflare.fuel.utils.log_utils.get_obj_logger) for classes. Ex: \n", + "```\n", + " class MyClass:\n", + " def __init__(self):\n", + " self.logger = get_obj_logger(self) # my_package.my_module.MyClass\n", + "```\n", + "\n", + "- [get_script_logger](https://nvflare.readthedocs.io/en/main/apidocs/nvflare.fuel.utils.log_utils.html#nvflare.fuel.utils.log_utils.get_script_logger) for scripts (if not in a package, default to custom.). Ex:\n", + "```\n", + " if __name__ == \"__main__\":\n", + " logger = get_script_logger() # my_package.my_module\n", + "```\n", + "\n", + "- [get_module_logger](https://nvflare.readthedocs.io/en/main/apidocs/nvflare.fuel.utils.log_utils.html#nvflare.fuel.utils.log_utils.get_module_logger) for modules. Ex:\n", + "```\n", + " def my_function():\n", + " logger = get_module_logger(name=\"my_function\") # my_package.my_module.my_function\n", + "```\n", + "\n", + "\n", + "For more information, refer to the [Logging Configuration Documentation](https://nvflare.readthedocs.io/en/main/user_guide/configurations/logging_configuration.html#loggers) for definining loggers. " + ] + }, + { + "cell_type": "markdown", + "id": "ac669133", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/nvflare/private/fed/app/simulator/log_config.json b/nvflare/fuel/utils/log_config.json similarity index 100% rename from nvflare/private/fed/app/simulator/log_config.json rename to nvflare/fuel/utils/log_config.json diff --git a/nvflare/fuel/utils/log_utils.py b/nvflare/fuel/utils/log_utils.py index 2e15b5ee98..8c3e6d30c7 100644 --- a/nvflare/fuel/utils/log_utils.py +++ b/nvflare/fuel/utils/log_utils.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import copy import inspect import json import logging @@ -19,9 +20,40 @@ import re from logging import Logger from logging.handlers import RotatingFileHandler +from typing import Union from nvflare.apis.workspace import Workspace +DEFAULT_LOG_JSON = "log_config.json" + + +class LogMode: + RELOAD = "reload" + FULL = "full" + CONCISE = "concise" + VERBOSE = "verbose" + + +# Predefined log dicts based from DEFAULT_LOG_JSON +with open(os.path.join(os.path.dirname(__file__), DEFAULT_LOG_JSON), "r") as f: + default_log_dict = json.load(f) + +concise_log_dict = copy.deepcopy(default_log_dict) +concise_log_dict["formatters"]["consoleFormatter"]["fmt"] = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +concise_log_dict["handlers"]["consoleHandler"]["filters"] = ["FLFilter"] + +verbose_log_dict = copy.deepcopy(default_log_dict) +verbose_log_dict["formatters"]["consoleFormatter"][ + "fmt" +] = "%(asctime)s - %(identity)s - %(name)s - %(levelname)s - %(message)s" +verbose_log_dict["loggers"]["root"]["level"] = "DEBUG" + +logmode_config_dict = { + LogMode.FULL: default_log_dict, + LogMode.CONCISE: concise_log_dict, + LogMode.VERBOSE: verbose_log_dict, +} + class ANSIColor: # Basic ANSI color codes @@ -229,6 +261,7 @@ def matches_name(self, name, logger_names) -> bool: def get_module_logger(module=None, name=None): + # Get module logger name adhering to logger hierarchy. Optionally add name as a suffix. if module is None: caller_globals = inspect.stack()[1].frame.f_globals module = caller_globals.get("__name__", "") @@ -237,11 +270,12 @@ def get_module_logger(module=None, name=None): def get_obj_logger(obj): - return logging.getLogger(f"{obj.__module__}.{obj.__class__.__qualname__}") + # Get object logger name adhering to logger hierarchy. + return logging.getLogger(f"{obj.__module__}.{obj.__class__.__qualname__}") if obj else None def get_script_logger(): - # Get script logger name based on filename and package. If not in a package, default to custom. + # Get script logger name adhering to logger hierarchy. Based on package and filename. If not in a package, default to custom. caller_frame = inspect.stack()[1] package = caller_frame.frame.f_globals.get("__package__", "") file = caller_frame.frame.f_globals.get("__file__", "") @@ -278,40 +312,40 @@ def apply_log_config(dict_config, dir_path: str = "", file_prefix: str = ""): logging.config.dictConfig(dict_config) -def dynamic_log_config(config: str, workspace: Workspace, job_id: str = None): - # Dynamically configure log given a config (filepath, levelname, levelnumber, 'reload'), apply the config to the proper locations. - if not isinstance(config, str): - raise ValueError( - f"Unsupported config type. Expect config to be string filepath, levelname, levelnumber, or 'reload' but got {type(config)}" - ) +def dynamic_log_config(config: Union[dict, str], dir_path: str, reload_path: str): + # Dynamically configure log given a config (dict, filepath, LogMode, or level), apply the config to the proper locations. - if config == "reload": - config = workspace.get_log_config_file_path() + if isinstance(config, dict): + apply_log_config(config, dir_path) + elif isinstance(config, str): + # Handle pre-defined LogModes + if config == LogMode.RELOAD: + config = reload_path + elif log_config := logmode_config_dict.get(config): + apply_log_config(copy.deepcopy(log_config), dir_path) + return - if os.path.isfile(config): - # Read confg file - with open(config, "r") as f: - dict_config = json.load(f) + # Read config file + if os.path.isfile(config): + with open(config, "r") as f: + dict_config = json.load(f) - if job_id: - dir_path = workspace.get_run_dir(job_id) + apply_log_config(dict_config, dir_path) else: - dir_path = workspace.get_root_dir() + # If logging is not yet configured, use default config + if not logging.getLogger().hasHandlers(): + apply_log_config(default_log_dict, dir_path) - apply_log_config(dict_config, dir_path) - - else: - # Set level of root logger based on levelname or levelnumber - if config.isdigit(): - level = int(config) - if not (0 <= level <= 50): - raise ValueError(f"Invalid logging level: {level}") - else: - level = getattr(logging, config.upper(), None) - if level is None: + # Set level of root logger based on levelname or levelnumber + level = int(config) if config.isdigit() else getattr(logging, config.upper(), None) + if level is None or not (0 <= level <= 50): raise ValueError(f"Invalid logging level: {config}") - logging.getLogger().setLevel(level) + logging.getLogger().setLevel(level) + else: + raise ValueError( + f"Unsupported config type. Expect config to be a dict, filepath, level, or LogMode but got {type(config)}" + ) def add_log_file_handler(log_file_name): diff --git a/nvflare/job_config/api.py b/nvflare/job_config/api.py index ae85d32b45..a09bb61954 100644 --- a/nvflare/job_config/api.py +++ b/nvflare/job_config/api.py @@ -533,7 +533,7 @@ def simulator_run( n_clients: number of clients. threads: number of threads. gpu: gpu assignments for simulating clients, comma separated - log_config: log config json file path + log_config: log config mode ('concise', 'default', 'verbose'), filepath, or level Returns: diff --git a/nvflare/private/fed/app/simulator/simulator.py b/nvflare/private/fed/app/simulator/simulator.py index 6f914be1d9..a3566e9158 100644 --- a/nvflare/private/fed/app/simulator/simulator.py +++ b/nvflare/private/fed/app/simulator/simulator.py @@ -18,6 +18,7 @@ import sys from sys import platform +from nvflare.fuel.utils.log_utils import LogMode from nvflare.private.fed.app.simulator.simulator_runner import SimulatorRunner from nvflare.private.fed.app.utils import version_check @@ -29,7 +30,13 @@ def define_simulator_parser(simulator_parser): simulator_parser.add_argument("-c", "--clients", type=str, help="client names list") simulator_parser.add_argument("-t", "--threads", type=int, help="number of parallel running clients") simulator_parser.add_argument("-gpu", "--gpu", type=str, help="list of GPU Device Ids, comma separated") - simulator_parser.add_argument("-l", "--log_config", type=str, help="log config file path") + simulator_parser.add_argument( + "-l", + "--log_config", + type=str, + default=LogMode.CONCISE, + help="log config mode ('concise', 'full', 'verbose'), filepath, or level", + ) simulator_parser.add_argument("-m", "--max_clients", type=int, default=100, help="max number of clients") simulator_parser.add_argument( "--end_run_for_all", diff --git a/nvflare/private/fed/app/simulator/simulator_runner.py b/nvflare/private/fed/app/simulator/simulator_runner.py index ad80f32141..0ac2ec7cdb 100644 --- a/nvflare/private/fed/app/simulator/simulator_runner.py +++ b/nvflare/private/fed/app/simulator/simulator_runner.py @@ -49,10 +49,11 @@ from nvflare.fuel.f3.stats_pool import StatsPoolManager from nvflare.fuel.hci.server.authz import AuthorizationService from nvflare.fuel.sec.audit import AuditService +from nvflare.fuel.utils import log_utils from nvflare.fuel.utils.argument_utils import parse_vars from nvflare.fuel.utils.config_service import ConfigService from nvflare.fuel.utils.gpu_utils import get_host_gpu_ids -from nvflare.fuel.utils.log_utils import apply_log_config +from nvflare.fuel.utils.log_utils import dynamic_log_config from nvflare.fuel.utils.network_utils import get_open_ports from nvflare.fuel.utils.zip_utils import split_path, unzip_all_from_bytes, zip_directory_to_bytes from nvflare.private.defs import AppFolderConstants @@ -126,8 +127,10 @@ def __init__( f" {os.path.join(running_dir, self.workspace)}" ) self.workspace = os.path.join(running_dir, self.workspace) + if log_config: - self.log_config = os.path.join(running_dir, log_config) + log_config_path = os.path.join(running_dir, log_config) + self.log_config = log_config_path if os.path.isfile(log_config_path) else log_config def _generate_args( self, @@ -172,18 +175,14 @@ def setup(self): for i in range(self.args.n_clients): self.client_names.append("site-" + str(i + 1)) + log_config_file_path = os.path.join(self.args.workspace, "local", WorkspaceConstants.LOGGING_CONFIG) + if not os.path.isfile(log_config_file_path): + log_config_file_path = os.path.join(os.path.dirname(log_utils.__file__), WorkspaceConstants.LOGGING_CONFIG) + if self.args.log_config: - log_config_file_path = self.args.log_config - if not os.path.isfile(log_config_file_path): - self.logger.error(f"log_config: {log_config_file_path} is not a valid file path") - return False + log_config = self.args.log_config else: - log_config_file_path = os.path.join(self.args.workspace, "local", WorkspaceConstants.LOGGING_CONFIG) - if not os.path.isfile(log_config_file_path): - log_config_file_path = os.path.join(os.path.dirname(__file__), WorkspaceConstants.LOGGING_CONFIG) - - with open(log_config_file_path, "r") as f: - dict_config = json.load(f) + log_config = log_config_file_path self.args.config_folder = "config" self.args.job_id = SimulatorConstants.JOB_NAME @@ -206,7 +205,11 @@ def setup(self): os.makedirs(os.path.join(self.simulator_root, SiteType.SERVER)) - apply_log_config(dict_config, os.path.join(self.simulator_root, SiteType.SERVER)) + dynamic_log_config( + config=log_config, + dir_path=os.path.join(self.simulator_root, SiteType.SERVER), + reload_path=log_config_file_path, + ) try: data_bytes, job_name, meta = self.validate_job_data() @@ -694,14 +697,16 @@ def _pick_next_client(self): def do_one_task(self, client, num_of_threads, gpu, lock, timeout=60.0, task_name=RunnerTask.TASK_EXEC): open_port = get_open_ports(1)[0] client_workspace = os.path.join(self.args.workspace, client.client_name) + + log_config_file_path = os.path.join(self.args.workspace, "local", WorkspaceConstants.LOGGING_CONFIG) + if not os.path.isfile(log_config_file_path): + log_config_file_path = os.path.join(os.path.dirname(log_utils.__file__), WorkspaceConstants.LOGGING_CONFIG) + if self.args.log_config: logging_config = self.args.log_config - if not os.path.isfile(logging_config): - raise ValueError(f"log_config: {logging_config} is not a valid file path") else: - logging_config = os.path.join( - self.args.workspace, client.client_name, "local", WorkspaceConstants.LOGGING_CONFIG - ) + logging_config = log_config_file_path + decomposer_module = ConfigService.get_str_var( name=ConfigVarName.DECOMPOSER_MODULE, conf=SystemConfigs.RESOURCES_CONF ) diff --git a/nvflare/private/fed/app/simulator/simulator_worker.py b/nvflare/private/fed/app/simulator/simulator_worker.py index 33a663d51e..3099eed416 100644 --- a/nvflare/private/fed/app/simulator/simulator_worker.py +++ b/nvflare/private/fed/app/simulator/simulator_worker.py @@ -13,7 +13,6 @@ # limitations under the License. import argparse -import json import os import sys import threading @@ -30,7 +29,7 @@ from nvflare.fuel.f3.mpm import MainProcessMonitor as mpm from nvflare.fuel.hci.server.authz import AuthorizationService from nvflare.fuel.sec.audit import AuditService -from nvflare.fuel.utils.log_utils import apply_log_config +from nvflare.fuel.utils.log_utils import dynamic_log_config from nvflare.private.fed.app.deployer.base_client_deployer import BaseClientDeployer from nvflare.private.fed.app.utils import check_parent_alive, init_security_content_service from nvflare.private.fed.client.client_engine import ClientEngine @@ -240,11 +239,6 @@ def main(args): thread = threading.Thread(target=check_parent_alive, args=(parent_pid, stop_event)) thread.start() - with open(args.logging_config, "r") as f: - dict_config = json.load(f) - - apply_log_config(dict_config, args.workspace) - os.chdir(args.workspace) startup = os.path.join(args.workspace, WorkspaceConstants.STARTUP_FOLDER_NAME) os.makedirs(startup, exist_ok=True) @@ -252,6 +246,10 @@ def main(args): os.makedirs(local, exist_ok=True) workspace = Workspace(root_dir=args.workspace, site_name=args.client) + dynamic_log_config( + config=args.logging_config, dir_path=args.workspace, reload_path=workspace.get_log_config_file_path() + ) + fobs_initialize(workspace, job_id=SimulatorConstants.JOB_NAME) register_ext_decomposers(args.decomposer_module) AuthorizationService.initialize(EmptyAuthorizer()) diff --git a/nvflare/private/fed/client/admin_commands.py b/nvflare/private/fed/client/admin_commands.py index 5b164d70d5..88479461cb 100644 --- a/nvflare/private/fed/client/admin_commands.py +++ b/nvflare/private/fed/client/admin_commands.py @@ -270,8 +270,13 @@ def process(self, data: Shareable, fl_ctx: FLContext): """ engine = fl_ctx.get_engine() + workspace = engine.get_workspace() try: - dynamic_log_config(data, engine.get_workspace(), fl_ctx.get_job_id()) + dynamic_log_config( + config=data, + dir_path=workspace.get_run_dir(fl_ctx.get_job_id()), + reload_path=workspace.get_log_config_file_path(), + ) except Exception as e: return secure_format_exception(e) diff --git a/nvflare/private/fed/client/sys_cmd.py b/nvflare/private/fed/client/sys_cmd.py index ed79573487..3183dc7979 100644 --- a/nvflare/private/fed/client/sys_cmd.py +++ b/nvflare/private/fed/client/sys_cmd.py @@ -95,7 +95,9 @@ def process(self, req: Message, app_ctx) -> Message: workspace = fl_ctx.get_prop(FLContextKey.WORKSPACE_OBJECT) try: - dynamic_log_config(req.body, workspace) + dynamic_log_config( + config=req.body, dir_path=workspace.get_root_dir(), reload_path=workspace.get_log_config_file_path() + ) except Exception as e: return error_reply(secure_format_exception(e)) diff --git a/nvflare/private/fed/server/server_commands.py b/nvflare/private/fed/server/server_commands.py index e5215cf85c..89f5b601de 100644 --- a/nvflare/private/fed/server/server_commands.py +++ b/nvflare/private/fed/server/server_commands.py @@ -443,8 +443,13 @@ def process(self, data: Shareable, fl_ctx: FLContext): """ engine = fl_ctx.get_engine() + workspace = engine.get_workspace() try: - dynamic_log_config(data, engine.get_workspace(), fl_ctx.get_job_id()) + dynamic_log_config( + config=data, + dir_path=workspace.get_run_dir(fl_ctx.get_job_id()), + reload_path=workspace.get_log_config_file_path(), + ) except Exception as e: return secure_format_exception(e) diff --git a/nvflare/private/fed/server/sys_cmd.py b/nvflare/private/fed/server/sys_cmd.py index a908cc27b6..4d2e3030c1 100644 --- a/nvflare/private/fed/server/sys_cmd.py +++ b/nvflare/private/fed/server/sys_cmd.py @@ -148,7 +148,9 @@ def configure_site_log(self, conn: Connection, args: [str]): workspace = engine.get_workspace() try: - dynamic_log_config(config, workspace) + dynamic_log_config( + config=config, dir_path=workspace.get_root_dir(), reload_path=workspace.get_log_config_file_path() + ) except Exception as e: conn.append_error( secure_format_exception(e),