This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

yapf after ruff :)
robertgshaw2-redhat committed Mar 15, 2024
1 parent e283528 commit c5633f2
Showing 12 changed files with 50 additions and 74 deletions.
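
The formatting commands themselves are not recorded in this commit; as a minimal sketch of the assumed "ruff, then yapf" pass (the tool flags and target path below are illustrative, not taken from this repository's automation), the workflow amounts to applying ruff's autofixes first and then letting yapf reflow the result in place:

import subprocess

def format_tree(path: str = ".") -> None:
    # Apply ruff's lint-level autofixes first (unused imports, etc.).
    subprocess.run(["ruff", "check", path, "--fix"], check=True)
    # Then let yapf rewrap and re-indent the code in place, recursively.
    subprocess.run(["yapf", "--in-place", "--recursive", path], check=True)

if __name__ == "__main__":
    format_tree()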
1 change: 0 additions & 1 deletion benchmarks/benchmark_serving.py
@@ -1,6 +1,5 @@
# flake8: noqa
-# UPSTREAM SYNC: noqa is required for passing ruff run on nm-automation

"""Benchmark online serving throughput.
On the server side, run one of the following commands:
2 changes: 1 addition & 1 deletion csrc/punica/bgmv/generator.py
@@ -10,7 +10,7 @@
#include "bgmv_impl.cuh"
FOR_BGMV_WIDE_NARROW(INST_BGMV_TWOSIDE, {input_dtype}, {output_dtype}, {weight_dtype})
""".lstrip() # noqa: E501 (UPSTREAM SYNC nm-automation)
""".lstrip() # noqa: E501 (UPSTREAM SYNC nm-automation)

for input_dtype in DTYPES:
for output_dtype in DTYPES:
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/common.py
@@ -27,7 +27,7 @@ def max_model_length_from_model_id(model: str,


def script_args_to_cla(config: NamedTuple) -> Iterable[dict]:
-    # config is a NamedTuple constructed from some JSON
+    # config is a NamedTuple constructed from some JSON
# in neuralmagic/benchmarks/configs
kv = vars(config.script_args)

35 changes: 12 additions & 23 deletions neuralmagic/benchmarks/run_benchmark_serving.py
@@ -8,10 +8,8 @@
from typing import NamedTuple, Optional
from pathlib import Path

-from .common import (
-    download_model, max_model_length_from_model_id,
-    script_args_to_cla, benchmark_configs
-)
+from .common import (download_model, max_model_length_from_model_id,
+                     script_args_to_cla, benchmark_configs)
from .scripts.common import warmup_server, num_available_gpus
from ..tools.call_cmd import call_cmd

@@ -59,26 +57,19 @@ def try_connection() -> bool:
return False


-def run_benchmark_serving_script(
-        config: NamedTuple,
-        output_directory: Optional[Path] = None
-) -> None:
+def run_benchmark_serving_script(config: NamedTuple,
+                                 output_directory: Optional[Path] = None
+                                 ) -> None:
assert config.script_name == 'benchmark_serving'

-    def run_bench(
-        server_cmd: str,
-        bench_cmd: list[str],
-        model: str
-    ) -> None:
+    def run_bench(server_cmd: str, bench_cmd: list[str], model: str) -> None:
try:
# start server
-            server_process = subprocess.Popen(
-                "exec " + server_cmd, shell=True)
+            server_process = subprocess.Popen("exec " + server_cmd, shell=True)
if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
raise ValueError(
f"Aborting bench run with : server-cmd {server_cmd} , "
f"bench-cmd {bench_cmd}. Reason: Cannot start Server"
)
f"bench-cmd {bench_cmd}. Reason: Cannot start Server")

# server warmup
warmup_server(server_host=BENCH_SERVER_HOST,
@@ -106,16 +97,14 @@ def run_bench(

supported_max_model_len = max_model_length_from_model_id(model)

-    # If the requested model-len is too big, try running with the
+    # If the requested model-len is too big, try running with the
# maximum supported for this model.
max_model_lens = set(
map(lambda v: min(v, supported_max_model_len),
config.max_model_lens))
if (config.max_model_lens != list(max_model_lens)):
-        print(
-            f"WARNING: max_model_len modified to {max_model_lens} "
-            f"from {config.max_model_lens} for model {model}"
-        )
+        print(f"WARNING: max_model_len modified to {max_model_lens} "
+              f"from {config.max_model_lens} for model {model}")

for max_model_len in max_model_lens:

@@ -132,7 +121,7 @@ def run_bench(
server_args["sparsity"] = sparsity

server_cmd = "python3 -m vllm.entrypoints.api_server " + \
" ".join([f"--{k} {v}"
" ".join([f"--{k} {v}"
for k, v in server_args.items()])

for script_args in script_args_to_cla(config):
13 changes: 5 additions & 8 deletions neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -3,9 +3,8 @@
from pathlib import Path
from typing import NamedTuple, Optional

-from .common import (
-    script_args_to_cla, benchmark_configs, max_model_length_from_model_id
-)
+from .common import (script_args_to_cla, benchmark_configs,
+                     max_model_length_from_model_id)
from ..tools.call_cmd import call_cmd


@@ -21,16 +20,14 @@ def run_benchmark_throughput_script(config: NamedTuple,

supported_max_model_len = max_model_length_from_model_id(model)

-    # If the requested model-len is too big, try running with
+    # If the requested model-len is too big, try running with
# the maximum supported for this model.
max_model_lens = set(
map(lambda v: min(v, supported_max_model_len),
config.max_model_lens))
if (config.max_model_lens != list(max_model_lens)):
-        print(
-            f"WARNING: max_model_len modified to {max_model_lens} "
-            f"from {config.max_model_lens} for model {model}"
-        )
+        print(f"WARNING: max_model_len modified to {max_model_lens} "
+              f"from {config.max_model_lens} for model {model}")

for max_model_len in max_model_lens:
for script_args in script_args_to_cla(config):
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/scripts/backend_request_func.py
@@ -220,7 +220,7 @@ async def async_request_deepspeed_mii(
output = RequestFuncOutput()
output.prompt_len = request_func_input.prompt_len

-    # DeepSpeed-MII doesn't support streaming as of Jan 28 2024,
+    # DeepSpeed-MII doesn't support streaming as of Jan 28 2024,
# will use 0 as placeholder.
# https://github.com/microsoft/DeepSpeed-MII/pull/311
output.ttft = 0
38 changes: 16 additions & 22 deletions neuralmagic/benchmarks/scripts/benchmark_serving.py
@@ -346,8 +346,7 @@ def script_args_as_json_dict(script_args: argparse.Namespace):

# Add information about the derived variables as metadata
metadata_key = BenchmarkResult.METADATA_KEY_
-    result[metadata_key][
-        ResultMetadataKeys.num_prompts] = num_prompts
+    result[metadata_key][ResultMetadataKeys.num_prompts] = num_prompts
result[metadata_key][ResultMetadataKeys.request_rate] = \
request_rate if request_rate < float("inf") else "inf"

@@ -384,10 +383,8 @@ def from_str(arg: str):
"--description",
type=str,
default="benchmark-serving",
-        help=
-        "Benchmark description. This is primarily useful when "
-        "we log the benchmark results and process them for plotting charts"
-    )
+        help="Benchmark description. This is primarily useful when "
+        "we log the benchmark results and process them for plotting charts")
parser.add_argument(
"--backend",
type=str,
@@ -436,7 +433,7 @@ def from_str(arg: str):
"--tokenizer",
type=str,
help="Name or path of the tokenizer, "
"if not using the default model tokenizer.",
"if not using the default model tokenizer.",
)
parser.add_argument(
"--best-of",
@@ -480,15 +477,14 @@ def from_str(arg: str):
"Otherwise, we use Poisson process to synthesize "
"the request arrival times.",
)
-    parser.add_argument(
-        "--nr-qps-pair_",
-        type=NumPrompts_RequestRate_T.from_str,
-        help="""
+    parser.add_argument("--nr-qps-pair_",
+                        type=NumPrompts_RequestRate_T.from_str,
+                        help="""
First argument in the pair is num_prompts to process.
Second argument in the pair is request_rate per second.
If this is inf, then all the requests are sent at time 0.
Otherwise, we use Poisson process to synthesize""",
-        default=None)
+                        default=None)

# Server command args
parser.add_argument(
@@ -503,27 +499,25 @@ def from_str(arg: str):
"--server-args",
type=str,
default=None,
-        help=
-        "When we are logging the output, it is useful to log the "
-        "arguments passed to the server"
-    )
+        help="When we are logging the output, it is useful to log the "
+        "arguments passed to the server")

def args_sanity_check(args):
# Sanity check real-dataset vs synthetic-dataset usecase
if args.dataset is None:
-        assert (args.num_input_tokens is not None and
-                args.num_output_tokens is not None)
+        assert (args.num_input_tokens is not None
+                and args.num_output_tokens is not None)
else:
-        assert (args.num_input_tokens is None and
-                args.num_output_tokens is None)
+        assert (args.num_input_tokens is None
+                and args.num_output_tokens is None)
# Sanity check num_prompts, request_rate as separate args vs joint args
assert not all([
args.num_prompts_ is None, args.request_rate_ is None,
args.nr_qps_pair_ is None
])
if args.nr_qps_pair_ is None:
-        assert (args.num_prompts_ is not None and
-                args.request_rate_ is not None)
+        assert (args.num_prompts_ is not None
+                and args.request_rate_ is not None)
else:
assert args.num_prompts_ is None and args.request_rate_ is None
# Sanity check required logging args
10 changes: 4 additions & 6 deletions neuralmagic/benchmarks/scripts/benchmark_throughput.py
@@ -12,7 +12,7 @@
from pathlib import Path
from typing import List, Optional, Tuple
from transformers import AutoTokenizer
-from .common import (generate_synthetic_requests, warmup_vllm_engine,
+from .common import (generate_synthetic_requests, warmup_vllm_engine,
num_available_gpus, print_request_outputs)
from .datasets_registry import get_dataset, DatasetArgs
from .logging.benchmark_result import (BenchmarkResult,
@@ -164,7 +164,7 @@ def main(args: argparse.Namespace):
current_dt_str = current_dt.strftime("%Y%m%d-%H%M%S")
file_name = Path(
args.save_directory
-    ) / f"benchmark_throughput-{args.backend}-{model_id}-{current_dt_str}.json"  # noqa: E501
+    ) / f"benchmark_throughput-{args.backend}-{model_id}-{current_dt_str}.json"  # noqa: E501
result.store(file_name)


@@ -174,10 +174,8 @@ def main(args: argparse.Namespace):
"--description",
type=str,
default="benchmark-throughput",
-        help=
-        "Benchmark description. This is primarily useful when "
-        "we log the benchmark results and process them for plotting charts"
-    )
+        help="Benchmark description. This is primarily useful when "
+        "we log the benchmark results and process them for plotting charts")
parser.add_argument("--backend",
type=str,
choices=["vllm"],
6 changes: 3 additions & 3 deletions neuralmagic/benchmarks/scripts/common.py
@@ -12,8 +12,8 @@
from vllm.outputs import RequestOutput
from vllm.transformers_utils.tokenizer import get_tokenizer
from .datasets_registry import SHAREGPT_PATH, SHAREGPT_DOWNLOAD_STR
-from .backend_request_func import (
-    RequestFuncInput, RequestFuncOutput, async_request_vllm)
+from .backend_request_func import (RequestFuncInput, RequestFuncOutput,
+                                    async_request_vllm)
from ...tools.call_cmd import call_cmd


@@ -188,7 +188,7 @@ async def process_requests(input_requests):

def format_io_log(prompt: str, output_text: str, n_prompt_tokens: int,
n_output_tokens: int) -> str:
-    return f"\n=== Prompt ({n_prompt_tokens}) ==\n{prompt}\n==== output({n_output_tokens}) ==\n{output_text}\n"  # noqa: E501
+    return f"\n=== Prompt ({n_prompt_tokens}) ==\n{prompt}\n==== output({n_output_tokens}) ==\n{output_text}\n"  # noqa: E501


def print_request_outputs(results: List[RequestOutput]) -> None:
4 changes: 2 additions & 2 deletions neuralmagic/benchmarks/scripts/datasets_registry.py
@@ -63,8 +63,8 @@ def get_ultrachat(tokenizer: PreTrainedTokenizerBase,
prompts = []
completions = []
system_message = {
"content": "You are a chatbot with the explicit goal of "
"helping the user as best as possible",
"content": "You are a chatbot with the explicit goal of "
"helping the user as best as possible",
"role": "system",
}
for messages in ds["messages"]:
@@ -10,9 +10,8 @@
from dataclasses import dataclass
from typing import List, Iterable, NamedTuple

-from .benchmark_result import (
-    GHABenchmarkToolName, BenchmarkResult, MetricTemplate
-)
+from .benchmark_result import (GHABenchmarkToolName, BenchmarkResult,
+                               MetricTemplate)


@dataclass
6 changes: 3 additions & 3 deletions neuralmagic/tools/call_cmd.py
@@ -1,8 +1,8 @@
#
-# Run cmd as a sub-process.
+# Run cmd as a sub-process.
#
# Capture stdout, stderr, return status, elapsed time and
-# optionally process statistics
+# optionally process statistics
# (user time, system time, peak memory usage, etc.)
#
import os
@@ -15,7 +15,7 @@

def parse_process_stats(str):
exp = (
"\[Timing\].*: elapsed=([0-9\.]+) user=([0-9\.]+) system=([0-9\.]+) " # noqa: E501
"\[Timing\].*: elapsed=([0-9\.]+) user=([0-9\.]+) system=([0-9\.]+) " # noqa: E501
"maxrss=([0-9\.]+) avgrss=([0-9\.]+) avgmem=([0-9\.]+) avgdata=([0-9\.]+)" # noqa: E501
)
results = re.search(exp, str)
