This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

yapf after ruff :)
robertgshaw2-redhat committed Mar 15, 2024
1 parent e283528 commit c5633f2
Showing 12 changed files with 50 additions and 74 deletions.
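
The formatting commands themselves are not recorded in this commit; as a minimal sketch of the assumed "ruff, then yapf" pass (the tool flags and target path below are illustrative, not taken from this repository's automation), the workflow amounts to applying ruff's autofixes first and then letting yapf reflow the result in place:

import subprocess

def format_tree(path: str = ".") -> None:
    # Apply ruff's lint-level autofixes first (unused imports, etc.).
    subprocess.run(["ruff", "check", path, "--fix"], check=True)
    # Then let yapf rewrap and re-indent the code in place, recursively.
    subprocess.run(["yapf", "--in-place", "--recursive", path], check=True)

if __name__ == "__main__":
    format_tree()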
1 change: 0 additions & 1 deletion benchmarks/benchmark_serving.py
@@ -1,6 +1,5 @@
# flake8: noqa
-# UPSTREAM SYNC: noqa is required for passing ruff run on nm-automation

"""Benchmark online serving throughput.
On the server side, run one of the following commands:
2 changes: 1 addition & 1 deletion csrc/punica/bgmv/generator.py
@@ -10,7 +10,7 @@
#include "bgmv_impl.cuh"
FOR_BGMV_WIDE_NARROW(INST_BGMV_TWOSIDE, {input_dtype}, {output_dtype}, {weight_dtype})
""".lstrip() # noqa: E501 (UPSTREAM SYNC nm-automation)
""".lstrip() # noqa: E501 (UPSTREAM SYNC nm-automation)

for input_dtype in DTYPES:
for output_dtype in DTYPES:
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/common.py
@@ -27,7 +27,7 @@ def max_model_length_from_model_id(model: str,


def script_args_to_cla(config: NamedTuple) -> Iterable[dict]:
-    # config is a NamedTuple constructed from some JSON
+    # config is a NamedTuple constructed from some JSON
# in neuralmagic/benchmarks/configs
kv = vars(config.script_args)

35 changes: 12 additions & 23 deletions neuralmagic/benchmarks/run_benchmark_serving.py
@@ -8,10 +8,8 @@
from typing import NamedTuple, Optional
from pathlib import Path

-from .common import (
-    download_model, max_model_length_from_model_id,
-    script_args_to_cla, benchmark_configs
-)
+from .common import (download_model, max_model_length_from_model_id,
+                     script_args_to_cla, benchmark_configs)
from .scripts.common import warmup_server, num_available_gpus
from ..tools.call_cmd import call_cmd

@@ -59,26 +57,19 @@ def try_connection() -> bool:
return False


-def run_benchmark_serving_script(
-        config: NamedTuple,
-        output_directory: Optional[Path] = None
-) -> None:
+def run_benchmark_serving_script(config: NamedTuple,
+                                 output_directory: Optional[Path] = None
+                                 ) -> None:
assert config.script_name == 'benchmark_serving'

-    def run_bench(
-        server_cmd: str,
-        bench_cmd: list[str],
-        model: str
-    ) -> None:
+    def run_bench(server_cmd: str, bench_cmd: list[str], model: str) -> None:
try:
# start server
-            server_process = subprocess.Popen(
-                "exec " + server_cmd, shell=True)
+            server_process = subprocess.Popen("exec " + server_cmd, shell=True)
if not is_server_running(BENCH_SERVER_HOST, BENCH_SERVER_PORT):
raise ValueError(
f"Aborting bench run with : server-cmd {server_cmd} , "
f"bench-cmd {bench_cmd}. Reason: Cannot start Server"
)
f"bench-cmd {bench_cmd}. Reason: Cannot start Server")

# server warmup
warmup_server(server_host=BENCH_SERVER_HOST,
@@ -106,16 +97,14 @@ def run_bench(

supported_max_model_len = max_model_length_from_model_id(model)

-    # If the requested model-len is too big, try running with the
+    # If the requested model-len is too big, try running with the
# maximum supported for this model.
max_model_lens = set(
map(lambda v: min(v, supported_max_model_len),
config.max_model_lens))
if (config.max_model_lens != list(max_model_lens)):
-        print(
-            f"WARNING: max_model_len modified to {max_model_lens} "
-            f"from {config.max_model_lens} for model {model}"
-        )
+        print(f"WARNING: max_model_len modified to {max_model_lens} "
+              f"from {config.max_model_lens} for model {model}")

for max_model_len in max_model_lens:

@@ -132,7 +121,7 @@ def run_bench(
server_args["sparsity"] = sparsity

server_cmd = "python3 -m vllm.entrypoints.api_server " + \
" ".join([f"--{k} {v}"
" ".join([f"--{k} {v}"
for k, v in server_args.items()])

for script_args in script_args_to_cla(config):
13 changes: 5 additions & 8 deletions neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -3,9 +3,8 @@
from pathlib import Path
from typing import NamedTuple, Optional

-from .common import (
-    script_args_to_cla, benchmark_configs, max_model_length_from_model_id
-)
+from .common import (script_args_to_cla, benchmark_configs,
+                     max_model_length_from_model_id)
from ..tools.call_cmd import call_cmd


@@ -21,16 +20,14 @@ def run_benchmark_throughput_script(config: NamedTuple,

supported_max_model_len = max_model_length_from_model_id(model)

-    # If the requested model-len is too big, try running with
+    # If the requested model-len is too big, try running with
# the maximum supported for this model.
max_model_lens = set(
map(lambda v: min(v, supported_max_model_len),
config.max_model_lens))
if (config.max_model_lens != list(max_model_lens)):
-        print(
-            f"WARNING: max_model_len modified to {max_model_lens} "
-            f"from {config.max_model_lens} for model {model}"
-        )
+        print(f"WARNING: max_model_len modified to {max_model_lens} "
+              f"from {config.max_model_lens} for model {model}")

for max_model_len in max_model_lens:
for script_args in script_args_to_cla(config):
2 changes: 1 addition & 1 deletion neuralmagic/benchmarks/scripts/backend_request_func.py
@@ -220,7 +220,7 @@ async def async_request_deepspeed_mii(
output = RequestFuncOutput()
output.prompt_len = request_func_input.prompt_len

-    # DeepSpeed-MII doesn't support streaming as of Jan 28 2024,
+    # DeepSpeed-MII doesn't support streaming as of Jan 28 2024,
# will use 0 as placeholder.
# https://github.com/microsoft/DeepSpeed-MII/pull/311
output.ttft = 0
38 changes: 16 additions & 22 deletions neuralmagic/benchmarks/scripts/benchmark_serving.py
@@ -346,8 +346,7 @@ def script_args_as_json_dict(script_args: argparse.Namespace):

# Add information about the derived variables as metadata
metadata_key = BenchmarkResult.METADATA_KEY_
-    result[metadata_key][
-        ResultMetadataKeys.num_prompts] = num_prompts
+    result[metadata_key][ResultMetadataKeys.num_prompts] = num_prompts
result[metadata_key][ResultMetadataKeys.request_rate] = \
request_rate if request_rate < float("inf") else "inf"

@@ -384,10 +383,8 @@ def from_str(arg: str):
"--description",
type=str,
default="benchmark-serving",
-        help=
-        "Benchmark description. This is primarily useful when "
-        "we log the benchmark results and process them for plotting charts"
-    )
+        help="Benchmark description. This is primarily useful when "
+        "we log the benchmark results and process them for plotting charts")
parser.add_argument(
"--backend",
type=str,
@@ -436,7 +433,7 @@ def from_str(arg: str):
"--tokenizer",
type=str,
help="Name or path of the tokenizer, "
"if not using the default model tokenizer.",
"if not using the default model tokenizer.",
)
parser.add_argument(
"--best-of",
@@ -480,15 +477,14 @@ def from_str(arg: str):
"Otherwise, we use Poisson process to synthesize "
"the request arrival times.",
)
-    parser.add_argument(
-        "--nr-qps-pair_",
-        type=NumPrompts_RequestRate_T.from_str,
-        help="""
+    parser.add_argument("--nr-qps-pair_",
+                        type=NumPrompts_RequestRate_T.from_str,
+                        help="""
First argument in the pair is num_prompts to process.
Second argument in the pair is request_rate per second.
If this is inf, then all the requests are sent at time 0.
Otherwise, we use Poisson process to synthesize""",
-        default=None)
+                        default=None)

# Server command args
parser.add_argument(
@@ -503,27 +499,25 @@ def from_str(arg: str):
"--server-args",
type=str,
default=None,
-        help=
-        "When we are logging the output, it is useful to log the "
-        "arguments passed to the server"
-    )
+        help="When we are logging the output, it is useful to log the "
+        "arguments passed to the server")

def args_sanity_check(args):
# Sanity check real-dataset vs synthetic-dataset usecase
if args.dataset is None:
-        assert (args.num_input_tokens is not None and
-                args.num_output_tokens is not None)
+        assert (args.num_input_tokens is not None
+                and args.num_output_tokens is not None)
else:
-        assert (args.num_input_tokens is None and
-                args.num_output_tokens is None)
+        assert (args.num_input_tokens is None
+                and args.num_output_tokens is None)
# Sanity check num_prompts, request_rate as separate args vs joint args
assert not all([
args.num_prompts_ is None, args.request_rate_ is None,
args.nr_qps_pair_ is None
])
if args.nr_qps_pair_ is None:
-        assert (args.num_prompts_ is not None and
-                args.request_rate_ is not None)
+        assert (args.num_prompts_ is not None
+                and args.request_rate_ is not None)
else:
assert args.num_prompts_ is None and args.request_rate_ is None
# Sanity check required logging args
10 changes: 4 additions & 6 deletions neuralmagic/benchmarks/scripts/benchmark_throughput.py
@@ -12,7 +12,7 @@
from pathlib import Path
from typing import List, Optional, Tuple
from transformers import AutoTokenizer
-from .common import (generate_synthetic_requests, warmup_vllm_engine,
+from .common import (generate_synthetic_requests, warmup_vllm_engine,
num_available_gpus, print_request_outputs)
from .datasets_registry import get_dataset, DatasetArgs
from .logging.benchmark_result import (BenchmarkResult,
@@ -164,7 +164,7 @@ def main(args: argparse.Namespace):
current_dt_str = current_dt.strftime("%Y%m%d-%H%M%S")
file_name = Path(
args.save_directory
-    ) / f"benchmark_throughput-{args.backend}-{model_id}-{current_dt_str}.json"  # noqa: E501
+    ) / f"benchmark_throughput-{args.backend}-{model_id}-{current_dt_str}.json"  # noqa: E501
result.store(file_name)


@@ -174,10 +174,8 @@ def main(args: argparse.Namespace):
"--description",
type=str,
default="benchmark-throughput",
-        help=
-        "Benchmark description. This is primarily useful when "
-        "we log the benchmark results and process them for plotting charts"
-    )
+        help="Benchmark description. This is primarily useful when "
+        "we log the benchmark results and process them for plotting charts")
parser.add_argument("--backend",
type=str,
choices=["vllm"],
6 changes: 3 additions & 3 deletions neuralmagic/benchmarks/scripts/common.py
@@ -12,8 +12,8 @@
from vllm.outputs import RequestOutput
from vllm.transformers_utils.tokenizer import get_tokenizer
from .datasets_registry import SHAREGPT_PATH, SHAREGPT_DOWNLOAD_STR
-from .backend_request_func import (
-    RequestFuncInput, RequestFuncOutput, async_request_vllm)
+from .backend_request_func import (RequestFuncInput, RequestFuncOutput,
+                                    async_request_vllm)
from ...tools.call_cmd import call_cmd


@@ -188,7 +188,7 @@ async def process_requests(input_requests):

def format_io_log(prompt: str, output_text: str, n_prompt_tokens: int,
n_output_tokens: int) -> str:
-    return f"\n=== Prompt ({n_prompt_tokens}) ==\n{prompt}\n==== output({n_output_tokens}) ==\n{output_text}\n"  # noqa: E501
+    return f"\n=== Prompt ({n_prompt_tokens}) ==\n{prompt}\n==== output({n_output_tokens}) ==\n{output_text}\n"  # noqa: E501


def print_request_outputs(results: List[RequestOutput]) -> None:
4 changes: 2 additions & 2 deletions neuralmagic/benchmarks/scripts/datasets_registry.py
@@ -63,8 +63,8 @@ def get_ultrachat(tokenizer: PreTrainedTokenizerBase,
prompts = []
completions = []
system_message = {
"content": "You are a chatbot with the explicit goal of "
"helping the user as best as possible",
"content": "You are a chatbot with the explicit goal of "
"helping the user as best as possible",
"role": "system",
}
for messages in ds["messages"]:
@@ -10,9 +10,8 @@
from dataclasses import dataclass
from typing import List, Iterable, NamedTuple

-from .benchmark_result import (
-    GHABenchmarkToolName, BenchmarkResult, MetricTemplate
-)
+from .benchmark_result import (GHABenchmarkToolName, BenchmarkResult,
+                               MetricTemplate)


@dataclass
6 changes: 3 additions & 3 deletions neuralmagic/tools/call_cmd.py
@@ -1,8 +1,8 @@
#
-# Run cmd as a sub-process.
+# Run cmd as a sub-process.
#
# Capture stdout, stderr, return status, elapsed time and
-# optionally process statistics
+# optionally process statistics
# (user time, system time, peak memory usage, etc.)
#
import os
@@ -15,7 +15,7 @@

def parse_process_stats(str):
exp = (
"\[Timing\].*: elapsed=([0-9\.]+) user=([0-9\.]+) system=([0-9\.]+) " # noqa: E501
"\[Timing\].*: elapsed=([0-9\.]+) user=([0-9\.]+) system=([0-9\.]+) " # noqa: E501
"maxrss=([0-9\.]+) avgrss=([0-9\.]+) avgmem=([0-9\.]+) avgdata=([0-9\.]+)" # noqa: E501
)
results = re.search(exp, str)
