From 19399b7c2b3681cf19ff87203e16bfbf406cbe66 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Fri, 19 Jul 2024 10:34:55 -0400 Subject: [PATCH] Updates for pydantic serialization / deserialization to rebase on current main, address review comments, and finalize last pieces as well as test fixes --- .pre-commit-config.yaml | 3 + pyproject.toml | 9 +- src/guidellm/backend/base.py | 8 +- src/guidellm/core/distribution.py | 33 ++-- src/guidellm/core/request.py | 40 ++--- src/guidellm/core/result.py | 182 ++++++++++++++------- src/guidellm/core/serializable.py | 13 +- src/guidellm/executor/profile_generator.py | 18 +- src/guidellm/main.py | 8 +- src/guidellm/scheduler/scheduler.py | 5 +- tests/unit/backend/test_openai_backend.py | 1 - tests/unit/core/test_distribution.py | 10 +- tests/unit/core/test_request.py | 32 ++-- 13 files changed, 226 insertions(+), 136 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16fd4d1..ec7b37e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,6 +28,8 @@ repos: datasets, loguru, numpy, + pydantic, + pyyaml, openai, requests, transformers, @@ -38,4 +40,5 @@ repos: # types types-click, types-requests, + types-PyYAML, ] diff --git a/pyproject.toml b/pyproject.toml index 0a48d13..fcd6d2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ dependencies = [ "loguru", "numpy", "openai", + "pydantic>=2.0.0", + "pyyaml>=6.0.0", "requests", "transformers", ] @@ -48,7 +50,12 @@ dev = [ "pytest-mock~=3.14.0", "ruff~=0.5.2", "tox~=4.16.0", - "types-requests~=2.32.0" + "types-requests~=2.32.0", + + # type-checking + "types-click", + "types-requests", + "types-PyYAML", ] diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py index 644d28c..031f6a2 100644 --- a/src/guidellm/backend/base.py +++ b/src/guidellm/backend/base.py @@ -91,7 +91,9 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult: logger.info(f"Submitting request with prompt: {request.prompt}") - result = TextGenerationResult(TextGenerationRequest(prompt=request.prompt)) + result = TextGenerationResult( + request=TextGenerationRequest(prompt=request.prompt) + ) result.start(request.prompt) for response in self.make_request(request): # GenerativeResponse @@ -99,8 +101,8 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult: result.output_token(response.add_token) elif response.type_ == "final": result.end( - response.prompt_token_count, - response.output_token_count, + prompt_token_count=response.prompt_token_count, + output_token_count=response.output_token_count, ) logger.info(f"Request completed with output: {result.output}") diff --git a/src/guidellm/core/distribution.py b/src/guidellm/core/distribution.py index a48291d..cefe39a 100644 --- a/src/guidellm/core/distribution.py +++ b/src/guidellm/core/distribution.py @@ -1,7 +1,8 @@ -from typing import List, Optional, Union +from typing import List, Sequence import numpy as np from loguru import logger +from pydantic import Field from guidellm.core.serializable import Serializable @@ -14,24 +15,12 @@ class Distribution(Serializable): statistical analyses. """ - def __init__(self, **data): - super().__init__(**data) - logger.debug(f"Initialized Distribution with data: {self.data}") + data: Sequence[float] = Field( + default_factory=list, description="The data points of the distribution." + ) - def __str__(self) -> str: - """ - Return a string representation of the Distribution. 
- """ - return ( - f"Distribution(mean={self.mean:.2f}, median={self.median:.2f}, " - f"min={self.min}, max={self.max}, count={len(self.data)})" - ) - - def __repr__(self) -> str: - """ - Return an unambiguous string representation of the Distribution for debugging. - """ - return f"Distribution(data={self.data})" + def __str__(self): + return f"Distribution({self.describe()})" @property def mean(self) -> float: @@ -99,7 +88,7 @@ def percentile(self, percentile: float) -> float: logger.warning("No data points available to calculate percentile.") return 0.0 - percentile_value = np.percentile(self._data, percentile).item() + percentile_value = np.percentile(self.data, percentile).item() logger.debug(f"Calculated {percentile}th percentile: {percentile_value}") return percentile_value @@ -180,15 +169,15 @@ def describe(self) -> dict: logger.debug(f"Generated description: {description}") return description - def add_data(self, new_data: Union[List[int], List[float]]): + def add_data(self, new_data: Sequence[float]): """ Add new data points to the distribution. :param new_data: A list of new numerical data points to add. """ - self.data.extend(new_data) + self.data = list(self.data) + list(new_data) logger.debug(f"Added new data: {new_data}") - def remove_data(self, remove_data: Union[List[int], List[float]]): + def remove_data(self, remove_data: Sequence[float]): """ Remove specified data points from the distribution. :param remove_data: A list of numerical data points to remove. diff --git a/src/guidellm/core/request.py b/src/guidellm/core/request.py index 98df6c7..89abaf7 100644 --- a/src/guidellm/core/request.py +++ b/src/guidellm/core/request.py @@ -1,5 +1,7 @@ import uuid -from typing import Dict, Optional, Any +from typing import Any, Dict, Optional + +from pydantic import Field from guidellm.core.serializable import Serializable @@ -9,24 +11,18 @@ class TextGenerationRequest(Serializable): A class to represent a text generation request for generative AI workloads. """ - id: str - prompt: str - prompt_token_count: Optional[int] - generated_token_count: Optional[int] - params: Dict[str, Any] - - def __init__( - self, - prompt: str, - prompt_token_count: Optional[int] = None, - generated_token_count: Optional[int] = None, - params: Optional[Dict[str, Any]] = None, - id: Optional[str] = None, - ): - super().__init__( - id=str(uuid.uuid4()) if id is None else id, - prompt=prompt, - prompt_token_count=prompt_token_count, - generated_token_count=generated_token_count, - params=params or {}, - ) + id: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="The unique identifier for the request.", + ) + prompt: str = Field(description="The input prompt for the text generation.") + prompt_token_count: Optional[int] = Field( + default=None, description="The number of tokens in the input prompt." + ) + generate_token_count: Optional[int] = Field( + default=None, description="The number of tokens to generate." 
+ ) + params: Dict[str, Any] = Field( + default_factory=dict, + description="The parameters for the text generation request.", + ) diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py index ef536dd..7decb62 100644 --- a/src/guidellm/core/result.py +++ b/src/guidellm/core/result.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional, Union from loguru import logger +from pydantic import Field from guidellm.core.distribution import Distribution from guidellm.core.request import TextGenerationRequest @@ -22,19 +23,43 @@ class TextGenerationResult(Serializable): for generative AI workloads. """ - request: TextGenerationRequest - prompt: str = "" - prompt_word_count: int = 0 - prompt_token_count: int = 0 - output: str = "" - output_word_count: int = 0 - output_token_count: int = 0 - last_time: Optional[float] = None - first_token_set: bool = False - start_time: Optional[float] = None - end_time: Optional[float] = None - first_token_time: Optional[float] = None - decode_times: Distribution = Distribution() + request: TextGenerationRequest = Field( + description="The text generation request used to generate the result." + ) + prompt: str = Field( + default_factory=str, description="The input prompt for the text generation." + ) + prompt_word_count: int = Field( + default=0, description="The number of words in the input prompt." + ) + prompt_token_count: int = Field( + default=0, description="The number of tokens in the input prompt." + ) + output: str = Field( + default_factory=str, description="The generated output for the text generation." + ) + output_word_count: int = Field( + default=0, description="The number of words in the output." + ) + output_token_count: int = Field( + default=0, description="The number of tokens in the output." + ) + last_time: float = Field(default=None, description="The last time recorded.") + first_token_set: bool = Field( + default=False, description="Whether the first token time is set." + ) + start_time: float = Field( + default=None, description="The start time of the text generation." + ) + end_time: float = Field( + default=None, description="The end time of the text generation." + ) + first_token_time: float = Field( + default=None, description="The time taken to decode the first token." + ) + decode_times: Distribution = Field( + default_factory=Distribution, description="The distribution of decode times." + ) def start(self, prompt: str): """ @@ -52,25 +77,6 @@ def start(self, prompt: str): logger.info("Text generation started with prompt: '{}'", prompt) - def _recording_started(self, raise_exception: bool = True) -> bool: - """ - Ensure that the benchmark text generation recording is started. - - We can assume that if the `self._start_time` exist, - then the `start()` has been called. - """ - - if self._start_time is not None: - return True - else: - if raise_exception is True: - raise ValueError( - "start time is not specified. " - "Did you make the `text_generation_benchmark.start()`?" - ) - else: - return False - def output_token(self, token: str): """ Add a token to the output and record the decode time. 
@@ -80,13 +86,13 @@ def output_token(self, token: str): """ current_counter = time() - if not self._first_token_set: + if not self.first_token_set: self.first_token_time = current_counter - self.last_time self.first_token_set = True - logger.debug(f"First token decode time: {self._first_token_time}") + logger.debug(f"First token decode time: {self.first_token_time}") else: decode_time = current_counter - self.last_time - self._decode_times.add_data([decode_time]) + self.decode_times.add_data([decode_time]) logger.debug(f"Token '{token}' decoded in {decode_time} seconds") self.last_time = current_counter @@ -95,6 +101,7 @@ def output_token(self, token: str): def end( self, + output: Optional[str] = None, prompt_token_count: Optional[int] = None, output_token_count: Optional[int] = None, ): @@ -111,12 +118,35 @@ def end( :type output_token_count: Optional[int] """ self.end_time = time() + + if output: + self.output = output + self.output_word_count = len(self.output.split()) - self.output_token_count = output_token_count or self._output_word_count - self.prompt_token_count = prompt_token_count or self._prompt_word_count + self.output_token_count = output_token_count or self.output_word_count + self.prompt_token_count = prompt_token_count or self.prompt_word_count logger.info(f"Text generation ended with output: '{self.output}'") + def _check_recording_started(self, raise_exception: bool = True) -> bool: + """ + Ensure that the benchmark text generation recording is started. + + We can assume that if the `self._start_time` exist, + then the `start()` has been called. + """ + + if self.start_time is not None: + return True + else: + if raise_exception is True: + raise ValueError( + "start time is not specified. " + "Did you make the `text_generation_benchmark.start()`?" + ) + else: + return False + class TextGenerationError(Serializable): """ @@ -124,8 +154,12 @@ class TextGenerationError(Serializable): for generative AI workloads. """ - request: TextGenerationRequest - error: str + request: TextGenerationRequest = Field( + description="The text generation request that resulted in an error." + ) + error: str = Field( + description="The error message that occurred during text generation." + ) def __init__(self, request: TextGenerationRequest, error: Exception): super().__init__(request=request, error=str(error)) @@ -137,10 +171,10 @@ class RequestConcurrencyMeasurement(Serializable): A dataclass to represent the concurrency measurement of a request. """ - time: float - completed: int - errored: int - processing: int + time: float = Field(description="The time of the measurement.") + completed: int = Field(description="The number of completed requests.") + errored: int = Field(description="The number of errored requests.") + processing: int = Field(description="The number of processing requests.") class TextGenerationBenchmark(Serializable): @@ -150,11 +184,20 @@ class TextGenerationBenchmark(Serializable): This is a set of results and errors for a specific mode and rate. """ - mode: str - rate: Optional[float] - results: List[TextGenerationResult] = [] - errors: List[TextGenerationError] = [] - concurrencies: List[RequestConcurrencyMeasurement] = [] + mode: str = Field(description="The generation mode, either 'async' or 'sync'.") + rate: float = Field( + default=None, description="The requested rate of requests per second." + ) + results: List[TextGenerationResult] = Field( + default_factory=list, description="The results of the text generation requests." 
+ ) + errors: List[TextGenerationError] = Field( + default_factory=list, description="The errors of the text generation requests." + ) + concurrencies: List[RequestConcurrencyMeasurement] = Field( + default_factory=list, + description="The concurrency measurements of the requests.", + ) def __iter__(self): """ @@ -162,7 +205,7 @@ def __iter__(self): :return: An iterator over the results. """ - return iter(self._results) + return iter(self.results) @property def request_count(self) -> int: @@ -185,20 +228,45 @@ def error_count(self) -> int: return len(self.errors) @property - def request_rate(self) -> float: + def completed_request_rate(self) -> float: """ Get the rate of requests per second in the result. :return: The rate of requests per second. :rtype: float """ - if not self._results: + if not self.results: return 0.0 else: return self.request_count / ( - self._results[-1].end_time - self._results[0].start_time + self.results[-1].end_time - self.results[0].start_time ) + @property + def overloaded(self) -> bool: + if not self.results or not self.concurrencies: + raise ValueError("No results or concurrencies to check for overload.") + + if self.rate is None or len(self.concurrencies) < 2: + # if rate was not set, sync mode is assumed, + # or we have less than 2 data points, + # then we cannot be overloaded by definition + return False + + if self.completed_request_rate < 0.60 * self.rate: + # if the calculated rate is less than 60% of the requested rate, + # safe to assume the system is overloaded + return True + + # rate comparisons did not give a clear signal, + # let's double check that we aren't overloaded by comparing the + # compute throughput for the benchmark with the latency for the requests. + # overall this means that a relatively flat or decreasing throughput curve + # over time in addition to a growing processing queue is a sign of overload + + # TODO + return False + def request_started(self): """ Record the start of a generation request. @@ -266,8 +334,12 @@ class TextGenerationBenchmarkReport(Serializable): This is a collection of benchmarks for different modes and rates. """ - benchmarks: List[TextGenerationBenchmark] = [] - args: List[Dict[str, Any]] = [] + benchmarks: List[TextGenerationBenchmark] = Field( + default_factory=list, description="The benchmarks of text generation requests." + ) + args: List[Dict[str, Any]] = Field( + default_factory=list, description="The arguments used for the benchmarks." + ) def __iter__(self): return iter(self.benchmarks) @@ -280,7 +352,7 @@ def benchmarks_sorted(self) -> List[TextGenerationBenchmark]: :return: The sorted list of benchmarks. :rtype: List[TextGenerationBenchmark] """ - benchmarks = sorted(self.benchmarks, key=lambda x: x.request_rate) + benchmarks = sorted(self.benchmarks, key=lambda x: x.completed_request_rate) return benchmarks def add_benchmark(self, benchmark: TextGenerationBenchmark): diff --git a/src/guidellm/core/serializable.py b/src/guidellm/core/serializable.py index afa40dd..7749026 100644 --- a/src/guidellm/core/serializable.py +++ b/src/guidellm/core/serializable.py @@ -2,7 +2,7 @@ import yaml from loguru import logger -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class Serializable(BaseModel): @@ -11,6 +11,13 @@ class Serializable(BaseModel): deserialization. 
""" + model_config = ConfigDict( + extra="forbid", + use_enum_values=True, + validate_assignment=True, + from_attributes=True, + ) + def __init__(self, /, **data: Any) -> None: super().__init__(**data) logger.debug( @@ -27,7 +34,6 @@ def to_yaml(self) -> str: """ logger.debug("Serializing to YAML... {}", self) yaml_str = yaml.dump(self.model_dump()) - logger.debug("Serialized to YAML: {}", yaml_str) return yaml_str @@ -41,7 +47,6 @@ def from_yaml(cls, data: str): """ logger.debug("Deserializing from YAML... {}", data) obj = cls.model_validate(yaml.safe_load(data)) - logger.debug("Deserialized from YAML: {}", obj) return obj @@ -53,7 +58,6 @@ def to_json(self) -> str: """ logger.debug("Serializing to JSON... {}", self) json_str = self.model_dump_json() - logger.debug("Serialized to JSON: {}", json_str) return json_str @@ -67,6 +71,5 @@ def from_json(cls, data: str): """ logger.debug("Deserializing from JSON... {}", data) obj = cls.model_validate_json(data) - logger.debug("Deserialized from JSON: {}", obj) return obj diff --git a/src/guidellm/executor/profile_generator.py b/src/guidellm/executor/profile_generator.py index 8946a25..fbfde65 100644 --- a/src/guidellm/executor/profile_generator.py +++ b/src/guidellm/executor/profile_generator.py @@ -120,9 +120,9 @@ def next_profile( if not last_benchmark.overloaded: last_rate = ( - last_benchmark.args_rate - if last_benchmark.args_rate - else last_benchmark.request_rate + last_benchmark.rate + if last_benchmark.rate + else last_benchmark.completed_request_rate ) return Profile( load_gen_mode=LoadGenerationModes.CONSTANT, @@ -133,14 +133,14 @@ def next_profile( first_benchmark = current_report.benchmarks[0] min_rate = ( - first_benchmark.args_rate - if first_benchmark.args_rate - else first_benchmark.request_rate + first_benchmark.rate + if first_benchmark.rate + else first_benchmark.completed_request_rate ) max_rate = ( - last_benchmark.args_rate - if last_benchmark.args_rate - else last_benchmark.request_rate + last_benchmark.rate + if last_benchmark.rate + else last_benchmark.completed_request_rate ) self._pending_rates = list(numpy.linspace(min_rate, max_rate, 10)) diff --git a/src/guidellm/main.py b/src/guidellm/main.py index 2fb5508..0e7cc55 100644 --- a/src/guidellm/main.py +++ b/src/guidellm/main.py @@ -1,5 +1,3 @@ -import json - import click from guidellm.backend import Backend @@ -125,13 +123,13 @@ def main( def save_report(report: TextGenerationBenchmarkReport, filename: str): - with open(filename, "w") as f: - json.dump(report.to_dict(), f, indent=4) + with open(filename, "w") as file: + file.write(report.to_json()) def print_report(report: TextGenerationBenchmarkReport): for benchmark in report.benchmarks: - print(f"Rate: {benchmark.request_rate}, Results: {benchmark.results}") + print(f"Rate: {benchmark.completed_request_rate}, Results: {benchmark.results}") if __name__ == "__main__": diff --git a/src/guidellm/scheduler/scheduler.py b/src/guidellm/scheduler/scheduler.py index 51eee11..3c02b27 100644 --- a/src/guidellm/scheduler/scheduler.py +++ b/src/guidellm/scheduler/scheduler.py @@ -49,7 +49,7 @@ def run(self) -> TextGenerationBenchmark: return result def _run_sync(self) -> TextGenerationBenchmark: - result_set = TextGenerationBenchmark(mode=self._load_gen_mode.value, rate=None) + result_set = TextGenerationBenchmark(mode=self._load_gen_mode.value) start_time = time.time() counter = 0 @@ -68,6 +68,9 @@ def _run_sync(self) -> TextGenerationBenchmark: return result_set async def _run_async(self) -> TextGenerationBenchmark: + if 
self._load_gen_rate is None: + raise ValueError("Rate must be specified for asynchronous load generation") + result_set = TextGenerationBenchmark( mode=self._load_gen_mode.value, rate=self._load_gen_rate ) diff --git a/tests/unit/backend/test_openai_backend.py b/tests/unit/backend/test_openai_backend.py index 93a5a29..39e79da 100644 --- a/tests/unit/backend/test_openai_backend.py +++ b/tests/unit/backend/test_openai_backend.py @@ -82,7 +82,6 @@ def test_make_request( backend_service.make_request(request=request), openai_completion_create_patch, ): - total_generative_responses += 1 expected_token: Optional[str] = getattr(patched_completion, "content") or None diff --git a/tests/unit/core/test_distribution.py b/tests/unit/core/test_distribution.py index fac99bf..111b9c7 100644 --- a/tests/unit/core/test_distribution.py +++ b/tests/unit/core/test_distribution.py @@ -46,7 +46,15 @@ def test_distribution_remove_data(): def test_distribution_str(): data = [1, 2, 3, 4, 5] dist = Distribution(data=data) - assert str(dist) == "Distribution(mean=3.00, median=3.00, min=1, max=5, count=5)" + assert str(dist) == ( + "Distribution({'mean': 3.0, 'median': 3.0, " + "'variance': 2.0, 'std_deviation': 1.4142135623730951, " + "'percentile_indices': " + "[10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99], " + "'percentile_values': " + "[1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 4.8, 4.96], " + "'min': 1, 'max': 5, 'range': 4})" + ) @pytest.mark.regression diff --git a/tests/unit/core/test_request.py b/tests/unit/core/test_request.py index fbd239c..327397d 100644 --- a/tests/unit/core/test_request.py +++ b/tests/unit/core/test_request.py @@ -6,10 +6,10 @@ @pytest.mark.smoke def test_text_generation_request_initialization(): prompt = "Generate a story" - request = TextGenerationRequest(prompt) + request = TextGenerationRequest(prompt=prompt) assert request.prompt == prompt assert request.prompt_token_count is None - assert request.generated_token_count is None + assert request.generate_token_count is None assert request.params == {} @@ -17,17 +17,17 @@ def test_text_generation_request_initialization(): def test_text_generation_request_initialization_with_params(): prompt = "Generate a story" prompt_token_count = 50 - generated_token_count = 100 + generate_token_count = 100 params = {"temperature": 0.7} request = TextGenerationRequest( prompt=prompt, prompt_token_count=prompt_token_count, - generated_token_count=generated_token_count, + generate_token_count=generate_token_count, params=params, ) assert request.prompt == prompt assert request.prompt_token_count == prompt_token_count - assert request.generated_token_count == generated_token_count + assert request.generate_token_count == generate_token_count assert request.params == params @@ -35,9 +35,14 @@ def test_text_generation_request_initialization_with_params(): def test_request_json(): prompt = "Generate text" prompt_token_count = 10 - generated_token_count = 50 + generate_token_count = 50 params = {"temperature": 0.7} - request = TextGenerationRequest(prompt=prompt, prompt_token_count=prompt_token_count, generated_token_count=generated_token_count, params=params) + request = TextGenerationRequest( + prompt=prompt, + prompt_token_count=prompt_token_count, + generate_token_count=generate_token_count, + params=params, + ) json_str = request.to_json() assert '"prompt":"Generate text"' in json_str assert '"id":' in json_str @@ -46,7 +51,7 @@ def test_request_json(): assert request.id == request_restored.id assert request_restored.prompt == prompt assert 
request_restored.prompt_token_count == prompt_token_count - assert request_restored.generated_token_count == generated_token_count + assert request_restored.generate_token_count == generate_token_count assert request_restored.params == params @@ -54,9 +59,14 @@ def test_request_json(): def test_request_yaml(): prompt = "Generate text" prompt_token_count = 15 - generated_token_count = 55 + generate_token_count = 55 params = {"temperature": 0.8} - request = TextGenerationRequest(prompt=prompt, prompt_token_count=prompt_token_count, generated_token_count=generated_token_count, params=params) + request = TextGenerationRequest( + prompt=prompt, + prompt_token_count=prompt_token_count, + generate_token_count=generate_token_count, + params=params, + ) yaml_str = request.to_yaml() assert "prompt: Generate text" in yaml_str assert "id:" in yaml_str @@ -65,5 +75,5 @@ def test_request_yaml(): assert request.id == request_restored.id assert request_restored.prompt == prompt assert request_restored.prompt_token_count == prompt_token_count - assert request_restored.generated_token_count == generated_token_count + assert request_restored.generate_token_count == generate_token_count assert request_restored.params == params
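
A minimal sketch of the JSON/YAML round trip this patch enables, for reviewers who want to sanity-check the pydantic-backed models locally. The `guidellm.core.request` import path and the literal values below are assumptions for illustration, mirroring the tests above rather than prescribing an API.

    # Round-trip sketch; assumes this branch is installed and that
    # TextGenerationRequest is importable from guidellm.core.request.
    from guidellm.core.request import TextGenerationRequest

    request = TextGenerationRequest(
        prompt="Generate a story",
        prompt_token_count=50,
        generate_token_count=100,
        params={"temperature": 0.7},
    )

    # JSON round trip via the Serializable base class added in this patch
    json_str = request.to_json()
    restored = TextGenerationRequest.from_json(json_str)
    assert restored.id == request.id
    assert restored.params == {"temperature": 0.7}

    # YAML round trip works the same way (yaml.dump / yaml.safe_load under the hood)
    yaml_str = request.to_yaml()
    assert TextGenerationRequest.from_yaml(yaml_str).prompt == request.prompt

The same to_json/from_json and to_yaml/from_yaml pair applies to the other Serializable subclasses touched here (TextGenerationResult, TextGenerationBenchmark, TextGenerationBenchmarkReport), which is what save_report in main.py now relies on.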