From b6da6bc063af314d27eae56b9370d1100f1da34d Mon Sep 17 00:00:00 2001
From: Dmytro Parfeniuk
Date: Fri, 12 Jul 2024 16:53:55 +0300
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20WIP?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/ci.yml          | 24 +++++++-
 Makefile                          | 31 ++++++----
 pyproject.toml                    | 77 +++++++++++++++++++++++-
 setup.py                          | 64 --------------------
 src/__init__.py                   |  0
 src/guidellm/backend/base.py      | 19 +++---
 src/guidellm/core/distribution.py |  4 +-
 src/guidellm/core/result.py       | 99 +++++++++++++++++++++----------
 tox.ini                           | 13 ++++
 9 files changed, 214 insertions(+), 117 deletions(-)
 delete mode 100644 setup.py
 delete mode 100644 src/__init__.py
 create mode 100644 tox.ini

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 30404ce..648beab 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1 +1,23 @@
-TODO
\ No newline at end of file
+name: Code Quality Check
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  code_quality:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install tox
+
+      - name: Run tox
+        run: tox
+
diff --git a/Makefile b/Makefile
index 649e728..8d68396 100644
--- a/Makefile
+++ b/Makefile
@@ -1,48 +1,59 @@
 .PHONY: install
 install:
-	python -m pip install -r requirements.txt
+	python -m pip install .
+
 
 .PHONY: install.dev
 install.dev:
-	python -m pip install -e .[dev]
+	python -m pip install .[dev]
+
 
 .PHONY: build
+# setup.py is removed by this patch; build via the PEP 517 frontend instead
+# (requires `pip install build`)
 build:
-	python setup.py sdist bdist_wheel
+	python -m build
 
+.PHONY: style
+style:
+	python -m ruff format src tests
+	python -m isort src tests
+	python -m flake8 src tests --max-line-length 88
+
+
+.PHONY: types
+types:
+	python -m mypy --check-untyped-defs
+
 .PHONY: quality
 quality:
 	python -m ruff check src tests
+	python -m black --check src tests
 	python -m isort --check src tests
-	python -m flake8 src tests --max-line-length 88
-	python -m mypy src
+	python -m mypy --check-untyped-defs
 
-.PHONY: style
-style:
-	python -m ruff format src tests
-	python -m isort src tests
-	python -m flake8 src tests --max-line-length 88
-
 .PHONY: test
 test:
 	python -m pytest -s -vvv --cache-clear tests
 
+
 .PHONY: test.unit
 test.unit:
 	python -m pytest tests/unit
 
+
 .PHONY: test.integration
 test.integration:
 	python -m pytest tests/integration
 
+
 .PHONY: test.e2e
 test.e2e:
 	python -m pytest tests/e2e
 
+
 .PHONY: clean
 clean:
 	rm -rf build
diff --git a/pyproject.toml b/pyproject.toml
index 368610b..5da675a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,22 +2,94 @@
 requires = ["setuptools", "wheel"]
 build-backend = "setuptools.build_meta"
 
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["*"]
+
+[tool.setuptools.package-data]
+guidellm = ["*"]
+
+
+# ************************************************
+# **********    Project Metadata    **********
+# ************************************************
+
+[project]
+name = "guidellm"
+version = "0.1.0"
+description = "Guidance platform for deploying and managing large language models."
+readme = { file = "README.md", content-type = "text/markdown" }
+requires-python = ">=3.8.0,<4.0"
+license = { file = "LICENSE" }
+authors = [{ name = "Neuralmagic, Inc." }]
+urls = { homepage = "https://github.com/neuralmagic/guidellm" }
+dependencies = [
+    "click",
+    "datasets",
+    "loguru",
+    "numpy",
+    "openai",
+    "requests",
+    "transformers",
+]
+
+[project.optional-dependencies]
+dev = [
+    "black",
+    "isort",
+    "mypy",
+    "pre-commit",
+    "pytest",
+    "ruff",
+    "sphinx",
+    "tox",
+]
+code_quality = [
+    "black",
+    "isort",
+    "mypy",
+    "pytest",
+    "ruff",
+]
+
+
+# PEP 621 forbids the `console_scripts` group under [project.entry-points];
+# console scripts belong in [project.scripts].
+[project.scripts]
+guidellm = "guidellm.main:main"
+
+
+# ************************************************
+# **********    Code Quality Tools    **********
+# ************************************************
+
 [tool.black]
 line-length = 88
 target-version = ['py38']
 
+
 [tool.isort]
 profile = "black"
 
+
 [tool.mypy]
 files = "src/guidellm"
+python_version = '3.8'
+warn_redundant_casts = true
+warn_unused_ignores = true
+show_error_codes = true
+namespace_packages = true
+exclude = ["venv", ".tox"]
+
+# Silence "type import errors", since our third-party libs do not ship type stubs
+# Check: https://mypy.readthedocs.io/en/latest/config_file.html#import-discovery
+follow_imports = 'silent'
+
 [tool.ruff]
+line-length = 88
 exclude = ["build", "dist", "env", ".venv"]
 lint.select = ["E", "F", "W"]
 
-[tool.flake8]
-max-line-length = 88
 
 [tool.pytest.ini_options]
 addopts = '-s -vvv --cache-clear'
@@ -27,4 +99,3 @@ markers = [
     "sanity: detailed tests to ensure major functions work correctly",
     "regression: tests to ensure that new changes do not break existing functionality"
 ]
-
diff --git a/setup.py b/setup.py
deleted file mode 100644
index fa87232..0000000
--- a/setup.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from setuptools import setup, find_packages
-from typing import Tuple
-
-
-def _setup_long_description() -> Tuple[str, str]:
-    return open("README.md", "r", encoding="utf-8").read(), "text/markdown"
-
-
-setup(
-    name='guidellm',
-    version='0.1.0',
-    author='Neuralmagic, Inc.',
-    description='Guidance platform for deploying and managing large language models.',
-    long_description=_setup_long_description()[0],
-    long_description_content_type=_setup_long_description()[1],
-    license="Apache",
-    url="https://github.com/neuralmagic/guidellm",
-    packages=find_packages(where='src'),
-    package_dir={'': 'src'},
-    include_package_data=True,
-    install_requires=[
-        'click',
-        'datasets',
-        'loguru',
-        'numpy',
-        'openai',
-        'requests',
-        'transformers',
-    ],
-    extras_require={
-        'dev': [
-            'pytest',
-            'sphinx',
-            'ruff',
-            'mypy',
-            'black',
-            'isort',
-            'flake8',
-            'pre-commit',
-        ],
-    },
-    entry_points={
-        'console_scripts': [
-            'guidellm=guidellm.main:main',
-        ],
-    },
-    python_requires=">=3.8.0",
-    classifiers=[
-        "Development Status :: 5 - Production/Stable",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3 :: Only",
-        "Intended Audience :: Developers",
-        "Intended Audience :: Education",
-        "Intended Audience :: Information Technology",
-        "Intended Audience :: Science/Research",
-        "License :: OSI Approved :: Apache Software License",
-        "Operating System :: POSIX :: Linux",
-        "Topic :: Scientific/Engineering",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-        "Topic :: Scientific/Engineering :: Mathematics",
-        "Topic :: Software Development",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-)
diff --git a/src/__init__.py b/src/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/guidellm/backend/base.py b/src/guidellm/backend/base.py
index 22aab80..2c58dc7 100644
--- a/src/guidellm/backend/base.py
+++ b/src/guidellm/backend/base.py
@@ -1,8 +1,7 @@
-import uuid
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum
-from typing import Iterator, List, Optional, Type, Union
+from typing import Dict, Iterator, List, Optional, Type, Union
 
 from loguru import logger
 
@@ -36,7 +35,7 @@ class Backend(ABC):
     An abstract base class for generative AI backends.
     """
 
-    _registry = {}
+    _registry: Dict[BackendTypes, "Type[Backend]"] = {}
 
     @staticmethod
     def register_backend(backend_type: BackendTypes):
@@ -54,7 +53,7 @@ def inner_wrapper(wrapped_class: Type["Backend"]):
         return inner_wrapper
 
     @staticmethod
-    def create_backend(backend_type: Union[str, BackendTypes], **kwargs) -> "Backend":
+    def create_backend(backend_type: BackendTypes, **kwargs) -> "Backend":
         """
         Factory method to create a backend based on the backend type.
 
@@ -65,10 +64,13 @@ def create_backend(backend_type: Union[str, BackendTypes], **kwargs) -> "Backend
         :return: An instance of a subclass of Backend.
         :rtype: Backend
         """
+        logger.info(f"Creating backend of type {backend_type}")
+
         if backend_type not in Backend._registry:
             logger.error(f"Unsupported backend type: {backend_type}")
             raise ValueError(f"Unsupported backend type: {backend_type}")
 
+
         return Backend._registry[backend_type](**kwargs)
 
     def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
@@ -81,8 +83,11 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
         :rtype: TextGenerationResult
         """
         logger.info(f"Submitting request with prompt: {request.prompt}")
-        result_id = str(uuid.uuid4())
-        result = TextGenerationResult(result_id)
+
+        # TODO: Double-check whether we still need the result id
+        # result_id = str(uuid.uuid4())
+
+        result = TextGenerationResult(TextGenerationRequest(prompt=request.prompt))
         result.start(request.prompt)
 
         for response in self.make_request(request):
@@ -90,7 +95,7 @@ def submit(self, request: TextGenerationRequest) -> TextGenerationResult:
             result.output_token(response.add_token)
         elif response.type_ == "final":
             result.end(
-                response.output,
+                response.output or "",
                 response.prompt_token_count,
                 response.output_token_count,
             )
diff --git a/src/guidellm/core/distribution.py b/src/guidellm/core/distribution.py
index 2a84ba2..97133ec 100644
--- a/src/guidellm/core/distribution.py
+++ b/src/guidellm/core/distribution.py
@@ -1,4 +1,4 @@
-from typing import List, Union
+from typing import List, Optional, Union
 
 import numpy as np
 from loguru import logger
@@ -16,7 +16,7 @@ class Distribution:
     :type data: List[Union[int, float]], optional
     """
 
-    def __init__(self, data: List[Union[int, float]] = None):
+    def __init__(self, data: Optional[List[Union[int, float]]] = None):
         """
         Initialize the Distribution with optional data.
 
diff --git a/src/guidellm/core/result.py b/src/guidellm/core/result.py
index a72a03b..f1a3867 100644
--- a/src/guidellm/core/result.py
+++ b/src/guidellm/core/result.py
@@ -70,7 +70,7 @@ def __str__(self) -> str:
             f"end_time={self._end_time})"
         )
 
-    def __eq__(self, other: "TextGenerationResult") -> bool:
+    def __eq__(self, other: Any) -> bool:
         """
         Check equality between two TextGenerationResult instances.
 
@@ -79,15 +79,19 @@ def __eq__(self, other: "TextGenerationResult") -> bool:
         :return: True if the instances are equal, False otherwise.
         :rtype: bool
         """
-        return (
-            self._request == other._request
-            and self._prompt == other._prompt
-            and self._output == other._output
-            and self._start_time == other._start_time
-            and self._end_time == other._end_time
-            and self._first_token_time == other._first_token_time
-            and self._decode_times == other._decode_times
-        )
+
+        if not isinstance(other, TextGenerationResult):
+            raise TypeError(f"Comparisons are only allowed with {type(self)}.")
+
+        return (
+            self._request == other._request
+            and self._prompt == other._prompt
+            and self._output == other._output
+            and self._start_time == other._start_time
+            and self._end_time == other._end_time
+            and self._first_token_time == other._first_token_time
+            and self._decode_times == other._decode_times
+        )
 
     @property
     def request(self) -> TextGenerationRequest:
@@ -120,23 +124,31 @@ def output(self) -> str:
         return self._output
 
     @property
-    def start_time(self) -> Optional[float]:
+    def start_time(self) -> float:
         """
         Get the start time of the text generation.
 
         :return: The start time.
-        :rtype: Optional[float]
+        :rtype: float
         """
+
+        self._recording_started()
+        assert self._start_time is not None
+
         return self._start_time
 
     @property
-    def end_time(self) -> Optional[float]:
+    def end_time(self) -> float:
         """
         Get the end time of the text generation.
 
         :return: The end time.
-        :rtype: Optional[float]
+        :rtype: float
         """
+
+        self._recording_started()
+        assert self._end_time is not None
+
         return self._end_time
 
     @property
@@ -175,6 +187,25 @@ def start(self, prompt: str):
 
         logger.info(f"Text generation started with prompt: '{prompt}'")
 
+    def _recording_started(self, raise_exception: bool = True) -> bool:
+        """
+        Ensure that the benchmark text generation recording has started.
+
+        We can assume that if `self._start_time` and `self._end_time` are set,
+        then `start()` has been called.
+        """
+
+        # Check the private attributes here: going through the `start_time` /
+        # `end_time` properties would call this method again and recurse forever.
+        if (self._start_time is not None) and (self._end_time is not None):
+            return True
+
+        if raise_exception:
+            raise ValueError(
+                "Start/end times are not recorded. "
+                "Did you call `TextGenerationResult.start()`?"
+            )
+
+        return False
+
     def output_token(self, token: str):
         """
         Add a token to the output and record the decode time.
 
         :param token: The decoded token.
         :type token: str
         """
+
         current_counter = perf_counter()
 
         if not self._first_token_set:
-            self._first_token_time = current_counter - self._last_time
+            self._first_token_time = current_counter - self.end_time
             self._first_token_set = True
             logger.debug(f"First token decode time: {self._first_token_time}")
         else:
-            decode_time = current_counter - self._last_time
+            decode_time = current_counter - self.end_time
             self._decode_times.add_data([decode_time])
             logger.debug(f"Token '{token}' decoded in {decode_time} seconds")
 
@@ -347,7 +379,7 @@ def __str__(self) -> str:
             f"request_rate={self.request_rate})"
         )
 
-    def __eq__(self, other: "TextGenerationBenchmark") -> bool:
+    def __eq__(self, other: Any) -> bool:
         """
         Check equality between two TextGenerationBenchmark instances.
 
@@ -356,13 +388,16 @@ def __eq__(self, other: "TextGenerationBenchmark") -> bool:
         :return: True if the instances are equal, False otherwise.
         :rtype: bool
         """
-        return (
-            self._mode == other._mode
-            and self._rate == other._rate
-            and self._results == other._results
-            and self._errors == other._errors
-            and self._concurrencies == other._concurrencies
-        )
+        if not isinstance(other, TextGenerationBenchmark):
+            raise TypeError(f"Comparisons are only allowed with {type(self)}.")
+
+        return (
+            self._mode == other._mode
+            and self._rate == other._rate
+            and self._results == other._results
+            and self._errors == other._errors
+            and self._concurrencies == other._concurrencies
+        )
 
     def __iter__(self):
         """
@@ -450,13 +485,13 @@ def request_rate(self) -> float:
         :return: The rate of requests per second.
         :rtype: float
         """
+
         if not self._results:
             return 0.0
-
-        start_time = self._results[0].start_time
-        end_time = self._results[-1].end_time
-
-        return self.request_count / (end_time - start_time)
+
+        return self.request_count / (
+            self._results[-1].end_time - self._results[0].start_time
+        )
 
     def request_started(self):
         """
@@ -545,7 +580,7 @@ def __str__(self) -> str:
             f"benchmarks_summary=[{', '.join(str(b) for b in self._benchmarks)}])"
         )
 
-    def __eq__(self, other: "TextGenerationBenchmarkReport") -> bool:
+    def __eq__(self, other: Any) -> bool:
         """
         Check equality between two TextGenerationBenchmarkReport instances.
 
@@ -554,6 +589,10 @@ def __eq__(self, other: "TextGenerationBenchmarkReport") -> bool:
         :return: True if the instances are equal, False otherwise.
         :rtype: bool
         """
+
+        if not isinstance(other, TextGenerationBenchmarkReport):
+            raise TypeError(f"Comparisons are only allowed with {type(self)}.")
+
         return self._benchmarks == other._benchmarks and self._args == other._args
 
     def __iter__(self):
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..d2af3fa
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,13 @@
+[tox]
+min_version = 4.0
+env_list = py38,py39,py310,py311,py312
+
+[testenv]
+extras = code_quality
+commands =
+    python -m ruff check src tests
+    python -m black --check src tests
+    python -m isort --check src tests
+    python -m mypy src tests
+    python -m pytest tests/unit
+
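
---

Note for reviewers: with this patch applied, the CI quality gate can be reproduced
locally. A minimal sketch, assuming tox >= 4 is installed and the interpreters
listed in `env_list` are available on the machine:

    pip install tox
    tox -e py38    # run a single environment
    tox            # run the full py38-py312 matrix, same as CI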