From 2437e6a62bb32916db5d8b63e002bedc34711306 Mon Sep 17 00:00:00 2001 From: Ketan Umare Date: Thu, 13 Oct 2022 16:13:44 -0700 Subject: [PATCH 01/50] wip Signed-off-by: Ketan Umare --- flytekit/extend/backend/__init__.py | 0 flytekit/extend/backend/fastapi.py | 46 +++++++++++++++++++++++++++++ flytekit/extend/backend/plugin.py | 45 ++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 flytekit/extend/backend/__init__.py create mode 100644 flytekit/extend/backend/fastapi.py create mode 100644 flytekit/extend/backend/plugin.py diff --git a/flytekit/extend/backend/__init__.py b/flytekit/extend/backend/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/flytekit/extend/backend/fastapi.py b/flytekit/extend/backend/fastapi.py new file mode 100644 index 0000000000..debaff88b8 --- /dev/null +++ b/flytekit/extend/backend/fastapi.py @@ -0,0 +1,46 @@ +import typing + +from fastapi import FastAPI +from fastapi.responses import HTMLResponse + +from flytekit.extend.backend.plugin import BackendPluginRegistry, BackendPluginBase + + +def _create_root_welcome(app: FastAPI, plugins: typing.List[BackendPluginBase]): + l = "" + for p in plugins: + l += f"
  • ID: {p.identifier}, TaskType: {p.task_type}, Version: {p.version}
  • " + + @app.get("/", response_class=HTMLResponse) + def root(): + return f""" + + + FlyteBackend Plugin Server + + +

    Flyte Backend plugin server.

    +

    Registered plugins

    +
      + {l} +
    + + + """ + + +def _serve_plugin(app: FastAPI, plugin: BackendPluginBase): + @app.post("/plugins/") + def create(): + plugin.create() + + +def serve_plugin(app: FastAPI, plugin: BackendPluginBase): + _create_root_welcome(app, [plugin]) + _serve_plugin(app, plugin) + + +def serve_all_registered_plugins(app: FastAPI): + + for plugin in BackendPluginRegistry.list_registered_plugins(): + _serve_plugin(app, plugin) diff --git a/flytekit/extend/backend/plugin.py b/flytekit/extend/backend/plugin.py new file mode 100644 index 0000000000..7b699106c1 --- /dev/null +++ b/flytekit/extend/backend/plugin.py @@ -0,0 +1,45 @@ +import typing + + +class BackendPluginBase(): + + def __init__(self, identifier: str, task_type: str, version: str = "v1"): + self._identifier = identifier + self._task_type = task_type + self._version = version + + @property + def identifier(self) -> str: + return self._identifier + + @property + def task_type(self) -> str: + return self._task_type + + @property + def version(self) -> str: + return self._version + + def initialize(self): + pass + + async def create(self): + pass + + async def poll(self): + pass + + async def terminate(self): + pass + + +class BackendPluginRegistry(object): + @staticmethod + def register(self, plugin: BackendPluginBase): + pass + + @staticmethod + def list_registered_plugins(self) -> typing.List[BackendPluginBase]: + pass + + From 063dd3bf1219068a6e13756ee202c72cb4a70deb Mon Sep 17 00:00:00 2001 From: Ketan Umare Date: Fri, 14 Oct 2022 13:49:06 -0700 Subject: [PATCH 02/50] updated Signed-off-by: Ketan Umare --- flytekit/extend/backend/fastapi.py | 32 +++++++++++++++++++++----- flytekit/extend/backend/plugin.py | 36 +++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 10 deletions(-) diff --git a/flytekit/extend/backend/fastapi.py b/flytekit/extend/backend/fastapi.py index debaff88b8..22c3d8dda0 100644 --- a/flytekit/extend/backend/fastapi.py +++ b/flytekit/extend/backend/fastapi.py @@ -1,4 +1,5 @@ import typing +from http import HTTPStatus from fastapi import FastAPI from fastapi.responses import HTMLResponse @@ -29,18 +30,39 @@ def root(): """ +def _create_healthcheck(app: FastAPI): + @app.get("/health") + def health(): + return {"message": HTTPStatus.OK.phrase, "status": HTTPStatus.OK} + + def _serve_plugin(app: FastAPI, plugin: BackendPluginBase): - @app.post("/plugins/") - def create(): - plugin.create() + @app.post(f"/plugins/v1/{plugin.identifier}/{plugin.version}/") + async def create(): + return await plugin.create() + + @app.delete(f"/plugins/v1/{plugin.identifier}/{plugin.version}/") + async def delete(): + return await plugin.terminate() + + @app.get(f"/plugins/v1/{plugin.identifier}/{plugin.version}/") + async def poll(): + return await plugin.poll() def serve_plugin(app: FastAPI, plugin: BackendPluginBase): _create_root_welcome(app, [plugin]) + _create_healthcheck(app) _serve_plugin(app, plugin) def serve_all_registered_plugins(app: FastAPI): - - for plugin in BackendPluginRegistry.list_registered_plugins(): + plugins = BackendPluginRegistry.list_registered_plugins() + _create_root_welcome(app, plugins) + _create_healthcheck(app) + for plugin in plugins: _serve_plugin(app, plugin) + + +app = FastAPI() +serve_all_registered_plugins(app) \ No newline at end of file diff --git a/flytekit/extend/backend/plugin.py b/flytekit/extend/backend/plugin.py index 7b699106c1..20b184a86e 100644 --- a/flytekit/extend/backend/plugin.py +++ b/flytekit/extend/backend/plugin.py @@ -1,4 +1,5 @@ import typing +from abc import abstractmethod class BackendPluginBase(): @@ -20,26 +21,51 @@ def task_type(self) -> str: def version(self) -> str: return self._version - def initialize(self): + @abstractmethod + async def initialize(self): pass + @abstractmethod async def create(self): pass + @abstractmethod async def poll(self): pass + @abstractmethod async def terminate(self): pass class BackendPluginRegistry(object): + _REGISTRY = [] + @staticmethod - def register(self, plugin: BackendPluginBase): - pass + def register(plugin: BackendPluginBase): + BackendPluginRegistry._REGISTRY.append(plugin) @staticmethod - def list_registered_plugins(self) -> typing.List[BackendPluginBase]: - pass + def list_registered_plugins() -> typing.List[BackendPluginBase]: + return BackendPluginRegistry._REGISTRY + + +class DummyPlugin(BackendPluginBase): + def __init__(self, identifier="x"): + super().__init__(identifier=identifier, task_type="my-task") + + async def initialize(self): + return "Hello World" + + async def create(self): + return "

    In Create

    " + + async def poll(self): + return "

    In Poll

    " + + async def terminate(self): + return "

    In Terminate

    " +for i in range(50): + BackendPluginRegistry.register(DummyPlugin(f"x-{i}")) \ No newline at end of file From cd146582ac19fa448aa7139ea906984cb542c600 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 20 Jan 2023 17:04:48 -0800 Subject: [PATCH 03/50] Add bq plugin and refactor Signed-off-by: Kevin Su --- flytekit/core/base_sql_task.py | 2 + flytekit/core/constants.py | 1 + flytekit/core/utils.py | 6 ++ flytekit/extend/backend/__init__.py | 1 + flytekit/extend/backend/base_plugin.py | 72 +++++++++++++++++++ flytekit/extend/backend/bigquey.py | 36 ++++++++++ flytekit/extend/backend/fastapi.py | 42 +++++------ flytekit/extend/backend/plugin.py | 71 ------------------ .../flytekitplugins/bigquery/task.py | 7 ++ .../flytekitplugins/spark/task.py | 1 - 10 files changed, 147 insertions(+), 92 deletions(-) create mode 100644 flytekit/extend/backend/base_plugin.py create mode 100644 flytekit/extend/backend/bigquey.py delete mode 100644 flytekit/extend/backend/plugin.py diff --git a/flytekit/core/base_sql_task.py b/flytekit/core/base_sql_task.py index 78f4341839..495eaa5b19 100644 --- a/flytekit/core/base_sql_task.py +++ b/flytekit/core/base_sql_task.py @@ -1,7 +1,9 @@ +import os import re from typing import Any, Dict, Optional, Type, TypeVar from flytekit.core.base_task import PythonTask, TaskMetadata +from flytekit.core.constants import ENABLE_BACKEND_SYSTEM_SERVICE from flytekit.core.interface import Interface T = TypeVar("T") diff --git a/flytekit/core/constants.py b/flytekit/core/constants.py index cda20602b2..cb2f97e0c6 100644 --- a/flytekit/core/constants.py +++ b/flytekit/core/constants.py @@ -2,6 +2,7 @@ OUTPUT_FILE_NAME = "outputs.pb" FUTURES_FILE_NAME = "futures.pb" ERROR_FILE_NAME = "error.pb" +ENABLE_BACKEND_SYSTEM_SERVICE = "ENABLE_BACKEND_SYSTEM_SERVICE" class SdkTaskType(object): diff --git a/flytekit/core/utils.py b/flytekit/core/utils.py index d23aae3fbb..0ee8a057b9 100644 --- a/flytekit/core/utils.py +++ b/flytekit/core/utils.py @@ -1,3 +1,4 @@ +import os import os as _os import shutil as _shutil import tempfile as _tempfile @@ -6,6 +7,7 @@ from pathlib import Path from typing import Dict, List, Optional +from flytekit.core.constants import ENABLE_BACKEND_SYSTEM_SERVICE from flytekit.loggers import logger from flytekit.models import task as _task_models @@ -233,3 +235,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): end_process_time - self._start_process_time, ) ) + + +def is_backend_plugin_service_enabled(): + return os.environ.get(ENABLE_BACKEND_SYSTEM_SERVICE).lower() == "true" diff --git a/flytekit/extend/backend/__init__.py b/flytekit/extend/backend/__init__.py index e69de29bb2..9d5d2dad94 100644 --- a/flytekit/extend/backend/__init__.py +++ b/flytekit/extend/backend/__init__.py @@ -0,0 +1 @@ +from .bigquey import BigQueryPlugin diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py new file mode 100644 index 0000000000..20c0c158eb --- /dev/null +++ b/flytekit/extend/backend/base_plugin.py @@ -0,0 +1,72 @@ +import typing +from abc import abstractmethod + +from pydantic import BaseModel + + +class CreateRequest(BaseModel): + inputs_path: str + output_prefix: str + task_template_path: str + bq_token_name: str # token should be saved in the k8s secret + + +class CreateResponse(BaseModel): + job_id: str + + +class PollResponse(BaseModel): + job_id: str + state: str + + +class BackendPluginBase: + def __init__(self, task_type: str, version: str = "v1"): + self._task_type = task_type + self._version = version + + @property + def task_type(self) -> str: + return self._task_type + + @property + def version(self) -> str: + return self._version + + @abstractmethod + async def initialize(self): + pass + + @abstractmethod + async def create(self, create_request: CreateRequest) -> CreateResponse: + pass + + @abstractmethod + async def poll(self, job_id: str) -> PollResponse: + pass + + @abstractmethod + async def terminate(self, job_id: str): + pass + + +class BackendPluginRegistry(object): + _REGISTRY = [] + + @staticmethod + def register(plugin: BackendPluginBase): + BackendPluginRegistry._REGISTRY.append(plugin) + + @staticmethod + def list_registered_plugins() -> typing.List[BackendPluginBase]: + return BackendPluginRegistry._REGISTRY + + +def convert_to_flyte_state(state: str): + if state.lower() in ["pending"]: + return "pending" + if state.lower() in ["done", "succeeded"]: + return "succeeded" + if state.lower() in ["running"]: + return "running" + raise ValueError("Unrecognize state") diff --git a/flytekit/extend/backend/bigquey.py b/flytekit/extend/backend/bigquey.py new file mode 100644 index 0000000000..d38c9585e2 --- /dev/null +++ b/flytekit/extend/backend/bigquey.py @@ -0,0 +1,36 @@ +from google.cloud import bigquery + +from flytekit.extend.backend.base_plugin import ( + BackendPluginBase, + BackendPluginRegistry, + CreateRequest, + CreateResponse, + PollResponse, + convert_to_flyte_state, +) + + +class BigQueryPlugin(BackendPluginBase): + def __init__(self): + super().__init__(task_type="bigquery", version="v1") + + async def initialize(self): + return "Hello World" + + async def create(self, create_request: CreateRequest) -> CreateResponse: + client = bigquery.Client() + QUERY = "SELECT 1" + query_job = client.query(QUERY) + return CreateResponse(job_id=query_job.job_id) + + async def poll(self, job_id) -> PollResponse: + client = bigquery.Client() + job = client.get_job(job_id) + return PollResponse(job_id=job.job_id, state=convert_to_flyte_state(job.state)) + + async def terminate(self, job_id): + client = bigquery.Client() + client.cancel_job(job_id) + + +BackendPluginRegistry.register(BigQueryPlugin()) diff --git a/flytekit/extend/backend/fastapi.py b/flytekit/extend/backend/fastapi.py index 22c3d8dda0..dfbf8a03a6 100644 --- a/flytekit/extend/backend/fastapi.py +++ b/flytekit/extend/backend/fastapi.py @@ -4,13 +4,21 @@ from fastapi import FastAPI from fastapi.responses import HTMLResponse -from flytekit.extend.backend.plugin import BackendPluginRegistry, BackendPluginBase +from flytekit.extend.backend.base_plugin import ( + BackendPluginBase, + BackendPluginRegistry, + CreateRequest, + CreateResponse, + PollResponse, +) + +PLUGINS_V1 = "/plugins/v1" def _create_root_welcome(app: FastAPI, plugins: typing.List[BackendPluginBase]): l = "" for p in plugins: - l += f"
  • ID: {p.identifier}, TaskType: {p.task_type}, Version: {p.version}
  • " + l += f"
  • TaskType: {p.task_type}, Version: {p.version}
  • " @app.get("/", response_class=HTMLResponse) def root(): @@ -30,39 +38,33 @@ def root(): """ -def _create_healthcheck(app: FastAPI): +def _create_health_check(app: FastAPI): @app.get("/health") def health(): return {"message": HTTPStatus.OK.phrase, "status": HTTPStatus.OK} def _serve_plugin(app: FastAPI, plugin: BackendPluginBase): - @app.post(f"/plugins/v1/{plugin.identifier}/{plugin.version}/") - async def create(): - return await plugin.create() - - @app.delete(f"/plugins/v1/{plugin.identifier}/{plugin.version}/") - async def delete(): - return await plugin.terminate() - - @app.get(f"/plugins/v1/{plugin.identifier}/{plugin.version}/") - async def poll(): - return await plugin.poll() + @app.post(f"{PLUGINS_V1}/{plugin.task_type}/{plugin.version}/", response_model=CreateResponse) + async def create(create_request: CreateRequest): + return await plugin.create(create_request) + @app.delete(f"{PLUGINS_V1}/{plugin.task_type}/{plugin.version}/") + async def terminate(job_id: str): + return await plugin.terminate(job_id) -def serve_plugin(app: FastAPI, plugin: BackendPluginBase): - _create_root_welcome(app, [plugin]) - _create_healthcheck(app) - _serve_plugin(app, plugin) + @app.get(f"{PLUGINS_V1}/{plugin.task_type}/{plugin.version}/", response_model=PollResponse) + async def poll(job_id: str): + return await plugin.poll(job_id) def serve_all_registered_plugins(app: FastAPI): plugins = BackendPluginRegistry.list_registered_plugins() _create_root_welcome(app, plugins) - _create_healthcheck(app) + _create_health_check(app) for plugin in plugins: _serve_plugin(app, plugin) app = FastAPI() -serve_all_registered_plugins(app) \ No newline at end of file +serve_all_registered_plugins(app) diff --git a/flytekit/extend/backend/plugin.py b/flytekit/extend/backend/plugin.py deleted file mode 100644 index 20b184a86e..0000000000 --- a/flytekit/extend/backend/plugin.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing -from abc import abstractmethod - - -class BackendPluginBase(): - - def __init__(self, identifier: str, task_type: str, version: str = "v1"): - self._identifier = identifier - self._task_type = task_type - self._version = version - - @property - def identifier(self) -> str: - return self._identifier - - @property - def task_type(self) -> str: - return self._task_type - - @property - def version(self) -> str: - return self._version - - @abstractmethod - async def initialize(self): - pass - - @abstractmethod - async def create(self): - pass - - @abstractmethod - async def poll(self): - pass - - @abstractmethod - async def terminate(self): - pass - - -class BackendPluginRegistry(object): - _REGISTRY = [] - - @staticmethod - def register(plugin: BackendPluginBase): - BackendPluginRegistry._REGISTRY.append(plugin) - - @staticmethod - def list_registered_plugins() -> typing.List[BackendPluginBase]: - return BackendPluginRegistry._REGISTRY - - -class DummyPlugin(BackendPluginBase): - def __init__(self, identifier="x"): - super().__init__(identifier=identifier, task_type="my-task") - - async def initialize(self): - return "Hello World" - - async def create(self): - return "

    In Create

    " - - async def poll(self): - return "

    In Poll

    " - - async def terminate(self): - return "

    In Terminate

    " - - -for i in range(50): - BackendPluginRegistry.register(DummyPlugin(f"x-{i}")) \ No newline at end of file diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py index 1d4a7f0dbd..0ddf684482 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py @@ -7,6 +7,7 @@ from flytekit import StructuredDataset from flytekit.configuration import SerializationSettings +from flytekit.core.utils import is_backend_plugin_service_enabled from flytekit.extend import SQLTask from flytekit.models import task as _task_model @@ -81,3 +82,9 @@ def get_custom(self, settings: SerializationSettings) -> Dict[str, Any]: def get_sql(self, settings: SerializationSettings) -> Optional[_task_model.Sql]: sql = _task_model.Sql(statement=self.query_template, dialect=_task_model.Sql.Dialect.ANSI) return sql + + def execute(self, **kwargs) -> Any: + if not is_backend_plugin_service_enabled(): + raise Exception("Backend plugin service is not enabled") + + raise Exception("Cannot run a SQL Task natively, please mock.") diff --git a/plugins/flytekit-spark/flytekitplugins/spark/task.py b/plugins/flytekit-spark/flytekitplugins/spark/task.py index 8428b492ce..6cf5eae92c 100644 --- a/plugins/flytekit-spark/flytekitplugins/spark/task.py +++ b/plugins/flytekit-spark/flytekitplugins/spark/task.py @@ -69,7 +69,6 @@ def new_spark_session(name: str, conf: typing.Dict[str, str] = None): # If there is a global SparkSession available, get it and try to stop it. _pyspark.sql.SparkSession.builder.getOrCreate().stop() - return sess_builder.getOrCreate() # SparkSession.Stop does not work correctly, as it stops the session before all the data is written # sess.stop() From 93a4ed27a67a52a35962550e026f7053897a2393 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 23 Jan 2023 21:59:58 -0800 Subject: [PATCH 04/50] Add dummy plugin for performance testing Signed-off-by: Kevin Su --- flytekit/core/base_sql_task.py | 2 - flytekit/extend/backend/__init__.py | 2 +- flytekit/extend/backend/base_plugin.py | 39 ++++---- flytekit/extend/backend/bigquey.py | 36 -------- flytekit/extend/backend/dummy_plugin.py | 63 +++++++++++++ flytekit/extend/backend/fastapi.py | 17 ++-- flytekit/extend/backend/utils.py | 38 ++++++++ .../flytekitplugins/bigquery/__init__.py | 1 + .../bigquery/backend_plugin.py | 88 +++++++++++++++++++ plugins/flytekit-bigquery/setup.py | 1 + 10 files changed, 223 insertions(+), 64 deletions(-) delete mode 100644 flytekit/extend/backend/bigquey.py create mode 100644 flytekit/extend/backend/dummy_plugin.py create mode 100644 flytekit/extend/backend/utils.py create mode 100644 plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py diff --git a/flytekit/core/base_sql_task.py b/flytekit/core/base_sql_task.py index 495eaa5b19..78f4341839 100644 --- a/flytekit/core/base_sql_task.py +++ b/flytekit/core/base_sql_task.py @@ -1,9 +1,7 @@ -import os import re from typing import Any, Dict, Optional, Type, TypeVar from flytekit.core.base_task import PythonTask, TaskMetadata -from flytekit.core.constants import ENABLE_BACKEND_SYSTEM_SERVICE from flytekit.core.interface import Interface T = TypeVar("T") diff --git a/flytekit/extend/backend/__init__.py b/flytekit/extend/backend/__init__.py index 9d5d2dad94..5061c6ea1b 100644 --- a/flytekit/extend/backend/__init__.py +++ b/flytekit/extend/backend/__init__.py @@ -1 +1 @@ -from .bigquey import BigQueryPlugin +from .dummy_plugin import DummyPlugin diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 20c0c158eb..8f652ffc12 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,38 +1,41 @@ -import typing from abc import abstractmethod +from typing import List, Optional from pydantic import BaseModel +PENDING = "pending" +SUCCEEDED = "succeeded" +RUNNING = "running" + class CreateRequest(BaseModel): inputs_path: str - output_prefix: str task_template_path: str - bq_token_name: str # token should be saved in the k8s secret class CreateResponse(BaseModel): job_id: str + message: Optional[str] -class PollResponse(BaseModel): +class PollRequest(BaseModel): job_id: str + output_prefix: str + + +class PollResponse(BaseModel): state: str + message: Optional[str] class BackendPluginBase: - def __init__(self, task_type: str, version: str = "v1"): + def __init__(self, task_type: str): self._task_type = task_type - self._version = version @property def task_type(self) -> str: return self._task_type - @property - def version(self) -> str: - return self._version - @abstractmethod async def initialize(self): pass @@ -42,7 +45,7 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: pass @abstractmethod - async def poll(self, job_id: str) -> PollResponse: + async def poll(self, poll_request: PollRequest) -> PollResponse: pass @abstractmethod @@ -58,15 +61,15 @@ def register(plugin: BackendPluginBase): BackendPluginRegistry._REGISTRY.append(plugin) @staticmethod - def list_registered_plugins() -> typing.List[BackendPluginBase]: + def list_registered_plugins() -> List[BackendPluginBase]: return BackendPluginRegistry._REGISTRY def convert_to_flyte_state(state: str): - if state.lower() in ["pending"]: - return "pending" - if state.lower() in ["done", "succeeded"]: - return "succeeded" - if state.lower() in ["running"]: - return "running" + if state.lower() in [PENDING]: + return PENDING + if state.lower() in ["done", SUCCEEDED]: + return SUCCEEDED + if state.lower() in [RUNNING]: + return RUNNING raise ValueError("Unrecognize state") diff --git a/flytekit/extend/backend/bigquey.py b/flytekit/extend/backend/bigquey.py deleted file mode 100644 index d38c9585e2..0000000000 --- a/flytekit/extend/backend/bigquey.py +++ /dev/null @@ -1,36 +0,0 @@ -from google.cloud import bigquery - -from flytekit.extend.backend.base_plugin import ( - BackendPluginBase, - BackendPluginRegistry, - CreateRequest, - CreateResponse, - PollResponse, - convert_to_flyte_state, -) - - -class BigQueryPlugin(BackendPluginBase): - def __init__(self): - super().__init__(task_type="bigquery", version="v1") - - async def initialize(self): - return "Hello World" - - async def create(self, create_request: CreateRequest) -> CreateResponse: - client = bigquery.Client() - QUERY = "SELECT 1" - query_job = client.query(QUERY) - return CreateResponse(job_id=query_job.job_id) - - async def poll(self, job_id) -> PollResponse: - client = bigquery.Client() - job = client.get_job(job_id) - return PollResponse(job_id=job.job_id, state=convert_to_flyte_state(job.state)) - - async def terminate(self, job_id): - client = bigquery.Client() - client.cancel_job(job_id) - - -BackendPluginRegistry.register(BigQueryPlugin()) diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py new file mode 100644 index 0000000000..bdb53806aa --- /dev/null +++ b/flytekit/extend/backend/dummy_plugin.py @@ -0,0 +1,63 @@ +from random import randint +from time import sleep + +from flytekit import FlyteContextManager, StructuredDataset +from flytekit.core import constants +from flytekit.core.type_engine import TypeEngine +from flytekit.extend.backend.base_plugin import ( + RUNNING, + SUCCEEDED, + BackendPluginBase, + BackendPluginRegistry, + CreateRequest, + CreateResponse, + PollRequest, + PollResponse, +) +from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file +from flytekit.models import literals, task +from flytekit.models.types import LiteralType, StructuredDatasetType + + +# This plugin is used for performance benchmarking +class DummyPlugin(BackendPluginBase): + def __init__(self): + super().__init__(task_type="dummy") + + async def initialize(self): + pass + + async def create(self, create_request: CreateRequest) -> CreateResponse: + _ = get_task_template(create_request.task_template_path) + _ = get_task_inputs(create_request.inputs_path) + sleep(1) + + return CreateResponse(job_id="fake_id") + + async def poll(self, poll_request: PollRequest) -> PollResponse: + x = randint(0, 100) + state = RUNNING + if x < 20: + ctx = FlyteContextManager.current_context() + output_file_dict = { + constants.OUTPUT_FILE_NAME: literals.LiteralMap( + { + "results": TypeEngine.to_literal( + ctx, + StructuredDataset(uri="fake_uri"), + StructuredDataset, + LiteralType(structured_dataset_type=StructuredDatasetType), + ) + } + ) + } + upload_output_file(output_file_dict, poll_request.output_prefix) + state = SUCCEEDED + + return PollResponse(state=state) + + async def terminate(self, job_id): + sleep(1) + + +BackendPluginRegistry.register(DummyPlugin()) diff --git a/flytekit/extend/backend/fastapi.py b/flytekit/extend/backend/fastapi.py index dfbf8a03a6..955444495f 100644 --- a/flytekit/extend/backend/fastapi.py +++ b/flytekit/extend/backend/fastapi.py @@ -4,21 +4,23 @@ from fastapi import FastAPI from fastapi.responses import HTMLResponse +from flytekit import __version__ from flytekit.extend.backend.base_plugin import ( BackendPluginBase, BackendPluginRegistry, CreateRequest, CreateResponse, + PollRequest, PollResponse, ) -PLUGINS_V1 = "/plugins/v1" +PLUGINS_V1 = "plugins/v1" def _create_root_welcome(app: FastAPI, plugins: typing.List[BackendPluginBase]): l = "" for p in plugins: - l += f"
  • TaskType: {p.task_type}, Version: {p.version}
  • " + l += f"
  • TaskType: {p.task_type}, Version: {__version__}
  • " @app.get("/", response_class=HTMLResponse) def root(): @@ -45,17 +47,17 @@ def health(): def _serve_plugin(app: FastAPI, plugin: BackendPluginBase): - @app.post(f"{PLUGINS_V1}/{plugin.task_type}/{plugin.version}/", response_model=CreateResponse) + @app.post(f"/{PLUGINS_V1}/{plugin.task_type}", response_model=CreateResponse) async def create(create_request: CreateRequest): return await plugin.create(create_request) - @app.delete(f"{PLUGINS_V1}/{plugin.task_type}/{plugin.version}/") + @app.delete(f"/{PLUGINS_V1}/{plugin.task_type}") async def terminate(job_id: str): return await plugin.terminate(job_id) - @app.get(f"{PLUGINS_V1}/{plugin.task_type}/{plugin.version}/", response_model=PollResponse) - async def poll(job_id: str): - return await plugin.poll(job_id) + @app.get(f"/{PLUGINS_V1}/{plugin.task_type}", response_model=PollResponse) + async def poll(poll_request: PollRequest): + return await plugin.poll(poll_request) def serve_all_registered_plugins(app: FastAPI): @@ -63,6 +65,7 @@ def serve_all_registered_plugins(app: FastAPI): _create_root_welcome(app, plugins) _create_health_check(app) for plugin in plugins: + plugin.initialize() _serve_plugin(app, plugin) diff --git a/flytekit/extend/backend/utils.py b/flytekit/extend/backend/utils.py new file mode 100644 index 0000000000..2b56b7bb36 --- /dev/null +++ b/flytekit/extend/backend/utils.py @@ -0,0 +1,38 @@ +import os +import typing + +from flyteidl.core import literals_pb2, tasks_pb2 + +from flytekit import FlyteContextManager, logger +from flytekit.core import utils +from flytekit.core.utils import load_proto_from_file +from flytekit.models import literals, task +from flytekit.models.literals import LiteralMap +from flytekit.models.task import TaskTemplate + + +def get_task_template(task_template_path: str) -> TaskTemplate: + ctx = FlyteContextManager.current_context() + task_template_local_path = os.path.join(ctx.execution_state.working_dir, "task_template.pb") + ctx.file_access.get_data(task_template_path, task_template_local_path) + task_template_proto = load_proto_from_file(tasks_pb2.TaskTemplate, task_template_local_path) + task_template_model = task.TaskTemplate.from_flyte_idl(task_template_proto) + print(f"Task Template: {task_template_model}") # For debug, will remove it + return task_template_model + + +def get_task_inputs(inputs_path: str) -> LiteralMap: + ctx = FlyteContextManager.current_context() + task_inputs_local_path = os.path.join(ctx.execution_state.working_dir, "inputs.pb") + ctx.file_access.get_data(inputs_path, task_inputs_local_path) + input_proto = utils.load_proto_from_file(literals_pb2.LiteralMap, task_inputs_local_path) + idl_input_literals = literals.LiteralMap.from_flyte_idl(input_proto) + logger.debug(f"Task inputs: {idl_input_literals}") + return idl_input_literals + + +def upload_output_file(output_file_dict: typing.Dict, output_prefix: str): + ctx = FlyteContextManager.current_context() + for k, v in output_file_dict.items(): + utils.write_proto_to_file(v.to_flyte_idl(), os.path.join(ctx.execution_state.engine_dir, k)) + ctx.file_access.put_data(ctx.execution_state.engine_dir, output_prefix, is_multipart=True) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/__init__.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/__init__.py index cba899669b..416a021516 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/__init__.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/__init__.py @@ -11,4 +11,5 @@ BigQueryTask """ +from .backend_plugin import BigQueryPlugin from .task import BigQueryConfig, BigQueryTask diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py new file mode 100644 index 0000000000..3ba2f24446 --- /dev/null +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -0,0 +1,88 @@ +from typing import Dict + +from google.cloud import bigquery + +from flytekit import FlyteContextManager, StructuredDataset +from flytekit.core import constants +from flytekit.core.type_engine import TypeEngine +from flytekit.extend.backend.base_plugin import ( + SUCCEEDED, + BackendPluginBase, + BackendPluginRegistry, + CreateRequest, + CreateResponse, + PollRequest, + PollResponse, + convert_to_flyte_state, +) +from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file +from flytekit.models import literals +from flytekit.models.types import LiteralType, StructuredDatasetType + +pythonTypeToBigQueryType: Dict[type, str] = { + str: "STRING", + int: "INT64", +} + + +class BigQueryPlugin(BackendPluginBase): + def __init__(self): + super().__init__(task_type="bigquery") + + async def initialize(self): + # TODO: Read GOOGLE_APPLICATION_CREDENTIALS from secret. If not found, raise an error. + pass + + async def create(self, create_request: CreateRequest) -> CreateResponse: + ctx = FlyteContextManager.current_context() + task_template = get_task_template(create_request.task_template_path) + task_input_literals = get_task_inputs(create_request.inputs_path) + + # 3. Submit the job + # TODO: is there any other way to get python interface input? + python_interface_inputs = { + name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() + } + native_inputs = TypeEngine.literal_map_to_kwargs(ctx, task_input_literals, python_interface_inputs) + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val) + for name, val in native_inputs.items() + ] + ) + + custom = task_template.custom + client = bigquery.Client(project=custom["ProjectID"], location=custom["Location"]) + query_job = client.query(task_template.sql.statement, job_config=job_config) + + return CreateResponse(job_id=query_job.job_id) + + async def poll(self, poll_request: PollRequest) -> PollResponse: + client = bigquery.Client() + job = client.get_job(poll_request.job_id) + state = convert_to_flyte_state(str(job.state)) + if state == SUCCEEDED: + ctx = FlyteContextManager.current_context() + output_location = f"bq://{job.destination.project}:{job.destination.dataset_id}.{job.destination.table_id}" + output_file_dict = { + constants.OUTPUT_FILE_NAME: literals.LiteralMap( + { + "results": TypeEngine.to_literal( + ctx, + StructuredDataset(uri=output_location), + StructuredDataset, + LiteralType(structured_dataset_type=StructuredDatasetType(format="")), + ) + } + ) + } + upload_output_file(output_file_dict, poll_request.output_prefix) + + return PollResponse(job_id=job.job_id, state=state) + + async def terminate(self, job_id): + client = bigquery.Client() + client.cancel_job(job_id) + + +BackendPluginRegistry.register(BigQueryPlugin()) diff --git a/plugins/flytekit-bigquery/setup.py b/plugins/flytekit-bigquery/setup.py index 0e7eed5d9d..d91f58f881 100644 --- a/plugins/flytekit-bigquery/setup.py +++ b/plugins/flytekit-bigquery/setup.py @@ -33,4 +33,5 @@ "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", ], + entry_points={"flytekit.plugins": [f"{PLUGIN_NAME}=flytekitplugins.{PLUGIN_NAME}"]}, ) From a2a53055b78af0bad0c5eb1248982117646493e2 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 23 Jan 2023 22:06:15 -0800 Subject: [PATCH 05/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/dummy_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index bdb53806aa..06381afaaa 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -37,7 +37,7 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: async def poll(self, poll_request: PollRequest) -> PollResponse: x = randint(0, 100) state = RUNNING - if x < 20: + if x < 50: ctx = FlyteContextManager.current_context() output_file_dict = { constants.OUTPUT_FILE_NAME: literals.LiteralMap( @@ -46,7 +46,7 @@ async def poll(self, poll_request: PollRequest) -> PollResponse: ctx, StructuredDataset(uri="fake_uri"), StructuredDataset, - LiteralType(structured_dataset_type=StructuredDatasetType), + LiteralType(structured_dataset_type=StructuredDatasetType(format="")), ) } ) From f6b0d818464cdd2a3615aa7cda26ac48b88ff70c Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 25 Jan 2023 02:49:38 -0800 Subject: [PATCH 06/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/dummy_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index 06381afaaa..bae29d5175 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -37,7 +37,7 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: async def poll(self, poll_request: PollRequest) -> PollResponse: x = randint(0, 100) state = RUNNING - if x < 50: + if x < 90: ctx = FlyteContextManager.current_context() output_file_dict = { constants.OUTPUT_FILE_NAME: literals.LiteralMap( From 84ffbfe5a8596edb13249172d60fdb90da20a296 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 25 Jan 2023 11:45:13 -0800 Subject: [PATCH 07/50] test Signed-off-by: Kevin Su --- flytekit/extend/backend/dummy_plugin.py | 33 +++++++++++-------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index bae29d5175..f832b71fa6 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -35,24 +35,21 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: return CreateResponse(job_id="fake_id") async def poll(self, poll_request: PollRequest) -> PollResponse: - x = randint(0, 100) - state = RUNNING - if x < 90: - ctx = FlyteContextManager.current_context() - output_file_dict = { - constants.OUTPUT_FILE_NAME: literals.LiteralMap( - { - "results": TypeEngine.to_literal( - ctx, - StructuredDataset(uri="fake_uri"), - StructuredDataset, - LiteralType(structured_dataset_type=StructuredDatasetType(format="")), - ) - } - ) - } - upload_output_file(output_file_dict, poll_request.output_prefix) - state = SUCCEEDED + ctx = FlyteContextManager.current_context() + output_file_dict = { + constants.OUTPUT_FILE_NAME: literals.LiteralMap( + { + "results": TypeEngine.to_literal( + ctx, + StructuredDataset(uri="fake_uri"), + StructuredDataset, + LiteralType(structured_dataset_type=StructuredDatasetType(format="")), + ) + } + ) + } + upload_output_file(output_file_dict, poll_request.output_prefix) + state = SUCCEEDED return PollResponse(state=state) From b39fe489be01e9d54f6be917aaf78b9d6359aa27 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 25 Jan 2023 11:57:22 -0800 Subject: [PATCH 08/50] test Signed-off-by: Kevin Su --- flytekit/extend/backend/dummy_plugin.py | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index f832b71fa6..79acde6718 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -35,20 +35,20 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: return CreateResponse(job_id="fake_id") async def poll(self, poll_request: PollRequest) -> PollResponse: - ctx = FlyteContextManager.current_context() - output_file_dict = { - constants.OUTPUT_FILE_NAME: literals.LiteralMap( - { - "results": TypeEngine.to_literal( - ctx, - StructuredDataset(uri="fake_uri"), - StructuredDataset, - LiteralType(structured_dataset_type=StructuredDatasetType(format="")), - ) - } - ) - } - upload_output_file(output_file_dict, poll_request.output_prefix) + # ctx = FlyteContextManager.current_context() + # output_file_dict = { + # constants.OUTPUT_FILE_NAME: literals.LiteralMap( + # { + # "results": TypeEngine.to_literal( + # ctx, + # StructuredDataset(uri="fake_uri"), + # StructuredDataset, + # LiteralType(structured_dataset_type=StructuredDatasetType(format="")), + # ) + # } + # ) + # } + # upload_output_file(output_file_dict, poll_request.output_prefix) state = SUCCEEDED return PollResponse(state=state) From 8ba641cf734b6f7d13a8e66c5b158cc6b68d5118 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 16 Feb 2023 11:34:13 -0800 Subject: [PATCH 09/50] test Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 1 + flytekit/extend/backend/dummy_plugin.py | 39 +++++++++++++++---------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 8f652ffc12..75349fe8db 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -21,6 +21,7 @@ class CreateResponse(BaseModel): class PollRequest(BaseModel): job_id: str output_prefix: str + prev_state: str class PollResponse(BaseModel): diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index 79acde6718..e8dac7245f 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -15,7 +15,7 @@ PollResponse, ) from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file -from flytekit.models import literals, task +from flytekit.models import literals from flytekit.models.types import LiteralType, StructuredDatasetType @@ -35,21 +35,28 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: return CreateResponse(job_id="fake_id") async def poll(self, poll_request: PollRequest) -> PollResponse: - # ctx = FlyteContextManager.current_context() - # output_file_dict = { - # constants.OUTPUT_FILE_NAME: literals.LiteralMap( - # { - # "results": TypeEngine.to_literal( - # ctx, - # StructuredDataset(uri="fake_uri"), - # StructuredDataset, - # LiteralType(structured_dataset_type=StructuredDatasetType(format="")), - # ) - # } - # ) - # } - # upload_output_file(output_file_dict, poll_request.output_prefix) - state = SUCCEEDED + if poll_request.prev_state == SUCCEEDED: + return PollResponse(state=SUCCEEDED) + + x = randint(1, 100) + if x > 50: + ctx = FlyteContextManager.current_context() + output_file_dict = { + constants.OUTPUT_FILE_NAME: literals.LiteralMap( + { + "results": TypeEngine.to_literal( + ctx, + StructuredDataset(uri="fake_uri"), + StructuredDataset, + LiteralType(structured_dataset_type=StructuredDatasetType(format="")), + ) + } + ) + } + upload_output_file(output_file_dict, poll_request.output_prefix) + state = SUCCEEDED + else: + state = RUNNING return PollResponse(state=state) From bda6432356654dd5c2626042b6cc6ace2d299656 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 17 Feb 2023 21:06:59 -0800 Subject: [PATCH 10/50] test Signed-off-by: Kevin Su --- .../flytekitplugins/bigquery/backend_plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 3ba2f24446..180cec0ac5 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -61,7 +61,8 @@ async def poll(self, poll_request: PollRequest) -> PollResponse: client = bigquery.Client() job = client.get_job(poll_request.job_id) state = convert_to_flyte_state(str(job.state)) - if state == SUCCEEDED: + + if poll_request.prev_state != SUCCEEDED and state == SUCCEEDED: ctx = FlyteContextManager.current_context() output_location = f"bq://{job.destination.project}:{job.destination.dataset_id}.{job.destination.table_id}" output_file_dict = { From 609f852aa7fae9f1a11538eb1d7479afb1ace4d4 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 21 Feb 2023 22:14:55 -0800 Subject: [PATCH 11/50] wip Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 16 +++++++++------ flytekit/extend/backend/grpc_server.py | 11 ++++++++++ flytekit/extend/backend/model.py | 28 ++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 flytekit/extend/backend/grpc_server.py create mode 100644 flytekit/extend/backend/model.py diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 75349fe8db..1f69afbfea 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,6 +1,10 @@ +import typing from abc import abstractmethod from typing import List, Optional +from flyteidl.core.tasks_pb2 import TaskTemplate + +from flytekit.models.interface import VariableMap from pydantic import BaseModel PENDING = "pending" @@ -9,8 +13,8 @@ class CreateRequest(BaseModel): - inputs_path: str - task_template_path: str + inputs: VariableMap + task_template: TaskTemplate class CreateResponse(BaseModel): @@ -55,15 +59,15 @@ async def terminate(self, job_id: str): class BackendPluginRegistry(object): - _REGISTRY = [] + _REGISTRY: typing.Dict[str, BackendPluginBase] = {} @staticmethod def register(plugin: BackendPluginBase): - BackendPluginRegistry._REGISTRY.append(plugin) + BackendPluginRegistry._REGISTRY[plugin.task_type] = plugin @staticmethod - def list_registered_plugins() -> List[BackendPluginBase]: - return BackendPluginRegistry._REGISTRY + def get_plugin(task_type: str): + return BackendPluginRegistry._REGISTRY[task_type] def convert_to_flyte_state(state: str): diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py new file mode 100644 index 0000000000..f4201f9955 --- /dev/null +++ b/flytekit/extend/backend/grpc_server.py @@ -0,0 +1,11 @@ +from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer +from flyteidl.service import plugin_system_pb2 + +from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest + + +class BackendPluginServer(BackendPluginServiceServicer): + def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): + plugin = BackendPluginRegistry.get_plugin(request.task_type) + plugin.create(CreateRequest()) + return plugin_system_pb2.TaskCreateResponse() diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py new file mode 100644 index 0000000000..148768d482 --- /dev/null +++ b/flytekit/extend/backend/model.py @@ -0,0 +1,28 @@ +from flytekit.models import common as _common +from flyteidl.service import plugin_system_pb2 + + +class TaskCreateRequest(_common.FlyteIdlEntity): + def __init__(self, task_type: str, input, template): + self._task_type = task_type + self._input = input + self._template = template + + @property + def task_type(self): + return self._task_type + + @property + def input(self): + return self._input + + @property + def template(self): + return self._template + + def to_flyte_idl(self): + return plugin_system_pb2.TaskCreateRequest(task_type=self.task_type, input=self.input, template=self.template) + + @classmethod + def from_flyte_idl(cls, proto): + return cls(task_type=proto.proto, input=proto.input, template=proto.template) \ No newline at end of file From c06662de6e1bd7f347826d54d2e58b001e3e1797 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 23 Feb 2023 00:49:06 -0800 Subject: [PATCH 12/50] wip Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 28 +++++++++++++++----------- flytekit/extend/backend/grpc_server.py | 8 +++++--- flytekit/extend/backend/model.py | 23 +++++++++++++-------- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 1f69afbfea..78aed38db0 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,34 +1,38 @@ +from dataclasses import dataclass import typing from abc import abstractmethod -from typing import List, Optional +from typing import Optional from flyteidl.core.tasks_pb2 import TaskTemplate from flytekit.models.interface import VariableMap -from pydantic import BaseModel PENDING = "pending" SUCCEEDED = "succeeded" RUNNING = "running" -class CreateRequest(BaseModel): +@dataclass +class CreateRequest: inputs: VariableMap task_template: TaskTemplate -class CreateResponse(BaseModel): +@dataclass +class CreateResponse: job_id: str message: Optional[str] -class PollRequest(BaseModel): +@dataclass +class PollRequest: job_id: str output_prefix: str prev_state: str -class PollResponse(BaseModel): +@dataclass +class PollResponse: state: str message: Optional[str] @@ -42,19 +46,19 @@ def task_type(self) -> str: return self._task_type @abstractmethod - async def initialize(self): + def initialize(self): pass @abstractmethod - async def create(self, create_request: CreateRequest) -> CreateResponse: + def create(self, create_request: CreateRequest) -> CreateResponse: pass @abstractmethod - async def poll(self, poll_request: PollRequest) -> PollResponse: + def poll(self, poll_request: PollRequest) -> PollResponse: pass @abstractmethod - async def terminate(self, job_id: str): + def terminate(self, job_id: str): pass @@ -66,11 +70,11 @@ def register(plugin: BackendPluginBase): BackendPluginRegistry._REGISTRY[plugin.task_type] = plugin @staticmethod - def get_plugin(task_type: str): + def get_plugin(task_type: str) -> BackendPluginBase: return BackendPluginRegistry._REGISTRY[task_type] -def convert_to_flyte_state(state: str): +def convert_to_flyte_state(state: str) -> str: if state.lower() in [PENDING]: return PENDING if state.lower() in ["done", SUCCEEDED]: diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index f4201f9955..a408402399 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,11 +1,13 @@ -from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer from flyteidl.service import plugin_system_pb2 +from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest +from flytekit.extend.backend.model import TaskCreateRequest class BackendPluginServer(BackendPluginServiceServicer): def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): - plugin = BackendPluginRegistry.get_plugin(request.task_type) - plugin.create(CreateRequest()) + req = TaskCreateRequest.from_flyte_idl(request) + plugin = BackendPluginRegistry.get_plugin(req.task_type) + plugin.create(CreateRequest(req.inputs, req.template)) return plugin_system_pb2.TaskCreateResponse() diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py index 148768d482..d561096a4d 100644 --- a/flytekit/extend/backend/model.py +++ b/flytekit/extend/backend/model.py @@ -1,11 +1,12 @@ -from flytekit.models import common as _common from flyteidl.service import plugin_system_pb2 +from flytekit.models import common, interface, task -class TaskCreateRequest(_common.FlyteIdlEntity): - def __init__(self, task_type: str, input, template): + +class TaskCreateRequest(common.FlyteIdlEntity): + def __init__(self, task_type: str, inputs: interface.VariableMap, template: task.TaskTemplate): self._task_type = task_type - self._input = input + self._inputs = inputs self._template = template @property @@ -13,16 +14,22 @@ def task_type(self): return self._task_type @property - def input(self): - return self._input + def inputs(self): + return self._inputs @property def template(self): return self._template def to_flyte_idl(self): - return plugin_system_pb2.TaskCreateRequest(task_type=self.task_type, input=self.input, template=self.template) + return plugin_system_pb2.TaskCreateRequest( + task_type=self.task_type, inputs=self.inputs.to_flyte_idl(), template=self.template.to_flyte_idl() + ) @classmethod def from_flyte_idl(cls, proto): - return cls(task_type=proto.proto, input=proto.input, template=proto.template) \ No newline at end of file + return cls( + task_type=proto.proto, + inputs=interface.VariableMap.from_flyte_idl(proto.inputs), + template=task.TaskTemplate.from_flyte_idl(proto.template), + ) From 625548b0dc6218d9e9e6625431d3cd80246b2ad7 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 23 Feb 2023 15:33:52 -0800 Subject: [PATCH 13/50] Add grpc server Signed-off-by: Kevin Su --- backend-plugin-system_grpc.yaml | 34 +++++++++++ flytekit/clients/friendly.py | 2 +- flytekit/clis/sdk_in_container/pyflyte.py | 2 + flytekit/clis/sdk_in_container/serve.py | 22 +++++++ flytekit/extend/backend/base_plugin.py | 4 +- flytekit/extend/backend/dummy_plugin.py | 7 +-- flytekit/extend/backend/fastapi.py | 73 ----------------------- flytekit/extend/backend/grpc_server.py | 17 ++++-- flytekit/extend/backend/model.py | 2 +- 9 files changed, 77 insertions(+), 86 deletions(-) create mode 100644 backend-plugin-system_grpc.yaml create mode 100644 flytekit/clis/sdk_in_container/serve.py delete mode 100644 flytekit/extend/backend/fastapi.py diff --git a/backend-plugin-system_grpc.yaml b/backend-plugin-system_grpc.yaml new file mode 100644 index 0000000000..a5f06532d0 --- /dev/null +++ b/backend-plugin-system_grpc.yaml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: backend-plugin-system + labels: + app: backend-plugin-system +spec: + replicas: 1 + selector: + matchLabels: + app: backend-plugin-system + template: + metadata: + labels: + app: backend-plugin-system + spec: + containers: + - name: backend-plugin-system + image: pingsutw/backend-plugin-system:v1 + ports: + - containerPort: 8000 + +--- +apiVersion: v1 +kind: Service +metadata: + name: backend-plugin-system +spec: + selector: + app: backend-plugin-system + ports: + - protocol: TCP + port: 8000 + targetPort: 8000 diff --git a/flytekit/clients/friendly.py b/flytekit/clients/friendly.py index d542af5f7e..2b15dfbd50 100644 --- a/flytekit/clients/friendly.py +++ b/flytekit/clients/friendly.py @@ -1007,7 +1007,7 @@ def get_upload_signed_url( def get_download_signed_url( self, native_url: str, expires_in: datetime.timedelta = None - ) -> _data_proxy_pb2.CreateUploadLocationResponse: + ) -> _data_proxy_pb2.CreateDownloadLocationRequest: expires_in_pb = None if expires_in: expires_in_pb = Duration() diff --git a/flytekit/clis/sdk_in_container/pyflyte.py b/flytekit/clis/sdk_in_container/pyflyte.py index 76777c5663..11753080c6 100644 --- a/flytekit/clis/sdk_in_container/pyflyte.py +++ b/flytekit/clis/sdk_in_container/pyflyte.py @@ -6,6 +6,7 @@ from flytekit.clis.sdk_in_container.local_cache import local_cache from flytekit.clis.sdk_in_container.package import package from flytekit.clis.sdk_in_container.register import register +from flytekit.clis.sdk_in_container.serve import serve from flytekit.clis.sdk_in_container.run import run from flytekit.clis.sdk_in_container.serialize import serialize from flytekit.configuration.internal import LocalSDK @@ -70,6 +71,7 @@ def main(ctx, pkgs=None, config=None): main.add_command(init) main.add_command(run) main.add_command(register) +main.add_command(serve) if __name__ == "__main__": main() diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py new file mode 100644 index 0000000000..4f7a1fe5f7 --- /dev/null +++ b/flytekit/clis/sdk_in_container/serve.py @@ -0,0 +1,22 @@ +from concurrent import futures + +import click +import grpc +from flytekit.loggers import cli_logger +from flyteidl.service.plugin_system_pb2_grpc import add_BackendPluginServiceServicer_to_server + +from flytekit.extend.backend.grpc_server import BackendPluginServer + +_serve_help = """Start a grpc server for the backend plugin system.""" + + +@click.command("serve", help=_serve_help) +@click.pass_context +def serve(_: click.Context): + print("Starting a grpc server for the backend plugin system.") + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + add_BackendPluginServiceServicer_to_server(BackendPluginServer(), server) + + server.add_insecure_port('[::]:50051') + server.start() + server.wait_for_termination() diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 78aed38db0..05d20b3173 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -21,7 +21,7 @@ class CreateRequest: @dataclass class CreateResponse: job_id: str - message: Optional[str] + message: Optional[str] = None @dataclass @@ -34,7 +34,7 @@ class PollRequest: @dataclass class PollResponse: state: str - message: Optional[str] + message: Optional[str] = None class BackendPluginBase: diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index e8dac7245f..d0d700d631 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -28,10 +28,7 @@ async def initialize(self): pass async def create(self, create_request: CreateRequest) -> CreateResponse: - _ = get_task_template(create_request.task_template_path) - _ = get_task_inputs(create_request.inputs_path) - sleep(1) - + print("creating") return CreateResponse(job_id="fake_id") async def poll(self, poll_request: PollRequest) -> PollResponse: @@ -61,7 +58,7 @@ async def poll(self, poll_request: PollRequest) -> PollResponse: return PollResponse(state=state) async def terminate(self, job_id): - sleep(1) + print("deleting") BackendPluginRegistry.register(DummyPlugin()) diff --git a/flytekit/extend/backend/fastapi.py b/flytekit/extend/backend/fastapi.py deleted file mode 100644 index 955444495f..0000000000 --- a/flytekit/extend/backend/fastapi.py +++ /dev/null @@ -1,73 +0,0 @@ -import typing -from http import HTTPStatus - -from fastapi import FastAPI -from fastapi.responses import HTMLResponse - -from flytekit import __version__ -from flytekit.extend.backend.base_plugin import ( - BackendPluginBase, - BackendPluginRegistry, - CreateRequest, - CreateResponse, - PollRequest, - PollResponse, -) - -PLUGINS_V1 = "plugins/v1" - - -def _create_root_welcome(app: FastAPI, plugins: typing.List[BackendPluginBase]): - l = "" - for p in plugins: - l += f"
  • TaskType: {p.task_type}, Version: {__version__}
  • " - - @app.get("/", response_class=HTMLResponse) - def root(): - return f""" - - - FlyteBackend Plugin Server - - -

    Flyte Backend plugin server.

    -

    Registered plugins

    -
      - {l} -
    - - - """ - - -def _create_health_check(app: FastAPI): - @app.get("/health") - def health(): - return {"message": HTTPStatus.OK.phrase, "status": HTTPStatus.OK} - - -def _serve_plugin(app: FastAPI, plugin: BackendPluginBase): - @app.post(f"/{PLUGINS_V1}/{plugin.task_type}", response_model=CreateResponse) - async def create(create_request: CreateRequest): - return await plugin.create(create_request) - - @app.delete(f"/{PLUGINS_V1}/{plugin.task_type}") - async def terminate(job_id: str): - return await plugin.terminate(job_id) - - @app.get(f"/{PLUGINS_V1}/{plugin.task_type}", response_model=PollResponse) - async def poll(poll_request: PollRequest): - return await plugin.poll(poll_request) - - -def serve_all_registered_plugins(app: FastAPI): - plugins = BackendPluginRegistry.list_registered_plugins() - _create_root_welcome(app, plugins) - _create_health_check(app) - for plugin in plugins: - plugin.initialize() - _serve_plugin(app, plugin) - - -app = FastAPI() -serve_all_registered_plugins(app) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index a408402399..eccfa8e016 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,7 +1,10 @@ +from concurrent import futures + +import grpc from flyteidl.service import plugin_system_pb2 -from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer +from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer, add_BackendPluginServiceServicer_to_server -from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest +from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest, SUCCEEDED from flytekit.extend.backend.model import TaskCreateRequest @@ -9,5 +12,11 @@ class BackendPluginServer(BackendPluginServiceServicer): def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): req = TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.task_type) - plugin.create(CreateRequest(req.inputs, req.template)) - return plugin_system_pb2.TaskCreateResponse() + res = plugin.create(CreateRequest(req.inputs, req.template)) + return plugin_system_pb2.TaskCreateResponse(res.job_id, res.message) + + def GetTask(self, request, context): + return plugin_system_pb2.TaskGetResponse(state=SUCCEEDED) + + def DeleteTask(self, request, context): + print("deleting") diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py index d561096a4d..140b55114c 100644 --- a/flytekit/extend/backend/model.py +++ b/flytekit/extend/backend/model.py @@ -30,6 +30,6 @@ def to_flyte_idl(self): def from_flyte_idl(cls, proto): return cls( task_type=proto.proto, - inputs=interface.VariableMap.from_flyte_idl(proto.inputs), + inputs=interface.VariableMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, template=task.TaskTemplate.from_flyte_idl(proto.template), ) From c44690064f44863b9873ec63203def6b6a9e3a47 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 23 Feb 2023 16:14:37 -0800 Subject: [PATCH 14/50] nit Signed-off-by: Kevin Su --- flytekit/clis/sdk_in_container/serve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 4f7a1fe5f7..89328bdb6f 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -17,6 +17,6 @@ def serve(_: click.Context): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) add_BackendPluginServiceServicer_to_server(BackendPluginServer(), server) - server.add_insecure_port('[::]:50051') + server.add_insecure_port('[::]:8000') server.start() server.wait_for_termination() From 787031e073517f39f339fbbf5b07b602e3c83e52 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 23 Feb 2023 16:30:00 -0800 Subject: [PATCH 15/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/grpc_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index eccfa8e016..07558b95e1 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -11,7 +11,7 @@ class BackendPluginServer(BackendPluginServiceServicer): def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): req = TaskCreateRequest.from_flyte_idl(request) - plugin = BackendPluginRegistry.get_plugin(req.task_type) + plugin = BackendPluginRegistry.get_plugin(req.template.type) res = plugin.create(CreateRequest(req.inputs, req.template)) return plugin_system_pb2.TaskCreateResponse(res.job_id, res.message) From 996552ce3a7d6637dfef2d8cea64643c6e6cb4ad Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 24 Feb 2023 03:02:07 -0800 Subject: [PATCH 16/50] grpc plugin Signed-off-by: Kevin Su --- flytekit/extend/backend/grpc_server.py | 5 +---- flytekit/extend/backend/model.py | 10 ++-------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index 07558b95e1..9ff196496c 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,6 +1,3 @@ -from concurrent import futures - -import grpc from flyteidl.service import plugin_system_pb2 from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer, add_BackendPluginServiceServicer_to_server @@ -16,7 +13,7 @@ def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): return plugin_system_pb2.TaskCreateResponse(res.job_id, res.message) def GetTask(self, request, context): - return plugin_system_pb2.TaskGetResponse(state=SUCCEEDED) + return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) def DeleteTask(self, request, context): print("deleting") diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py index 140b55114c..8ef0c36b44 100644 --- a/flytekit/extend/backend/model.py +++ b/flytekit/extend/backend/model.py @@ -4,15 +4,10 @@ class TaskCreateRequest(common.FlyteIdlEntity): - def __init__(self, task_type: str, inputs: interface.VariableMap, template: task.TaskTemplate): - self._task_type = task_type + def __init__(self, inputs: interface.VariableMap, template: task.TaskTemplate): self._inputs = inputs self._template = template - @property - def task_type(self): - return self._task_type - @property def inputs(self): return self._inputs @@ -23,13 +18,12 @@ def template(self): def to_flyte_idl(self): return plugin_system_pb2.TaskCreateRequest( - task_type=self.task_type, inputs=self.inputs.to_flyte_idl(), template=self.template.to_flyte_idl() + inputs=self.inputs.to_flyte_idl(), template=self.template.to_flyte_idl() ) @classmethod def from_flyte_idl(cls, proto): return cls( - task_type=proto.proto, inputs=interface.VariableMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, template=task.TaskTemplate.from_flyte_idl(proto.template), ) From ce60f20cfb23e2f4d70098b35e6418a6703a30d6 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 24 Feb 2023 14:54:41 -0800 Subject: [PATCH 17/50] nit Signed-off-by: Kevin Su --- backend-plugin-system_grpc.yaml | 16 ++++++++-------- flytekit/extend/backend/base_plugin.py | 3 ++- flytekit/extend/backend/grpc_server.py | 2 ++ flytekit/extend/backend/model.py | 7 ++++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/backend-plugin-system_grpc.yaml b/backend-plugin-system_grpc.yaml index a5f06532d0..971e944513 100644 --- a/backend-plugin-system_grpc.yaml +++ b/backend-plugin-system_grpc.yaml @@ -1,22 +1,22 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: backend-plugin-system + name: backend-plugin-system-grpc labels: - app: backend-plugin-system + app: backend-plugin-system-grpc spec: replicas: 1 selector: matchLabels: - app: backend-plugin-system + app: backend-plugin-system-grpc template: metadata: labels: - app: backend-plugin-system + app: backend-plugin-system-grpc spec: containers: - - name: backend-plugin-system - image: pingsutw/backend-plugin-system:v1 + - name: backend-plugin-system-grpc + image: pingsutw/backend-plugin-system-grpc:v1 ports: - containerPort: 8000 @@ -24,10 +24,10 @@ spec: apiVersion: v1 kind: Service metadata: - name: backend-plugin-system + name: backend-plugin-system-grpc-svc spec: selector: - app: backend-plugin-system + app: backend-plugin-system-grpc ports: - protocol: TCP port: 8000 diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 05d20b3173..efc4464fc3 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -6,6 +6,7 @@ from flyteidl.core.tasks_pb2 import TaskTemplate from flytekit.models.interface import VariableMap +from flytekit.models.literals import LiteralMap PENDING = "pending" SUCCEEDED = "succeeded" @@ -14,7 +15,7 @@ @dataclass class CreateRequest: - inputs: VariableMap + inputs: LiteralMap task_template: TaskTemplate diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index 9ff196496c..184dfbd447 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -7,12 +7,14 @@ class BackendPluginServer(BackendPluginServiceServicer): def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): + print("creating") req = TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) res = plugin.create(CreateRequest(req.inputs, req.template)) return plugin_system_pb2.TaskCreateResponse(res.job_id, res.message) def GetTask(self, request, context): + print("getting") return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) def DeleteTask(self, request, context): diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py index 8ef0c36b44..3e453bac22 100644 --- a/flytekit/extend/backend/model.py +++ b/flytekit/extend/backend/model.py @@ -1,10 +1,11 @@ from flyteidl.service import plugin_system_pb2 -from flytekit.models import common, interface, task +from flytekit.models import common, task +from flytekit.models.literals import LiteralMap class TaskCreateRequest(common.FlyteIdlEntity): - def __init__(self, inputs: interface.VariableMap, template: task.TaskTemplate): + def __init__(self, inputs: LiteralMap, template: task.TaskTemplate): self._inputs = inputs self._template = template @@ -24,6 +25,6 @@ def to_flyte_idl(self): @classmethod def from_flyte_idl(cls, proto): return cls( - inputs=interface.VariableMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, + inputs=LiteralMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, template=task.TaskTemplate.from_flyte_idl(proto.template), ) From 18af9ac6dbab3e236914b620d9f50c4102a29269 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 24 Feb 2023 15:11:17 -0800 Subject: [PATCH 18/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 5 +++-- flytekit/extend/backend/dummy_plugin.py | 21 ++++++++++----------- flytekit/extend/backend/grpc_server.py | 16 ++++++++++------ 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index efc4464fc3..e0b71ee7bf 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -4,6 +4,7 @@ from typing import Optional from flyteidl.core.tasks_pb2 import TaskTemplate +from flyteidl.service import plugin_system_pb2 from flytekit.models.interface import VariableMap from flytekit.models.literals import LiteralMap @@ -29,12 +30,12 @@ class CreateResponse: class PollRequest: job_id: str output_prefix: str - prev_state: str + prev_state: plugin_system_pb2.State @dataclass class PollResponse: - state: str + state: plugin_system_pb2.State message: Optional[str] = None diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index d0d700d631..903083f6b8 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -1,12 +1,11 @@ from random import randint -from time import sleep + +from flyteidl.service import plugin_system_pb2 from flytekit import FlyteContextManager, StructuredDataset from flytekit.core import constants from flytekit.core.type_engine import TypeEngine from flytekit.extend.backend.base_plugin import ( - RUNNING, - SUCCEEDED, BackendPluginBase, BackendPluginRegistry, CreateRequest, @@ -24,16 +23,16 @@ class DummyPlugin(BackendPluginBase): def __init__(self): super().__init__(task_type="dummy") - async def initialize(self): + def initialize(self): pass - async def create(self, create_request: CreateRequest) -> CreateResponse: + def create(self, create_request: CreateRequest) -> CreateResponse: print("creating") return CreateResponse(job_id="fake_id") - async def poll(self, poll_request: PollRequest) -> PollResponse: - if poll_request.prev_state == SUCCEEDED: - return PollResponse(state=SUCCEEDED) + def poll(self, poll_request: PollRequest) -> PollResponse: + if poll_request.prev_state == plugin_system_pb2.SUCCEEDED: + return PollResponse(state=plugin_system_pb2.SUCCEEDED) x = randint(1, 100) if x > 50: @@ -51,13 +50,13 @@ async def poll(self, poll_request: PollRequest) -> PollResponse: ) } upload_output_file(output_file_dict, poll_request.output_prefix) - state = SUCCEEDED + state = plugin_system_pb2.SUCCEEDED else: - state = RUNNING + state = plugin_system_pb2.RUNNING return PollResponse(state=state) - async def terminate(self, job_id): + def terminate(self, job_id): print("deleting") diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index 184dfbd447..0951708408 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,7 +1,7 @@ from flyteidl.service import plugin_system_pb2 from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer, add_BackendPluginServiceServicer_to_server -from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest, SUCCEEDED +from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest, SUCCEEDED, PollRequest from flytekit.extend.backend.model import TaskCreateRequest @@ -11,11 +11,15 @@ def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): req = TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) res = plugin.create(CreateRequest(req.inputs, req.template)) - return plugin_system_pb2.TaskCreateResponse(res.job_id, res.message) + return plugin_system_pb2.TaskCreateResponse(job_id=res.job_id, message=res.message) - def GetTask(self, request, context): + def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context): print("getting") - return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) + plugin = BackendPluginRegistry.get_plugin(request.task_type) + res = plugin.poll(PollRequest(job_id=request.job_id, output_prefix=request.output_prefix, prev_state=request.prev_state)) + return plugin_system_pb2.TaskGetResponse(state=res.state, message=res.message) - def DeleteTask(self, request, context): - print("deleting") + def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context): + plugin = BackendPluginRegistry.get_plugin(request.task_type) + res = plugin.terminate(request.job_id) + print("deleting", res) From 047b7f1a6947a42af6d6a59532881faf9c70ced9 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 24 Feb 2023 15:51:10 -0800 Subject: [PATCH 19/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/grpc_server.py | 1 + .../flytekitplugins/bigquery/backend_plugin.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index 0951708408..eedd75039b 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -23,3 +23,4 @@ def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context): plugin = BackendPluginRegistry.get_plugin(request.task_type) res = plugin.terminate(request.job_id) print("deleting", res) + return plugin_system_pb2.TaskDeleteResponse() \ No newline at end of file diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 180cec0ac5..f39c887813 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -29,11 +29,11 @@ class BigQueryPlugin(BackendPluginBase): def __init__(self): super().__init__(task_type="bigquery") - async def initialize(self): + def initialize(self): # TODO: Read GOOGLE_APPLICATION_CREDENTIALS from secret. If not found, raise an error. pass - async def create(self, create_request: CreateRequest) -> CreateResponse: + def create(self, create_request: CreateRequest) -> CreateResponse: ctx = FlyteContextManager.current_context() task_template = get_task_template(create_request.task_template_path) task_input_literals = get_task_inputs(create_request.inputs_path) @@ -57,7 +57,7 @@ async def create(self, create_request: CreateRequest) -> CreateResponse: return CreateResponse(job_id=query_job.job_id) - async def poll(self, poll_request: PollRequest) -> PollResponse: + def poll(self, poll_request: PollRequest) -> PollResponse: client = bigquery.Client() job = client.get_job(poll_request.job_id) state = convert_to_flyte_state(str(job.state)) @@ -81,7 +81,7 @@ async def poll(self, poll_request: PollRequest) -> PollResponse: return PollResponse(job_id=job.job_id, state=state) - async def terminate(self, job_id): + def terminate(self, job_id): client = bigquery.Client() client.cancel_job(job_id) From cf9cf3e6718c5cf1008890fe360b0e6d8ee75491 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Sat, 25 Feb 2023 02:48:14 -0800 Subject: [PATCH 20/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/grpc_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index eedd75039b..ebb482f29c 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -23,4 +23,4 @@ def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context): plugin = BackendPluginRegistry.get_plugin(request.task_type) res = plugin.terminate(request.job_id) print("deleting", res) - return plugin_system_pb2.TaskDeleteResponse() \ No newline at end of file + return plugin_system_pb2.TaskDeleteResponse() From 82c048fab82f8eb2ae09d8286b209f826f60c134 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 27 Feb 2023 11:36:16 -0800 Subject: [PATCH 21/50] grpc server Signed-off-by: Kevin Su --- Dockerfile.backend-plugin-system-grpc | 17 ++++++++ flytekit/clis/sdk_in_container/pyflyte.py | 2 +- flytekit/clis/sdk_in_container/serve.py | 4 +- flytekit/extend/backend/base_plugin.py | 38 +++++++---------- flytekit/extend/backend/dummy_plugin.py | 25 ++++++----- flytekit/extend/backend/grpc_server.py | 18 ++++---- .../bigquery/backend_plugin.py | 42 +++++++++---------- 7 files changed, 80 insertions(+), 66 deletions(-) create mode 100644 Dockerfile.backend-plugin-system-grpc diff --git a/Dockerfile.backend-plugin-system-grpc b/Dockerfile.backend-plugin-system-grpc new file mode 100644 index 0000000000..a8fee1053f --- /dev/null +++ b/Dockerfile.backend-plugin-system-grpc @@ -0,0 +1,17 @@ +FROM python:3.9-slim-buster + +MAINTAINER Flyte Team +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytekit + +WORKDIR /root +ENV PYTHONPATH /root + +RUN apt-get update && apt-get install -y git +RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc#egg=flytekitplugins-bigquery&subdirectory=plugins/flytekit-bigquery" +RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc" +RUN pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" +RUN pip install numpy==1.23.1 +RUN pip install awscli +RUN pip install protobuf==3.20.1 + +CMD pyflyte serve diff --git a/flytekit/clis/sdk_in_container/pyflyte.py b/flytekit/clis/sdk_in_container/pyflyte.py index 11753080c6..6dfaaa0c58 100644 --- a/flytekit/clis/sdk_in_container/pyflyte.py +++ b/flytekit/clis/sdk_in_container/pyflyte.py @@ -6,9 +6,9 @@ from flytekit.clis.sdk_in_container.local_cache import local_cache from flytekit.clis.sdk_in_container.package import package from flytekit.clis.sdk_in_container.register import register -from flytekit.clis.sdk_in_container.serve import serve from flytekit.clis.sdk_in_container.run import run from flytekit.clis.sdk_in_container.serialize import serialize +from flytekit.clis.sdk_in_container.serve import serve from flytekit.configuration.internal import LocalSDK from flytekit.loggers import cli_logger diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 89328bdb6f..0e5c34a931 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -2,10 +2,10 @@ import click import grpc -from flytekit.loggers import cli_logger from flyteidl.service.plugin_system_pb2_grpc import add_BackendPluginServiceServicer_to_server from flytekit.extend.backend.grpc_server import BackendPluginServer +from flytekit.loggers import cli_logger _serve_help = """Start a grpc server for the backend plugin system.""" @@ -17,6 +17,6 @@ def serve(_: click.Context): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) add_BackendPluginServiceServicer_to_server(BackendPluginServer(), server) - server.add_insecure_port('[::]:8000') + server.add_insecure_port("[::]:8000") server.start() server.wait_for_termination() diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index e0b71ee7bf..c34884eb0e 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,28 +1,18 @@ -from dataclasses import dataclass import typing from abc import abstractmethod +from dataclasses import dataclass from typing import Optional from flyteidl.core.tasks_pb2 import TaskTemplate from flyteidl.service import plugin_system_pb2 -from flytekit.models.interface import VariableMap from flytekit.models.literals import LiteralMap -PENDING = "pending" -SUCCEEDED = "succeeded" -RUNNING = "running" - - -@dataclass -class CreateRequest: - inputs: LiteralMap - task_template: TaskTemplate - @dataclass class CreateResponse: job_id: str + output_prefix: str message: Optional[str] = None @@ -52,15 +42,19 @@ def initialize(self): pass @abstractmethod - def create(self, create_request: CreateRequest) -> CreateResponse: + def create( + self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate + ) -> plugin_system_pb2.TaskCreateResponse: pass @abstractmethod - def poll(self, poll_request: PollRequest) -> PollResponse: + def get( + self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State + ) -> plugin_system_pb2.TaskGetResponse: pass @abstractmethod - def terminate(self, job_id: str): + def delete(self, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: pass @@ -76,11 +70,11 @@ def get_plugin(task_type: str) -> BackendPluginBase: return BackendPluginRegistry._REGISTRY[task_type] -def convert_to_flyte_state(state: str) -> str: - if state.lower() in [PENDING]: - return PENDING - if state.lower() in ["done", SUCCEEDED]: - return SUCCEEDED - if state.lower() in [RUNNING]: - return RUNNING +def convert_to_flyte_state(state: str) -> plugin_system_pb2.State: + if state.lower() in ["failed"]: + return plugin_system_pb2.FAILED + if state.lower() in ["done", "succeeded"]: + return plugin_system_pb2.SUCCEEDED + if state.lower() in ["running"]: + return plugin_system_pb2.RUNNING raise ValueError("Unrecognize state") diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index 903083f6b8..28a99ffbfa 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -8,13 +8,14 @@ from flytekit.extend.backend.base_plugin import ( BackendPluginBase, BackendPluginRegistry, - CreateRequest, CreateResponse, PollRequest, PollResponse, ) from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file from flytekit.models import literals +from flytekit.models.literals import LiteralMap +from flytekit.models.task import TaskTemplate from flytekit.models.types import LiteralType, StructuredDatasetType @@ -26,13 +27,16 @@ def __init__(self): def initialize(self): pass - def create(self, create_request: CreateRequest) -> CreateResponse: - print("creating") - return CreateResponse(job_id="fake_id") + def create( + self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate + ) -> plugin_system_pb2.TaskCreateResponse: + return plugin_system_pb2.TaskCreateResponse(job_id="fake_id") - def poll(self, poll_request: PollRequest) -> PollResponse: - if poll_request.prev_state == plugin_system_pb2.SUCCEEDED: - return PollResponse(state=plugin_system_pb2.SUCCEEDED) + def get( + self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State + ) -> plugin_system_pb2.TaskGetResponse: + if prev_state == plugin_system_pb2.SUCCEEDED: + return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) x = randint(1, 100) if x > 50: @@ -49,15 +53,16 @@ def poll(self, poll_request: PollRequest) -> PollResponse: } ) } - upload_output_file(output_file_dict, poll_request.output_prefix) + upload_output_file(output_file_dict, output_prefix) state = plugin_system_pb2.SUCCEEDED else: state = plugin_system_pb2.RUNNING - return PollResponse(state=state) + return plugin_system_pb2.TaskGetResponse(state=state) - def terminate(self, job_id): + def delete(self, job_id) -> plugin_system_pb2.TaskDeleteResponse: print("deleting") + return plugin_system_pb2.TaskDeleteResponse() BackendPluginRegistry.register(DummyPlugin()) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index ebb482f29c..f3ee0047fd 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,26 +1,26 @@ from flyteidl.service import plugin_system_pb2 -from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer, add_BackendPluginServiceServicer_to_server +from flyteidl.service.plugin_system_pb2_grpc import ( + BackendPluginServiceServicer, + add_BackendPluginServiceServicer_to_server, +) -from flytekit.extend.backend.base_plugin import BackendPluginRegistry, CreateRequest, SUCCEEDED, PollRequest +from flytekit.extend.backend.base_plugin import BackendPluginRegistry, PollRequest from flytekit.extend.backend.model import TaskCreateRequest class BackendPluginServer(BackendPluginServiceServicer): - def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context): - print("creating") + def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> plugin_system_pb2.TaskCreateResponse: req = TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) - res = plugin.create(CreateRequest(req.inputs, req.template)) + res = plugin.create(req.inputs, req.output_prefix, req.template) return plugin_system_pb2.TaskCreateResponse(job_id=res.job_id, message=res.message) def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context): - print("getting") plugin = BackendPluginRegistry.get_plugin(request.task_type) - res = plugin.poll(PollRequest(job_id=request.job_id, output_prefix=request.output_prefix, prev_state=request.prev_state)) + res = plugin.get(job_id=request.job_id, output_prefix=request.output_prefix, prev_state=request.prev_state) return plugin_system_pb2.TaskGetResponse(state=res.state, message=res.message) def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context): plugin = BackendPluginRegistry.get_plugin(request.task_type) - res = plugin.terminate(request.job_id) - print("deleting", res) + res = plugin.delete(request.job_id) return plugin_system_pb2.TaskDeleteResponse() diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index f39c887813..ddaaae3cab 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -1,22 +1,17 @@ from typing import Dict +from flyteidl.service import plugin_system_pb2 +from flyteidl.service.plugin_system_pb2 import TaskGetResponse from google.cloud import bigquery from flytekit import FlyteContextManager, StructuredDataset from flytekit.core import constants from flytekit.core.type_engine import TypeEngine -from flytekit.extend.backend.base_plugin import ( - SUCCEEDED, - BackendPluginBase, - BackendPluginRegistry, - CreateRequest, - CreateResponse, - PollRequest, - PollResponse, - convert_to_flyte_state, -) +from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry, convert_to_flyte_state from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file from flytekit.models import literals +from flytekit.models.literals import LiteralMap +from flytekit.models.task import TaskTemplate from flytekit.models.types import LiteralType, StructuredDatasetType pythonTypeToBigQueryType: Dict[type, str] = { @@ -33,17 +28,17 @@ def initialize(self): # TODO: Read GOOGLE_APPLICATION_CREDENTIALS from secret. If not found, raise an error. pass - def create(self, create_request: CreateRequest) -> CreateResponse: + def create( + self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate + ) -> plugin_system_pb2.TaskCreateResponse: ctx = FlyteContextManager.current_context() - task_template = get_task_template(create_request.task_template_path) - task_input_literals = get_task_inputs(create_request.inputs_path) # 3. Submit the job # TODO: is there any other way to get python interface input? python_interface_inputs = { name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() } - native_inputs = TypeEngine.literal_map_to_kwargs(ctx, task_input_literals, python_interface_inputs) + native_inputs = TypeEngine.literal_map_to_kwargs(ctx, inputs, python_interface_inputs) job_config = bigquery.QueryJobConfig( query_parameters=[ bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val) @@ -55,14 +50,16 @@ def create(self, create_request: CreateRequest) -> CreateResponse: client = bigquery.Client(project=custom["ProjectID"], location=custom["Location"]) query_job = client.query(task_template.sql.statement, job_config=job_config) - return CreateResponse(job_id=query_job.job_id) + return plugin_system_pb2.TaskCreateResponse(job_id=query_job.job_id) - def poll(self, poll_request: PollRequest) -> PollResponse: + def get( + self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State + ) -> plugin_system_pb2.TaskGetResponse: client = bigquery.Client() - job = client.get_job(poll_request.job_id) - state = convert_to_flyte_state(str(job.state)) + job = client.get_job(job_id) + cur_state = convert_to_flyte_state(str(job.state)) - if poll_request.prev_state != SUCCEEDED and state == SUCCEEDED: + if prev_state != plugin_system_pb2.SUCCEEDED and cur_state == plugin_system_pb2.SUCCEEDED: ctx = FlyteContextManager.current_context() output_location = f"bq://{job.destination.project}:{job.destination.dataset_id}.{job.destination.table_id}" output_file_dict = { @@ -77,13 +74,14 @@ def poll(self, poll_request: PollRequest) -> PollResponse: } ) } - upload_output_file(output_file_dict, poll_request.output_prefix) + upload_output_file(output_file_dict, output_prefix) - return PollResponse(job_id=job.job_id, state=state) + return TaskGetResponse(state=cur_state) - def terminate(self, job_id): + def delete(self, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: client = bigquery.Client() client.cancel_job(job_id) + return plugin_system_pb2.TaskDeleteResponse() BackendPluginRegistry.register(BigQueryPlugin()) From d66c3f8bfb25245aa632b5a082d266734160e90e Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 27 Feb 2023 12:45:30 -0800 Subject: [PATCH 22/50] clean up Signed-off-by: Kevin Su --- backend-plugin-system_grpc.yaml | 2 +- flytekit/core/constants.py | 1 - flytekit/core/utils.py | 6 ---- flytekit/extend/backend/base_plugin.py | 22 --------------- flytekit/extend/backend/grpc_server.py | 7 ++--- flytekit/extend/backend/utils.py | 28 +------------------ .../bigquery/backend_plugin.py | 1 - .../flytekitplugins/bigquery/task.py | 3 -- .../flytekitplugins/spark/task.py | 1 + 9 files changed, 5 insertions(+), 66 deletions(-) diff --git a/backend-plugin-system_grpc.yaml b/backend-plugin-system_grpc.yaml index 971e944513..385b9dfdfb 100644 --- a/backend-plugin-system_grpc.yaml +++ b/backend-plugin-system_grpc.yaml @@ -24,7 +24,7 @@ spec: apiVersion: v1 kind: Service metadata: - name: backend-plugin-system-grpc-svc + name: backend-plugin-system-grpc spec: selector: app: backend-plugin-system-grpc diff --git a/flytekit/core/constants.py b/flytekit/core/constants.py index cb2f97e0c6..cda20602b2 100644 --- a/flytekit/core/constants.py +++ b/flytekit/core/constants.py @@ -2,7 +2,6 @@ OUTPUT_FILE_NAME = "outputs.pb" FUTURES_FILE_NAME = "futures.pb" ERROR_FILE_NAME = "error.pb" -ENABLE_BACKEND_SYSTEM_SERVICE = "ENABLE_BACKEND_SYSTEM_SERVICE" class SdkTaskType(object): diff --git a/flytekit/core/utils.py b/flytekit/core/utils.py index 48a367ebd1..ee2c841465 100644 --- a/flytekit/core/utils.py +++ b/flytekit/core/utils.py @@ -1,4 +1,3 @@ -import os import os as _os import shutil as _shutil import tempfile as _tempfile @@ -7,7 +6,6 @@ from pathlib import Path from typing import Dict, List, Optional -from flytekit.core.constants import ENABLE_BACKEND_SYSTEM_SERVICE from flytekit.loggers import logger from flytekit.models import task as task_models @@ -234,7 +232,3 @@ def __exit__(self, exc_type, exc_val, exc_tb): end_process_time - self._start_process_time, ) ) - - -def is_backend_plugin_service_enabled(): - return os.environ.get(ENABLE_BACKEND_SYSTEM_SERVICE).lower() == "true" diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index c34884eb0e..95c3c8172c 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,7 +1,5 @@ import typing from abc import abstractmethod -from dataclasses import dataclass -from typing import Optional from flyteidl.core.tasks_pb2 import TaskTemplate from flyteidl.service import plugin_system_pb2 @@ -9,26 +7,6 @@ from flytekit.models.literals import LiteralMap -@dataclass -class CreateResponse: - job_id: str - output_prefix: str - message: Optional[str] = None - - -@dataclass -class PollRequest: - job_id: str - output_prefix: str - prev_state: plugin_system_pb2.State - - -@dataclass -class PollResponse: - state: plugin_system_pb2.State - message: Optional[str] = None - - class BackendPluginBase: def __init__(self, task_type: str): self._task_type = task_type diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index f3ee0047fd..d09e656031 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,10 +1,7 @@ from flyteidl.service import plugin_system_pb2 -from flyteidl.service.plugin_system_pb2_grpc import ( - BackendPluginServiceServicer, - add_BackendPluginServiceServicer_to_server, -) +from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer -from flytekit.extend.backend.base_plugin import BackendPluginRegistry, PollRequest +from flytekit.extend.backend.base_plugin import BackendPluginRegistry from flytekit.extend.backend.model import TaskCreateRequest diff --git a/flytekit/extend/backend/utils.py b/flytekit/extend/backend/utils.py index 2b56b7bb36..11357678eb 100644 --- a/flytekit/extend/backend/utils.py +++ b/flytekit/extend/backend/utils.py @@ -1,34 +1,8 @@ import os import typing -from flyteidl.core import literals_pb2, tasks_pb2 - -from flytekit import FlyteContextManager, logger +from flytekit import FlyteContextManager from flytekit.core import utils -from flytekit.core.utils import load_proto_from_file -from flytekit.models import literals, task -from flytekit.models.literals import LiteralMap -from flytekit.models.task import TaskTemplate - - -def get_task_template(task_template_path: str) -> TaskTemplate: - ctx = FlyteContextManager.current_context() - task_template_local_path = os.path.join(ctx.execution_state.working_dir, "task_template.pb") - ctx.file_access.get_data(task_template_path, task_template_local_path) - task_template_proto = load_proto_from_file(tasks_pb2.TaskTemplate, task_template_local_path) - task_template_model = task.TaskTemplate.from_flyte_idl(task_template_proto) - print(f"Task Template: {task_template_model}") # For debug, will remove it - return task_template_model - - -def get_task_inputs(inputs_path: str) -> LiteralMap: - ctx = FlyteContextManager.current_context() - task_inputs_local_path = os.path.join(ctx.execution_state.working_dir, "inputs.pb") - ctx.file_access.get_data(inputs_path, task_inputs_local_path) - input_proto = utils.load_proto_from_file(literals_pb2.LiteralMap, task_inputs_local_path) - idl_input_literals = literals.LiteralMap.from_flyte_idl(input_proto) - logger.debug(f"Task inputs: {idl_input_literals}") - return idl_input_literals def upload_output_file(output_file_dict: typing.Dict, output_prefix: str): diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index ddaaae3cab..896e08f7b0 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -33,7 +33,6 @@ def create( ) -> plugin_system_pb2.TaskCreateResponse: ctx = FlyteContextManager.current_context() - # 3. Submit the job # TODO: is there any other way to get python interface input? python_interface_inputs = { name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py index 0ddf684482..a58815ea60 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py @@ -84,7 +84,4 @@ def get_sql(self, settings: SerializationSettings) -> Optional[_task_model.Sql]: return sql def execute(self, **kwargs) -> Any: - if not is_backend_plugin_service_enabled(): - raise Exception("Backend plugin service is not enabled") - raise Exception("Cannot run a SQL Task natively, please mock.") diff --git a/plugins/flytekit-spark/flytekitplugins/spark/task.py b/plugins/flytekit-spark/flytekitplugins/spark/task.py index ad5f91c80b..7b32e9f28b 100644 --- a/plugins/flytekit-spark/flytekitplugins/spark/task.py +++ b/plugins/flytekit-spark/flytekitplugins/spark/task.py @@ -86,6 +86,7 @@ def new_spark_session(name: str, conf: typing.Dict[str, str] = None): # If there is a global SparkSession available, get it and try to stop it. _pyspark.sql.SparkSession.builder.getOrCreate().stop() + return sess_builder.getOrCreate() # SparkSession.Stop does not work correctly, as it stops the session before all the data is written # sess.stop() From 5f0084f90e883cca458988d9252ab16310aef152 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 27 Feb 2023 13:00:54 -0800 Subject: [PATCH 23/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/dummy_plugin.py | 10 ++-------- .../flytekitplugins/bigquery/backend_plugin.py | 2 +- .../flytekit-bigquery/flytekitplugins/bigquery/task.py | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index 28a99ffbfa..02582a6174 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -5,14 +5,8 @@ from flytekit import FlyteContextManager, StructuredDataset from flytekit.core import constants from flytekit.core.type_engine import TypeEngine -from flytekit.extend.backend.base_plugin import ( - BackendPluginBase, - BackendPluginRegistry, - CreateResponse, - PollRequest, - PollResponse, -) -from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file +from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry +from flytekit.extend.backend.utils import upload_output_file from flytekit.models import literals from flytekit.models.literals import LiteralMap from flytekit.models.task import TaskTemplate diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 896e08f7b0..1d3b5e15d4 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -8,7 +8,7 @@ from flytekit.core import constants from flytekit.core.type_engine import TypeEngine from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry, convert_to_flyte_state -from flytekit.extend.backend.utils import get_task_inputs, get_task_template, upload_output_file +from flytekit.extend.backend.utils import upload_output_file from flytekit.models import literals from flytekit.models.literals import LiteralMap from flytekit.models.task import TaskTemplate diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py index a58815ea60..5cf30a8c13 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/task.py @@ -7,7 +7,6 @@ from flytekit import StructuredDataset from flytekit.configuration import SerializationSettings -from flytekit.core.utils import is_backend_plugin_service_enabled from flytekit.extend import SQLTask from flytekit.models import task as _task_model From 2edc6205ea1d4793235e03f056af0f17a5716fe7 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 27 Feb 2023 15:27:23 -0800 Subject: [PATCH 24/50] test Signed-off-by: Kevin Su --- .github/workflows/pythonbuild.yml | 3 ++ ...pc.yaml => backend-plugin-system-grpc.yaml | 0 flytekit/extend/backend/base_plugin.py | 2 +- flytekit/extend/backend/dummy_plugin.py | 1 + flytekit/extend/backend/grpc_server.py | 9 ++-- .../bigquery/backend_plugin.py | 8 ++-- .../unit/extend/test_backend_plugin.py | 39 +++++++++++++++++ tests/flytekit/unit/extend/test_model.py | 43 +++++++++++++++++++ tests/flytekit/unit/extend/test_utils.py | 28 ++++++++++++ 9 files changed, 121 insertions(+), 12 deletions(-) rename backend-plugin-system_grpc.yaml => backend-plugin-system-grpc.yaml (100%) create mode 100644 tests/flytekit/unit/extend/test_backend_plugin.py create mode 100644 tests/flytekit/unit/extend/test_model.py create mode 100644 tests/flytekit/unit/extend/test_utils.py diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 442f7c01f8..d78f44b391 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -39,6 +39,9 @@ jobs: - name: Install dependencies run: | make setup + RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc#egg=flytekitplugins-bigquery&subdirectory=plugins/flytekit-bigquery" + RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc" + RUN pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" pip freeze - name: Test with coverage run: | diff --git a/backend-plugin-system_grpc.yaml b/backend-plugin-system-grpc.yaml similarity index 100% rename from backend-plugin-system_grpc.yaml rename to backend-plugin-system-grpc.yaml diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 95c3c8172c..03edbe0a0b 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -21,7 +21,7 @@ def initialize(self): @abstractmethod def create( - self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate + self, inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate ) -> plugin_system_pb2.TaskCreateResponse: pass diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index 02582a6174..eaa74fed05 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -14,6 +14,7 @@ # This plugin is used for performance benchmarking +# will remove this file before pr is merged class DummyPlugin(BackendPluginBase): def __init__(self): super().__init__(task_type="dummy") diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index d09e656031..e31a04bdb7 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -9,15 +9,12 @@ class BackendPluginServer(BackendPluginServiceServicer): def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> plugin_system_pb2.TaskCreateResponse: req = TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) - res = plugin.create(req.inputs, req.output_prefix, req.template) - return plugin_system_pb2.TaskCreateResponse(job_id=res.job_id, message=res.message) + return plugin.create(req.inputs, req.output_prefix, req.template) def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context): plugin = BackendPluginRegistry.get_plugin(request.task_type) - res = plugin.get(job_id=request.job_id, output_prefix=request.output_prefix, prev_state=request.prev_state) - return plugin_system_pb2.TaskGetResponse(state=res.state, message=res.message) + return plugin.get(job_id=request.job_id, output_prefix=request.output_prefix, prev_state=request.prev_state) def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context): plugin = BackendPluginRegistry.get_plugin(request.task_type) - res = plugin.delete(request.job_id) - return plugin_system_pb2.TaskDeleteResponse() + return plugin.delete(request.job_id) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 1d3b5e15d4..a1cad5d354 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Optional from flyteidl.service import plugin_system_pb2 from flyteidl.service.plugin_system_pb2 import TaskGetResponse @@ -25,19 +25,17 @@ def __init__(self): super().__init__(task_type="bigquery") def initialize(self): - # TODO: Read GOOGLE_APPLICATION_CREDENTIALS from secret. If not found, raise an error. pass def create( - self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate + self, inputs: Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate ) -> plugin_system_pb2.TaskCreateResponse: ctx = FlyteContextManager.current_context() - - # TODO: is there any other way to get python interface input? python_interface_inputs = { name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() } native_inputs = TypeEngine.literal_map_to_kwargs(ctx, inputs, python_interface_inputs) + job_config = bigquery.QueryJobConfig( query_parameters=[ bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val) diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py new file mode 100644 index 0000000000..c63aee1e80 --- /dev/null +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -0,0 +1,39 @@ +import typing + +from flyteidl.service import plugin_system_pb2 + +from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry +from flytekit.models.literals import LiteralMap +from flytekit.models.task import TaskTemplate + + +class DummyPlugin(BackendPluginBase): + def __init__(self): + super().__init__(task_type="dummy") + + def initialize(self): + pass + + def create( + self, inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate + ) -> plugin_system_pb2.TaskCreateResponse: + return plugin_system_pb2.TaskCreateResponse(job_id="dummy_id") + + def get( + self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State + ) -> plugin_system_pb2.TaskGetResponse: + return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) + + def delete(self, job_id) -> plugin_system_pb2.TaskDeleteResponse: + print("deleting") + return plugin_system_pb2.TaskDeleteResponse() + + +BackendPluginRegistry.register(DummyPlugin()) + + +def test_plugin(): + p = BackendPluginRegistry.get_plugin("dummy") + assert p.create(None, "/tmp", None).job_id == "dummy_id" + assert p.get("id", "/tmp", plugin_system_pb2.RUNNING).state == plugin_system_pb2.SUCCEEDED + assert p.delete("id") == plugin_system_pb2.TaskDeleteResponse() diff --git a/tests/flytekit/unit/extend/test_model.py b/tests/flytekit/unit/extend/test_model.py new file mode 100644 index 0000000000..bf82db1469 --- /dev/null +++ b/tests/flytekit/unit/extend/test_model.py @@ -0,0 +1,43 @@ +from datetime import timedelta + +from flytekit.extend.backend.model import TaskCreateRequest +from flytekit.models import literals +from flytekit.models.core import identifier +from flytekit.models.interface import TypedInterface +from flytekit.models.literals import Literal, LiteralMap, Primitive, Scalar +from flytekit.models.task import Container, Resources, RuntimeMetadata, TaskMetadata, TaskTemplate + + +def test_create_request(): + inputs = LiteralMap({"foo": Literal(scalar=Scalar(primitive=Primitive(integer=2)))}) + resource = [Resources.ResourceEntry(Resources.ResourceName.CPU, "1")] + resources = Resources(resource, resource) + template = TaskTemplate( + identifier.Identifier(identifier.ResourceType.TASK, "project", "domain", "name", "version"), + "python", + TaskMetadata( + True, + RuntimeMetadata(RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), + timedelta(days=1), + literals.RetryStrategy(3), + True, + "0.1.1b0", + "This is deprecated!", + True, + "A", + ), + TypedInterface(inputs={}, outputs={}), + {"a": 1, "b": {"c": 2, "d": 3}}, + container=Container( + "my_image", + ["this", "is", "a", "cmd"], + ["this", "is", "an", "arg"], + resources, + {}, + {}, + ), + ) + req = TaskCreateRequest(inputs=inputs, template=template) + assert req.inputs == inputs + assert req.template == template + assert req == TaskCreateRequest.from_flyte_idl(req.to_flyte_idl()) diff --git a/tests/flytekit/unit/extend/test_utils.py b/tests/flytekit/unit/extend/test_utils.py new file mode 100644 index 0000000000..9d4f02f7dc --- /dev/null +++ b/tests/flytekit/unit/extend/test_utils.py @@ -0,0 +1,28 @@ +import os +from tempfile import mkdtemp + +from flytekit import FlyteContextManager, LiteralType, StructuredDataset, StructuredDatasetType +from flytekit.core import constants +from flytekit.core.type_engine import TypeEngine +from flytekit.extend.backend.utils import upload_output_file +from flytekit.models import literals + + +def test_upload_output_file(): + ctx = FlyteContextManager.current_context() + output_file_dict = { + constants.OUTPUT_FILE_NAME: literals.LiteralMap( + { + "results": TypeEngine.to_literal( + ctx, + StructuredDataset(uri="dummy_uri"), + StructuredDataset, + LiteralType(structured_dataset_type=StructuredDatasetType(format="")), + ) + } + ) + } + tmp_dir = mkdtemp(prefix="flyte") + upload_output_file(output_file_dict, tmp_dir) + files = os.listdir(tmp_dir) + assert len(files) == 1 From fd2b9b3d342e0504fe7daa3f5623c2f319ada02f Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 1 Mar 2023 10:42:20 -0800 Subject: [PATCH 25/50] nit Signed-off-by: Kevin Su --- .github/workflows/pythonbuild.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index d78f44b391..dacdaf42be 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -39,9 +39,9 @@ jobs: - name: Install dependencies run: | make setup - RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc#egg=flytekitplugins-bigquery&subdirectory=plugins/flytekit-bigquery" - RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc" - RUN pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" + pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc#egg=flytekitplugins-bigquery&subdirectory=plugins/flytekit-bigquery" + pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc" + pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" pip freeze - name: Test with coverage run: | From e71b6f60007e8fe8faf6d7604c6be0beb0313974 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 13 Mar 2023 00:56:25 -0700 Subject: [PATCH 26/50] update port Signed-off-by: Kevin Su --- flytekit/clis/sdk_in_container/serve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 0e5c34a931..126c2897fb 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -17,6 +17,6 @@ def serve(_: click.Context): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) add_BackendPluginServiceServicer_to_server(BackendPluginServer(), server) - server.add_insecure_port("[::]:8000") + server.add_insecure_port("[::]:9090") server.start() server.wait_for_termination() From 2b76331f7619e24100b93219c411a3e7b71c6e50 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 13 Mar 2023 11:28:49 -0700 Subject: [PATCH 27/50] wip Signed-off-by: Kevin Su --- flytekit/clis/sdk_in_container/serve.py | 13 ++++++++++--- flytekit/extend/backend/dummy_plugin.py | 22 ++-------------------- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 126c2897fb..ba687cdced 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -11,12 +11,19 @@ @click.command("serve", help=_serve_help) +@click.option( + "--port", + default="9090", + is_flag=False, + type=int, + help="Grpc port for the flyteplugins service", +) @click.pass_context -def serve(_: click.Context): - print("Starting a grpc server for the backend plugin system.") +def serve(_: click.Context, port): + cli_logger.info("Starting a grpc server for the flyteplugins service.") server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) add_BackendPluginServiceServicer_to_server(BackendPluginServer(), server) - server.add_insecure_port("[::]:9090") + server.add_insecure_port(f"[::]:{port}") server.start() server.wait_for_termination() diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index eaa74fed05..d00f002647 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -2,15 +2,9 @@ from flyteidl.service import plugin_system_pb2 -from flytekit import FlyteContextManager, StructuredDataset -from flytekit.core import constants -from flytekit.core.type_engine import TypeEngine from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry -from flytekit.extend.backend.utils import upload_output_file -from flytekit.models import literals from flytekit.models.literals import LiteralMap from flytekit.models.task import TaskTemplate -from flytekit.models.types import LiteralType, StructuredDatasetType # This plugin is used for performance benchmarking @@ -25,30 +19,18 @@ def initialize(self): def create( self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate ) -> plugin_system_pb2.TaskCreateResponse: + print("creating") return plugin_system_pb2.TaskCreateResponse(job_id="fake_id") def get( self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State ) -> plugin_system_pb2.TaskGetResponse: + print("polling") if prev_state == plugin_system_pb2.SUCCEEDED: return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) x = randint(1, 100) if x > 50: - ctx = FlyteContextManager.current_context() - output_file_dict = { - constants.OUTPUT_FILE_NAME: literals.LiteralMap( - { - "results": TypeEngine.to_literal( - ctx, - StructuredDataset(uri="fake_uri"), - StructuredDataset, - LiteralType(structured_dataset_type=StructuredDatasetType(format="")), - ) - } - ) - } - upload_output_file(output_file_dict, output_prefix) state = plugin_system_pb2.SUCCEEDED else: state = plugin_system_pb2.RUNNING From c342d3784e211774265d2a0a38fc8c74cc9dd1b3 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 13 Mar 2023 11:52:26 -0700 Subject: [PATCH 28/50] nit Signed-off-by: Kevin Su --- ...em-grpc => Dockerfile.flyteplugins-service | 2 +- backend-plugin-system-grpc.yaml | 34 ------------------- flyteplugins-service.yaml | 34 +++++++++++++++++++ 3 files changed, 35 insertions(+), 35 deletions(-) rename Dockerfile.backend-plugin-system-grpc => Dockerfile.flyteplugins-service (95%) delete mode 100644 backend-plugin-system-grpc.yaml create mode 100644 flyteplugins-service.yaml diff --git a/Dockerfile.backend-plugin-system-grpc b/Dockerfile.flyteplugins-service similarity index 95% rename from Dockerfile.backend-plugin-system-grpc rename to Dockerfile.flyteplugins-service index a8fee1053f..08386cbd32 100644 --- a/Dockerfile.backend-plugin-system-grpc +++ b/Dockerfile.flyteplugins-service @@ -14,4 +14,4 @@ RUN pip install numpy==1.23.1 RUN pip install awscli RUN pip install protobuf==3.20.1 -CMD pyflyte serve +CMD pyflyte serve --port 9090 diff --git a/backend-plugin-system-grpc.yaml b/backend-plugin-system-grpc.yaml deleted file mode 100644 index 385b9dfdfb..0000000000 --- a/backend-plugin-system-grpc.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: backend-plugin-system-grpc - labels: - app: backend-plugin-system-grpc -spec: - replicas: 1 - selector: - matchLabels: - app: backend-plugin-system-grpc - template: - metadata: - labels: - app: backend-plugin-system-grpc - spec: - containers: - - name: backend-plugin-system-grpc - image: pingsutw/backend-plugin-system-grpc:v1 - ports: - - containerPort: 8000 - ---- -apiVersion: v1 -kind: Service -metadata: - name: backend-plugin-system-grpc -spec: - selector: - app: backend-plugin-system-grpc - ports: - - protocol: TCP - port: 8000 - targetPort: 8000 diff --git a/flyteplugins-service.yaml b/flyteplugins-service.yaml new file mode 100644 index 0000000000..704c93d8d4 --- /dev/null +++ b/flyteplugins-service.yaml @@ -0,0 +1,34 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: flyteplugins-service + labels: + app: flyteplugins-service +spec: + replicas: 1 + selector: + matchLabels: + app: flyteplugins-service + template: + metadata: + labels: + app: flyteplugins-service + spec: + containers: + - name: flyteplugins-service + image: pingsutw/flyteplugins-service:v1 + ports: + - containerPort: 9090 + +--- +apiVersion: v1 +kind: Service +metadata: + name: flyteplugins-service +spec: + selector: + app: flyteplugins-service + ports: + - protocol: TCP + port: 9090 + targetPort: 9090 From ee4a180628a17c5a71780b41104cfeba7d746e42 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 14 Mar 2023 11:14:37 -0700 Subject: [PATCH 29/50] update port Signed-off-by: Kevin Su --- .github/workflows/pythonbuild.yml | 1 + Dockerfile.flyteplugins-service | 17 ------------- flytekit/clis/sdk_in_container/serve.py | 2 +- flyteplugins-service.yaml | 34 ------------------------- 4 files changed, 2 insertions(+), 52 deletions(-) delete mode 100644 Dockerfile.flyteplugins-service delete mode 100644 flyteplugins-service.yaml diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index dacdaf42be..702189946b 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -131,6 +131,7 @@ jobs: pip install -r requirements.txt if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi pip install -U https://github.com/flyteorg/flytekit/archive/${{ github.sha }}.zip#egg=flytekit + pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" pip freeze - name: Test with coverage run: | diff --git a/Dockerfile.flyteplugins-service b/Dockerfile.flyteplugins-service deleted file mode 100644 index 08386cbd32..0000000000 --- a/Dockerfile.flyteplugins-service +++ /dev/null @@ -1,17 +0,0 @@ -FROM python:3.9-slim-buster - -MAINTAINER Flyte Team -LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytekit - -WORKDIR /root -ENV PYTHONPATH /root - -RUN apt-get update && apt-get install -y git -RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc#egg=flytekitplugins-bigquery&subdirectory=plugins/flytekit-bigquery" -RUN pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc" -RUN pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" -RUN pip install numpy==1.23.1 -RUN pip install awscli -RUN pip install protobuf==3.20.1 - -CMD pyflyte serve --port 9090 diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index ba687cdced..e59f765077 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -13,7 +13,7 @@ @click.command("serve", help=_serve_help) @click.option( "--port", - default="9090", + default="80", is_flag=False, type=int, help="Grpc port for the flyteplugins service", diff --git a/flyteplugins-service.yaml b/flyteplugins-service.yaml deleted file mode 100644 index 704c93d8d4..0000000000 --- a/flyteplugins-service.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: flyteplugins-service - labels: - app: flyteplugins-service -spec: - replicas: 1 - selector: - matchLabels: - app: flyteplugins-service - template: - metadata: - labels: - app: flyteplugins-service - spec: - containers: - - name: flyteplugins-service - image: pingsutw/flyteplugins-service:v1 - ports: - - containerPort: 9090 - ---- -apiVersion: v1 -kind: Service -metadata: - name: flyteplugins-service -spec: - selector: - app: flyteplugins-service - ports: - - protocol: TCP - port: 9090 - targetPort: 9090 From 1a908c4bfb5bfe43783b9e961aa9b460b5de3f87 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 14 Mar 2023 12:37:23 -0700 Subject: [PATCH 30/50] update Signed-off-by: Kevin Su --- flytekit/extend/backend/utils.py | 12 ---------- tests/flytekit/unit/extend/test_utils.py | 28 ------------------------ 2 files changed, 40 deletions(-) delete mode 100644 flytekit/extend/backend/utils.py delete mode 100644 tests/flytekit/unit/extend/test_utils.py diff --git a/flytekit/extend/backend/utils.py b/flytekit/extend/backend/utils.py deleted file mode 100644 index 11357678eb..0000000000 --- a/flytekit/extend/backend/utils.py +++ /dev/null @@ -1,12 +0,0 @@ -import os -import typing - -from flytekit import FlyteContextManager -from flytekit.core import utils - - -def upload_output_file(output_file_dict: typing.Dict, output_prefix: str): - ctx = FlyteContextManager.current_context() - for k, v in output_file_dict.items(): - utils.write_proto_to_file(v.to_flyte_idl(), os.path.join(ctx.execution_state.engine_dir, k)) - ctx.file_access.put_data(ctx.execution_state.engine_dir, output_prefix, is_multipart=True) diff --git a/tests/flytekit/unit/extend/test_utils.py b/tests/flytekit/unit/extend/test_utils.py deleted file mode 100644 index 9d4f02f7dc..0000000000 --- a/tests/flytekit/unit/extend/test_utils.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from tempfile import mkdtemp - -from flytekit import FlyteContextManager, LiteralType, StructuredDataset, StructuredDatasetType -from flytekit.core import constants -from flytekit.core.type_engine import TypeEngine -from flytekit.extend.backend.utils import upload_output_file -from flytekit.models import literals - - -def test_upload_output_file(): - ctx = FlyteContextManager.current_context() - output_file_dict = { - constants.OUTPUT_FILE_NAME: literals.LiteralMap( - { - "results": TypeEngine.to_literal( - ctx, - StructuredDataset(uri="dummy_uri"), - StructuredDataset, - LiteralType(structured_dataset_type=StructuredDatasetType(format="")), - ) - } - ) - } - tmp_dir = mkdtemp(prefix="flyte") - upload_output_file(output_file_dict, tmp_dir) - files = os.listdir(tmp_dir) - assert len(files) == 1 From 764c0f5fcca60b1b986cd1045faedbd85d8cc9cb Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 14 Mar 2023 12:48:52 -0700 Subject: [PATCH 31/50] update get request Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 4 +- flytekit/extend/backend/dummy_plugin.py | 4 +- flytekit/extend/backend/grpc_server.py | 6 +-- .../bigquery/backend_plugin.py | 39 +++++++++---------- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 03edbe0a0b..0a18cab406 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -26,9 +26,7 @@ def create( pass @abstractmethod - def get( - self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State - ) -> plugin_system_pb2.TaskGetResponse: + def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: pass @abstractmethod diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py index d00f002647..f88fc167be 100644 --- a/flytekit/extend/backend/dummy_plugin.py +++ b/flytekit/extend/backend/dummy_plugin.py @@ -22,9 +22,7 @@ def create( print("creating") return plugin_system_pb2.TaskCreateResponse(job_id="fake_id") - def get( - self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State - ) -> plugin_system_pb2.TaskGetResponse: + def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: print("polling") if prev_state == plugin_system_pb2.SUCCEEDED: return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index e31a04bdb7..a4deb9ff32 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -11,10 +11,10 @@ def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> p plugin = BackendPluginRegistry.get_plugin(req.template.type) return plugin.create(req.inputs, req.output_prefix, req.template) - def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context): + def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context) -> plugin_system_pb2.TaskGetResponse: plugin = BackendPluginRegistry.get_plugin(request.task_type) - return plugin.get(job_id=request.job_id, output_prefix=request.output_prefix, prev_state=request.prev_state) + return plugin.get(job_id=request.job_id, prev_state=request.prev_state) - def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context): + def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context) -> plugin_system_pb2.TaskDeleteResponse: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.delete(request.job_id) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index a1cad5d354..4117caa260 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -5,10 +5,8 @@ from google.cloud import bigquery from flytekit import FlyteContextManager, StructuredDataset -from flytekit.core import constants from flytekit.core.type_engine import TypeEngine from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry, convert_to_flyte_state -from flytekit.extend.backend.utils import upload_output_file from flytekit.models import literals from flytekit.models.literals import LiteralMap from flytekit.models.task import TaskTemplate @@ -49,31 +47,30 @@ def create( return plugin_system_pb2.TaskCreateResponse(job_id=query_job.job_id) - def get( - self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State - ) -> plugin_system_pb2.TaskGetResponse: + def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: + if prev_state == plugin_system_pb2.SUCCEEDED: + return TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) + client = bigquery.Client() job = client.get_job(job_id) cur_state = convert_to_flyte_state(str(job.state)) + res = None - if prev_state != plugin_system_pb2.SUCCEEDED and cur_state == plugin_system_pb2.SUCCEEDED: + if cur_state == plugin_system_pb2.SUCCEEDED: ctx = FlyteContextManager.current_context() output_location = f"bq://{job.destination.project}:{job.destination.dataset_id}.{job.destination.table_id}" - output_file_dict = { - constants.OUTPUT_FILE_NAME: literals.LiteralMap( - { - "results": TypeEngine.to_literal( - ctx, - StructuredDataset(uri=output_location), - StructuredDataset, - LiteralType(structured_dataset_type=StructuredDatasetType(format="")), - ) - } - ) - } - upload_output_file(output_file_dict, output_prefix) - - return TaskGetResponse(state=cur_state) + res = literals.LiteralMap( + { + "results": TypeEngine.to_literal( + ctx, + StructuredDataset(uri=output_location), + StructuredDataset, + LiteralType(structured_dataset_type=StructuredDatasetType(format="")), + ) + } + ) + + return TaskGetResponse(state=cur_state, outputs=res) def delete(self, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: client = bigquery.Client() From f402d573e0f03b82c5f5a851b768d7ecba0f8e4a Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 14 Mar 2023 15:47:17 -0700 Subject: [PATCH 32/50] more tets Signed-off-by: Kevin Su --- flytekit/extend/backend/__init__.py | 1 - flytekit/extend/backend/base_plugin.py | 4 -- flytekit/extend/backend/dummy_plugin.py | 43 ------------------- .../bigquery/backend_plugin.py | 3 -- .../unit/extend/test_backend_plugin.py | 17 +++++--- 5 files changed, 11 insertions(+), 57 deletions(-) delete mode 100644 flytekit/extend/backend/dummy_plugin.py diff --git a/flytekit/extend/backend/__init__.py b/flytekit/extend/backend/__init__.py index 5061c6ea1b..e69de29bb2 100644 --- a/flytekit/extend/backend/__init__.py +++ b/flytekit/extend/backend/__init__.py @@ -1 +0,0 @@ -from .dummy_plugin import DummyPlugin diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 0a18cab406..10e3650c45 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -15,10 +15,6 @@ def __init__(self, task_type: str): def task_type(self) -> str: return self._task_type - @abstractmethod - def initialize(self): - pass - @abstractmethod def create( self, inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate diff --git a/flytekit/extend/backend/dummy_plugin.py b/flytekit/extend/backend/dummy_plugin.py deleted file mode 100644 index f88fc167be..0000000000 --- a/flytekit/extend/backend/dummy_plugin.py +++ /dev/null @@ -1,43 +0,0 @@ -from random import randint - -from flyteidl.service import plugin_system_pb2 - -from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry -from flytekit.models.literals import LiteralMap -from flytekit.models.task import TaskTemplate - - -# This plugin is used for performance benchmarking -# will remove this file before pr is merged -class DummyPlugin(BackendPluginBase): - def __init__(self): - super().__init__(task_type="dummy") - - def initialize(self): - pass - - def create( - self, inputs: LiteralMap, output_prefix: str, task_template: TaskTemplate - ) -> plugin_system_pb2.TaskCreateResponse: - print("creating") - return plugin_system_pb2.TaskCreateResponse(job_id="fake_id") - - def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: - print("polling") - if prev_state == plugin_system_pb2.SUCCEEDED: - return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) - - x = randint(1, 100) - if x > 50: - state = plugin_system_pb2.SUCCEEDED - else: - state = plugin_system_pb2.RUNNING - - return plugin_system_pb2.TaskGetResponse(state=state) - - def delete(self, job_id) -> plugin_system_pb2.TaskDeleteResponse: - print("deleting") - return plugin_system_pb2.TaskDeleteResponse() - - -BackendPluginRegistry.register(DummyPlugin()) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 4117caa260..94dc8701c2 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -22,9 +22,6 @@ class BigQueryPlugin(BackendPluginBase): def __init__(self): super().__init__(task_type="bigquery") - def initialize(self): - pass - def create( self, inputs: Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate ) -> plugin_system_pb2.TaskCreateResponse: diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index c63aee1e80..1a7d6a4306 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -19,21 +19,26 @@ def create( ) -> plugin_system_pb2.TaskCreateResponse: return plugin_system_pb2.TaskCreateResponse(job_id="dummy_id") - def get( - self, job_id: str, output_prefix: str, prev_state: plugin_system_pb2.State - ) -> plugin_system_pb2.TaskGetResponse: + def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) def delete(self, job_id) -> plugin_system_pb2.TaskDeleteResponse: - print("deleting") return plugin_system_pb2.TaskDeleteResponse() BackendPluginRegistry.register(DummyPlugin()) -def test_plugin(): +def test_base_plugin(): + p = BackendPluginBase(task_type="dummy") + assert p.task_type == "dummy" + p.create(None, "/tmp", None) + p.get("id", plugin_system_pb2.RUNNING) + p.delete("id") + + +def test_dummy_plugin(): p = BackendPluginRegistry.get_plugin("dummy") assert p.create(None, "/tmp", None).job_id == "dummy_id" - assert p.get("id", "/tmp", plugin_system_pb2.RUNNING).state == plugin_system_pb2.SUCCEEDED + assert p.get("id", plugin_system_pb2.RUNNING).state == plugin_system_pb2.SUCCEEDED assert p.delete("id") == plugin_system_pb2.TaskDeleteResponse() From bc30f511d5f8734e13fbdebab07219e319c34fc5 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 14 Mar 2023 16:37:19 -0700 Subject: [PATCH 33/50] remove prev state Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 2 +- flytekit/extend/backend/grpc_server.py | 2 +- .../flytekitplugins/bigquery/backend_plugin.py | 5 +---- tests/flytekit/unit/extend/test_backend_plugin.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 10e3650c45..b262ff3d89 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -22,7 +22,7 @@ def create( pass @abstractmethod - def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: + def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: pass @abstractmethod diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index a4deb9ff32..c945defa0a 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -13,7 +13,7 @@ def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> p def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context) -> plugin_system_pb2.TaskGetResponse: plugin = BackendPluginRegistry.get_plugin(request.task_type) - return plugin.get(job_id=request.job_id, prev_state=request.prev_state) + return plugin.get(job_id=request.job_id) def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context) -> plugin_system_pb2.TaskDeleteResponse: plugin = BackendPluginRegistry.get_plugin(request.task_type) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 94dc8701c2..9d09ac6795 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -44,10 +44,7 @@ def create( return plugin_system_pb2.TaskCreateResponse(job_id=query_job.job_id) - def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: - if prev_state == plugin_system_pb2.SUCCEEDED: - return TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) - + def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: client = bigquery.Client() job = client.get_job(job_id) cur_state = convert_to_flyte_state(str(job.state)) diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 1a7d6a4306..92f653d494 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -19,7 +19,7 @@ def create( ) -> plugin_system_pb2.TaskCreateResponse: return plugin_system_pb2.TaskCreateResponse(job_id="dummy_id") - def get(self, job_id: str, prev_state: plugin_system_pb2.State) -> plugin_system_pb2.TaskGetResponse: + def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) def delete(self, job_id) -> plugin_system_pb2.TaskDeleteResponse: From 1c1695213a5f82e3403d6721ef79487f10f22948 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 14 Mar 2023 16:59:21 -0700 Subject: [PATCH 34/50] nit Signed-off-by: Kevin Su --- tests/flytekit/unit/extend/test_backend_plugin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 92f653d494..4dd5e5d7af 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -33,12 +33,12 @@ def test_base_plugin(): p = BackendPluginBase(task_type="dummy") assert p.task_type == "dummy" p.create(None, "/tmp", None) - p.get("id", plugin_system_pb2.RUNNING) + p.get("id") p.delete("id") def test_dummy_plugin(): p = BackendPluginRegistry.get_plugin("dummy") assert p.create(None, "/tmp", None).job_id == "dummy_id" - assert p.get("id", plugin_system_pb2.RUNNING).state == plugin_system_pb2.SUCCEEDED + assert p.get("id").state == plugin_system_pb2.SUCCEEDED assert p.delete("id") == plugin_system_pb2.TaskDeleteResponse() From f044e286dc2bb257aaa1f5f8dbe7756017859db4 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 27 Mar 2023 12:54:24 -0700 Subject: [PATCH 35/50] error handling Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 11 +++++-- flytekit/extend/backend/grpc_server.py | 29 ++++++++++++++----- .../bigquery/backend_plugin.py | 12 ++++++-- .../unit/extend/test_backend_plugin.py | 11 +++++-- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index b262ff3d89..4489af0515 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,6 +1,7 @@ import typing from abc import abstractmethod +import grpc from flyteidl.core.tasks_pb2 import TaskTemplate from flyteidl.service import plugin_system_pb2 @@ -17,16 +18,20 @@ def task_type(self) -> str: @abstractmethod def create( - self, inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate + self, + context: grpc.ServicerContext, + inputs: typing.Optional[LiteralMap], + output_prefix: str, + task_template: TaskTemplate, ) -> plugin_system_pb2.TaskCreateResponse: pass @abstractmethod - def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: + def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskGetResponse: pass @abstractmethod - def delete(self, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: + def delete(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: pass diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/grpc_server.py index c945defa0a..fd1209695e 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/grpc_server.py @@ -1,3 +1,4 @@ +import grpc from flyteidl.service import plugin_system_pb2 from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer @@ -7,14 +8,28 @@ class BackendPluginServer(BackendPluginServiceServicer): def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> plugin_system_pb2.TaskCreateResponse: - req = TaskCreateRequest.from_flyte_idl(request) - plugin = BackendPluginRegistry.get_plugin(req.template.type) - return plugin.create(req.inputs, req.output_prefix, req.template) + try: + req = TaskCreateRequest.from_flyte_idl(request) + plugin = BackendPluginRegistry.get_plugin(req.template.type) + return plugin.create( + context=context, inputs=req.inputs, output_prefix=req.output_prefix, task_template=req.template + ) + except Exception as e: + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(f"failed to create task with error {e}") def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context) -> plugin_system_pb2.TaskGetResponse: - plugin = BackendPluginRegistry.get_plugin(request.task_type) - return plugin.get(job_id=request.job_id) + try: + plugin = BackendPluginRegistry.get_plugin(request.task_type) + return plugin.get(context=context, job_id=request.job_id) + except Exception as e: + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(f"failed to get task with error {e}") def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context) -> plugin_system_pb2.TaskDeleteResponse: - plugin = BackendPluginRegistry.get_plugin(request.task_type) - return plugin.delete(request.job_id) + try: + plugin = BackendPluginRegistry.get_plugin(request.task_type) + return plugin.delete(context=context, job_id=request.job_id) + except Exception as e: + context.set_code(grpc.StatusCode.INTERNAL) + context.set_details(f"failed to delete task with error {e}") diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 9d09ac6795..1b09ba9dfa 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -1,5 +1,6 @@ from typing import Dict, Optional +import grpc from flyteidl.service import plugin_system_pb2 from flyteidl.service.plugin_system_pb2 import TaskGetResponse from google.cloud import bigquery @@ -23,8 +24,13 @@ def __init__(self): super().__init__(task_type="bigquery") def create( - self, inputs: Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate + self, + context: grpc.ServicerContext, + inputs: Optional[LiteralMap], + output_prefix: str, + task_template: TaskTemplate, ) -> plugin_system_pb2.TaskCreateResponse: + ctx = FlyteContextManager.current_context() python_interface_inputs = { name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() @@ -44,7 +50,7 @@ def create( return plugin_system_pb2.TaskCreateResponse(job_id=query_job.job_id) - def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: + def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskGetResponse: client = bigquery.Client() job = client.get_job(job_id) cur_state = convert_to_flyte_state(str(job.state)) @@ -66,7 +72,7 @@ def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: return TaskGetResponse(state=cur_state, outputs=res) - def delete(self, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: + def delete(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: client = bigquery.Client() client.cancel_job(job_id) return plugin_system_pb2.TaskDeleteResponse() diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 4dd5e5d7af..2115ca80a4 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -1,5 +1,6 @@ import typing +import grpc from flyteidl.service import plugin_system_pb2 from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry @@ -15,14 +16,18 @@ def initialize(self): pass def create( - self, inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate + self, + context: grpc.ServicerContext, + inputs: typing.Optional[LiteralMap], + output_prefix: str, + task_template: TaskTemplate, ) -> plugin_system_pb2.TaskCreateResponse: return plugin_system_pb2.TaskCreateResponse(job_id="dummy_id") - def get(self, job_id: str) -> plugin_system_pb2.TaskGetResponse: + def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskGetResponse: return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) - def delete(self, job_id) -> plugin_system_pb2.TaskDeleteResponse: + def delete(self, context: grpc.ServicerContext, job_id) -> plugin_system_pb2.TaskDeleteResponse: return plugin_system_pb2.TaskDeleteResponse() From 59714f90dc9109b5bfce87ab3ce204f64b338cd6 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 31 Mar 2023 15:14:14 -0700 Subject: [PATCH 36/50] wip Signed-off-by: Kevin Su --- .github/workflows/pythonbuild.yml | 4 --- flytekit/clis/sdk_in_container/serve.py | 11 +++--- flytekit/extend/backend/base_plugin.py | 26 +++++++++----- ...c_server.py => external_plugin_service.py} | 19 ++++++---- flytekit/extend/backend/model.py | 4 +-- .../bigquery/backend_plugin.py | 24 +++++++------ setup.py | 2 +- .../unit/extend/test_backend_plugin.py | 35 ++++++++++++------- 8 files changed, 74 insertions(+), 51 deletions(-) rename flytekit/extend/backend/{grpc_server.py => external_plugin_service.py} (67%) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 76a726d871..9c9fb1b917 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -39,9 +39,6 @@ jobs: - name: Install dependencies run: | make setup - pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc#egg=flytekitplugins-bigquery&subdirectory=plugins/flytekit-bigquery" - pip install "git+https://github.com/flyteorg/flytekit@backend-plugin-system-grpc" - pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" pip freeze - name: Test with coverage run: | @@ -154,7 +151,6 @@ jobs: pip install -r requirements.txt if [ -f dev-requirements.txt ]; then pip install -r dev-requirements.txt; fi pip install -U https://github.com/flyteorg/flytekit/archive/${{ github.sha }}.zip#egg=flytekit - pip install "git+https://github.com/flyteorg/flyteidl@backend-plugin-system" pip freeze - name: Test with coverage run: | diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index e59f765077..6178664f05 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -2,12 +2,11 @@ import click import grpc -from flyteidl.service.plugin_system_pb2_grpc import add_BackendPluginServiceServicer_to_server +from flyteidl.service.external_plugin_service_pb2_grpc import add_ExternalPluginServiceServicer_to_server -from flytekit.extend.backend.grpc_server import BackendPluginServer -from flytekit.loggers import cli_logger +from flytekit.extend.backend.external_plugin_service import BackendPluginServer -_serve_help = """Start a grpc server for the backend plugin system.""" +_serve_help = """Start a grpc server for the external plugin service.""" @click.command("serve", help=_serve_help) @@ -20,9 +19,9 @@ ) @click.pass_context def serve(_: click.Context, port): - cli_logger.info("Starting a grpc server for the flyteplugins service.") + click.secho(f"Starting the external plugin service...", fg="blue") server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) - add_BackendPluginServiceServicer_to_server(BackendPluginServer(), server) + add_ExternalPluginServiceServicer_to_server(BackendPluginServer(), server) server.add_insecure_port(f"[::]:{port}") server.start() diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 4489af0515..99f572ef01 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -3,7 +3,15 @@ import grpc from flyteidl.core.tasks_pb2 import TaskTemplate -from flyteidl.service import plugin_system_pb2 +from flyteidl.service.external_plugin_service_pb2 import ( + RETRYABLE_FAILURE, + RUNNING, + SUCCEEDED, + State, + TaskCreateResponse, + TaskDeleteResponse, + TaskGetResponse, +) from flytekit.models.literals import LiteralMap @@ -20,18 +28,18 @@ def task_type(self) -> str: def create( self, context: grpc.ServicerContext, - inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate, - ) -> plugin_system_pb2.TaskCreateResponse: + inputs: typing.Optional[LiteralMap] = None, + ) -> TaskCreateResponse: pass @abstractmethod - def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskGetResponse: + def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: pass @abstractmethod - def delete(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: + def delete(self, context: grpc.ServicerContext, job_id: str) -> TaskDeleteResponse: pass @@ -47,11 +55,11 @@ def get_plugin(task_type: str) -> BackendPluginBase: return BackendPluginRegistry._REGISTRY[task_type] -def convert_to_flyte_state(state: str) -> plugin_system_pb2.State: +def convert_to_flyte_state(state: str) -> State: if state.lower() in ["failed"]: - return plugin_system_pb2.FAILED + return RETRYABLE_FAILURE if state.lower() in ["done", "succeeded"]: - return plugin_system_pb2.SUCCEEDED + return SUCCEEDED if state.lower() in ["running"]: - return plugin_system_pb2.RUNNING + return RUNNING raise ValueError("Unrecognize state") diff --git a/flytekit/extend/backend/grpc_server.py b/flytekit/extend/backend/external_plugin_service.py similarity index 67% rename from flytekit/extend/backend/grpc_server.py rename to flytekit/extend/backend/external_plugin_service.py index fd1209695e..5b4bbb8d3b 100644 --- a/flytekit/extend/backend/grpc_server.py +++ b/flytekit/extend/backend/external_plugin_service.py @@ -1,13 +1,20 @@ import grpc -from flyteidl.service import plugin_system_pb2 -from flyteidl.service.plugin_system_pb2_grpc import BackendPluginServiceServicer +from flyteidl.service.external_plugin_service_pb2 import ( + TaskCreateRequest, + TaskCreateResponse, + TaskDeleteRequest, + TaskDeleteResponse, + TaskGetRequest, + TaskGetResponse, +) +from flyteidl.service.external_plugin_service_pb2_grpc import ExternalPluginServiceServicer from flytekit.extend.backend.base_plugin import BackendPluginRegistry from flytekit.extend.backend.model import TaskCreateRequest -class BackendPluginServer(BackendPluginServiceServicer): - def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> plugin_system_pb2.TaskCreateResponse: +class BackendPluginServer(ExternalPluginServiceServicer): + def CreateTask(self, request: TaskCreateRequest, context) -> TaskCreateResponse: try: req = TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) @@ -18,7 +25,7 @@ def CreateTask(self, request: plugin_system_pb2.TaskCreateRequest, context) -> p context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to create task with error {e}") - def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context) -> plugin_system_pb2.TaskGetResponse: + def GetTask(self, request: TaskGetRequest, context) -> TaskGetResponse: try: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.get(context=context, job_id=request.job_id) @@ -26,7 +33,7 @@ def GetTask(self, request: plugin_system_pb2.TaskGetRequest, context) -> plugin_ context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to get task with error {e}") - def DeleteTask(self, request: plugin_system_pb2.TaskDeleteRequest, context) -> plugin_system_pb2.TaskDeleteResponse: + def DeleteTask(self, request: TaskDeleteRequest, context) -> TaskDeleteResponse: try: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.delete(context=context, job_id=request.job_id) diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py index 3e453bac22..b2dcc6f1bc 100644 --- a/flytekit/extend/backend/model.py +++ b/flytekit/extend/backend/model.py @@ -1,4 +1,4 @@ -from flyteidl.service import plugin_system_pb2 +from flyteidl.service import external_plugin_service_pb2 from flytekit.models import common, task from flytekit.models.literals import LiteralMap @@ -18,7 +18,7 @@ def template(self): return self._template def to_flyte_idl(self): - return plugin_system_pb2.TaskCreateRequest( + return external_plugin_service_pb2.TaskCreateRequest( inputs=self.inputs.to_flyte_idl(), template=self.template.to_flyte_idl() ) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 1b09ba9dfa..9cd6bb7a52 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -1,8 +1,12 @@ from typing import Dict, Optional import grpc -from flyteidl.service import plugin_system_pb2 -from flyteidl.service.plugin_system_pb2 import TaskGetResponse +from flyteidl.service.external_plugin_service_pb2 import ( + SUCCEEDED, + TaskCreateResponse, + TaskDeleteResponse, + TaskGetResponse, +) from google.cloud import bigquery from flytekit import FlyteContextManager, StructuredDataset @@ -21,15 +25,15 @@ class BigQueryPlugin(BackendPluginBase): def __init__(self): - super().__init__(task_type="bigquery") + super().__init__(task_type="bigquery_query_job_task") def create( self, context: grpc.ServicerContext, - inputs: Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate, - ) -> plugin_system_pb2.TaskCreateResponse: + inputs: Optional[LiteralMap] = None, + ) -> TaskCreateResponse: ctx = FlyteContextManager.current_context() python_interface_inputs = { @@ -48,15 +52,15 @@ def create( client = bigquery.Client(project=custom["ProjectID"], location=custom["Location"]) query_job = client.query(task_template.sql.statement, job_config=job_config) - return plugin_system_pb2.TaskCreateResponse(job_id=query_job.job_id) + return TaskCreateResponse(job_id=query_job.job_id) - def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskGetResponse: + def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: client = bigquery.Client() job = client.get_job(job_id) cur_state = convert_to_flyte_state(str(job.state)) res = None - if cur_state == plugin_system_pb2.SUCCEEDED: + if cur_state == SUCCEEDED: ctx = FlyteContextManager.current_context() output_location = f"bq://{job.destination.project}:{job.destination.dataset_id}.{job.destination.table_id}" res = literals.LiteralMap( @@ -72,10 +76,10 @@ def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.T return TaskGetResponse(state=cur_state, outputs=res) - def delete(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskDeleteResponse: + def delete(self, context: grpc.ServicerContext, job_id: str) -> TaskDeleteResponse: client = bigquery.Client() client.cancel_job(job_id) - return plugin_system_pb2.TaskDeleteResponse() + return TaskDeleteResponse() BackendPluginRegistry.register(BigQueryPlugin()) diff --git a/setup.py b/setup.py index 563ce6b7d5..26aa829d69 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ }, install_requires=[ "googleapis-common-protos>=1.57", - "flyteidl>=1.3.12,<1.4.0", + "flyteidl>=1.3.16,<1.4.0", "wheel>=0.30.0,<1.0.0", "pandas>=1.0.0,<2.0.0", "pyarrow>=4.0.0,<11.0.0", diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 2115ca80a4..5d832e2dff 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -1,7 +1,14 @@ import typing +from unittest.mock import MagicMock import grpc -from flyteidl.service import plugin_system_pb2 +from flyteidl.service.external_plugin_service_pb2 import ( + SUCCEEDED, + TaskCreateRequest, + TaskCreateResponse, + TaskDeleteResponse, + TaskGetResponse, +) from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry from flytekit.models.literals import LiteralMap @@ -21,14 +28,14 @@ def create( inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate, - ) -> plugin_system_pb2.TaskCreateResponse: - return plugin_system_pb2.TaskCreateResponse(job_id="dummy_id") + ) -> TaskCreateResponse: + return TaskCreateResponse(job_id="dummy_id") - def get(self, context: grpc.ServicerContext, job_id: str) -> plugin_system_pb2.TaskGetResponse: - return plugin_system_pb2.TaskGetResponse(state=plugin_system_pb2.SUCCEEDED) + def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: + return TaskGetResponse(state=SUCCEEDED) - def delete(self, context: grpc.ServicerContext, job_id) -> plugin_system_pb2.TaskDeleteResponse: - return plugin_system_pb2.TaskDeleteResponse() + def delete(self, context: grpc.ServicerContext, job_id) -> TaskDeleteResponse: + return TaskDeleteResponse() BackendPluginRegistry.register(DummyPlugin()) @@ -37,13 +44,15 @@ def delete(self, context: grpc.ServicerContext, job_id) -> plugin_system_pb2.Tas def test_base_plugin(): p = BackendPluginBase(task_type="dummy") assert p.task_type == "dummy" - p.create(None, "/tmp", None) - p.get("id") - p.delete("id") + ctx = MagicMock(spec=grpc.ServicerContext) + p.create(ctx, None, "/tmp", None) + p.get(ctx, "id") + p.delete(ctx, "id") def test_dummy_plugin(): p = BackendPluginRegistry.get_plugin("dummy") - assert p.create(None, "/tmp", None).job_id == "dummy_id" - assert p.get("id").state == plugin_system_pb2.SUCCEEDED - assert p.delete("id") == plugin_system_pb2.TaskDeleteResponse() + ctx = MagicMock(spec=grpc.ServicerContext) + assert p.create(ctx, None, "/tmp", None).job_id == "dummy_id" + assert p.get(ctx, "id").state == SUCCEEDED + assert p.delete(ctx, "id") == TaskDeleteResponse() From 26eab42baa2f9ec3cc534456b203b67279c7ad77 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 31 Mar 2023 15:43:03 -0700 Subject: [PATCH 37/50] wip Signed-off-by: Kevin Su --- flytekit/extend/backend/base_plugin.py | 7 ++--- flytekit/extend/backend/model.py | 26 +++++++++++++------ .../bigquery/backend_plugin.py | 2 +- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 99f572ef01..edafc7dbf6 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -56,10 +56,11 @@ def get_plugin(task_type: str) -> BackendPluginBase: def convert_to_flyte_state(state: str) -> State: - if state.lower() in ["failed"]: + state = state.lower() + if state in ["failed"]: return RETRYABLE_FAILURE - if state.lower() in ["done", "succeeded"]: + elif state in ["done", "succeeded"]: return SUCCEEDED - if state.lower() in ["running"]: + elif state in ["running"]: return RUNNING raise ValueError("Unrecognize state") diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py index b2dcc6f1bc..7daa513cfe 100644 --- a/flytekit/extend/backend/model.py +++ b/flytekit/extend/backend/model.py @@ -1,3 +1,5 @@ +from typing import Optional + from flyteidl.service import external_plugin_service_pb2 from flytekit.models import common, task @@ -5,26 +7,34 @@ class TaskCreateRequest(common.FlyteIdlEntity): - def __init__(self, inputs: LiteralMap, template: task.TaskTemplate): - self._inputs = inputs + def __init__(self, output_prefix: str, template: task.TaskTemplate, inputs: Optional[LiteralMap] = None): + self._output_prefix = output_prefix self._template = template + self._inputs = inputs @property - def inputs(self): - return self._inputs + def output_prefix(self) -> str: + return self._output_prefix @property - def template(self): + def template(self) -> task.TaskTemplate: return self._template - def to_flyte_idl(self): + @property + def inputs(self) -> Optional[LiteralMap]: + return self._inputs + + def to_flyte_idl(self) -> external_plugin_service_pb2.TaskCreateRequest: return external_plugin_service_pb2.TaskCreateRequest( - inputs=self.inputs.to_flyte_idl(), template=self.template.to_flyte_idl() + output_prefix=self.output_prefix, + template=self.template.to_flyte_idl(), + inputs=self.inputs.to_flyte_idl(), ) @classmethod def from_flyte_idl(cls, proto): return cls( - inputs=LiteralMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, + output_prefix=proto.output_prefix, template=task.TaskTemplate.from_flyte_idl(proto.template), + inputs=LiteralMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, ) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 9cd6bb7a52..bb4e64da56 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -25,7 +25,7 @@ class BigQueryPlugin(BackendPluginBase): def __init__(self): - super().__init__(task_type="bigquery_query_job_task") + super().__init__(task_type="bigquery_query_job_task1") def create( self, From 4ee141750fb523a482d574d3f930ffd1afb1d18f Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 31 Mar 2023 16:25:28 -0700 Subject: [PATCH 38/50] fixed test Signed-off-by: Kevin Su --- doc-requirements.txt | 2 +- flytekit/clis/sdk_in_container/serve.py | 18 ++++++++++++++---- .../extend/backend/external_plugin_service.py | 4 ++-- .../tests/test_backend_plugin.py | 17 +++++++++++++++++ tests/flytekit/unit/cli/pyflyte/test_build.py | 9 +++++++++ .../unit/extend/test_backend_plugin.py | 7 +++---- tests/flytekit/unit/extend/test_model.py | 2 +- 7 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 plugins/flytekit-bigquery/tests/test_backend_plugin.py create mode 100644 tests/flytekit/unit/cli/pyflyte/test_build.py diff --git a/doc-requirements.txt b/doc-requirements.txt index 57286bfb07..fcd8dbea95 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -204,7 +204,7 @@ flask==2.2.3 # via mlflow flatbuffers==23.1.21 # via tensorflow -flyteidl==1.3.12 +flyteidl==1.3.16 # via flytekit fonttools==4.38.0 # via matplotlib diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 6178664f05..38c5e2862f 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -15,14 +15,24 @@ default="80", is_flag=False, type=int, - help="Grpc port for the flyteplugins service", + help="Grpc port for the external plugin service", +) +@click.option( + "--timeout", + default=None, + is_flag=False, + type=int, + help="It will wait for the specified number of seconds before shutting down grpc server. It should only be used for testing.", ) @click.pass_context -def serve(_: click.Context, port): - click.secho(f"Starting the external plugin service...", fg="blue") +def serve(_: click.Context, port, timeout): + """ + Start a grpc server for the external plugin service. + """ + click.secho(f"Starting the external plugin service.", fg="blue") server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) add_ExternalPluginServiceServicer_to_server(BackendPluginServer(), server) server.add_insecure_port(f"[::]:{port}") server.start() - server.wait_for_termination() + server.wait_for_termination(timeout=timeout) diff --git a/flytekit/extend/backend/external_plugin_service.py b/flytekit/extend/backend/external_plugin_service.py index 5b4bbb8d3b..f3504e0f81 100644 --- a/flytekit/extend/backend/external_plugin_service.py +++ b/flytekit/extend/backend/external_plugin_service.py @@ -9,14 +9,14 @@ ) from flyteidl.service.external_plugin_service_pb2_grpc import ExternalPluginServiceServicer +from flytekit.extend.backend import model from flytekit.extend.backend.base_plugin import BackendPluginRegistry -from flytekit.extend.backend.model import TaskCreateRequest class BackendPluginServer(ExternalPluginServiceServicer): def CreateTask(self, request: TaskCreateRequest, context) -> TaskCreateResponse: try: - req = TaskCreateRequest.from_flyte_idl(request) + req = model.TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) return plugin.create( context=context, inputs=req.inputs, output_prefix=req.output_prefix, task_template=req.template diff --git a/plugins/flytekit-bigquery/tests/test_backend_plugin.py b/plugins/flytekit-bigquery/tests/test_backend_plugin.py new file mode 100644 index 0000000000..4a53f1614f --- /dev/null +++ b/plugins/flytekit-bigquery/tests/test_backend_plugin.py @@ -0,0 +1,17 @@ +from unittest import mock +from unittest.mock import MagicMock + +import grpc +from flyteidl.service.external_plugin_service_pb2 import SUCCEEDED, TaskDeleteResponse + +from flytekit.extend.backend.base_plugin import BackendPluginRegistry + + +@mock.patch("google.cloud.bigquery.Client") +def test_bigquery_plugin(client): + client.query.return_value = job.QueryJob("dummy_id", client) + p = BackendPluginRegistry.get_plugin("bigquery_query_job_task1") + ctx = MagicMock(spec=grpc.ServicerContext) + # assert p.create(ctx, "/tmp", None).job_id == "dummy_id" + assert p.get(ctx, "id").state == SUCCEEDED + assert p.delete(ctx, "id") == TaskDeleteResponse() diff --git a/tests/flytekit/unit/cli/pyflyte/test_build.py b/tests/flytekit/unit/cli/pyflyte/test_build.py new file mode 100644 index 0000000000..a4a88f8986 --- /dev/null +++ b/tests/flytekit/unit/cli/pyflyte/test_build.py @@ -0,0 +1,9 @@ +from click.testing import CliRunner + +from flytekit.clis.sdk_in_container import pyflyte + + +def test_pyflyte_run_wf(): + runner = CliRunner() + result = runner.invoke(pyflyte.main, ["serve", "--port", "888", "--timeout", "1"], catch_exceptions=False) + assert result.exit_code == 0 diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 5d832e2dff..c594026ab0 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -4,7 +4,6 @@ import grpc from flyteidl.service.external_plugin_service_pb2 import ( SUCCEEDED, - TaskCreateRequest, TaskCreateResponse, TaskDeleteResponse, TaskGetResponse, @@ -25,9 +24,9 @@ def initialize(self): def create( self, context: grpc.ServicerContext, - inputs: typing.Optional[LiteralMap], output_prefix: str, task_template: TaskTemplate, + inputs: typing.Optional[LiteralMap] = None, ) -> TaskCreateResponse: return TaskCreateResponse(job_id="dummy_id") @@ -45,7 +44,7 @@ def test_base_plugin(): p = BackendPluginBase(task_type="dummy") assert p.task_type == "dummy" ctx = MagicMock(spec=grpc.ServicerContext) - p.create(ctx, None, "/tmp", None) + p.create(ctx, "/tmp", None) p.get(ctx, "id") p.delete(ctx, "id") @@ -53,6 +52,6 @@ def test_base_plugin(): def test_dummy_plugin(): p = BackendPluginRegistry.get_plugin("dummy") ctx = MagicMock(spec=grpc.ServicerContext) - assert p.create(ctx, None, "/tmp", None).job_id == "dummy_id" + assert p.create(ctx, "/tmp", None).job_id == "dummy_id" assert p.get(ctx, "id").state == SUCCEEDED assert p.delete(ctx, "id") == TaskDeleteResponse() diff --git a/tests/flytekit/unit/extend/test_model.py b/tests/flytekit/unit/extend/test_model.py index bf82db1469..0f154450d3 100644 --- a/tests/flytekit/unit/extend/test_model.py +++ b/tests/flytekit/unit/extend/test_model.py @@ -37,7 +37,7 @@ def test_create_request(): {}, ), ) - req = TaskCreateRequest(inputs=inputs, template=template) + req = TaskCreateRequest(output_prefix="s3://bucket/key", template=template, inputs=inputs) assert req.inputs == inputs assert req.template == template assert req == TaskCreateRequest.from_flyte_idl(req.to_flyte_idl()) From a70c12e2c4f9ed6008868bdc9b8859f6614516df Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 31 Mar 2023 16:42:07 -0700 Subject: [PATCH 39/50] fixed test Signed-off-by: Kevin Su --- flytekit/clis/sdk_in_container/serve.py | 2 +- tests/flytekit/unit/cli/pyflyte/test_build.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 38c5e2862f..d44f273125 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -29,7 +29,7 @@ def serve(_: click.Context, port, timeout): """ Start a grpc server for the external plugin service. """ - click.secho(f"Starting the external plugin service.", fg="blue") + click.secho("Starting the external plugin service...", fg="blue") server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) add_ExternalPluginServiceServicer_to_server(BackendPluginServer(), server) diff --git a/tests/flytekit/unit/cli/pyflyte/test_build.py b/tests/flytekit/unit/cli/pyflyte/test_build.py index a4a88f8986..f3ecbef547 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_build.py +++ b/tests/flytekit/unit/cli/pyflyte/test_build.py @@ -3,7 +3,7 @@ from flytekit.clis.sdk_in_container import pyflyte -def test_pyflyte_run_wf(): +def test_pyflyte_serve(): runner = CliRunner() - result = runner.invoke(pyflyte.main, ["serve", "--port", "888", "--timeout", "1"], catch_exceptions=False) + result = runner.invoke(pyflyte.main, ["serve", "--port", "0", "--timeout", "1"], catch_exceptions=False) assert result.exit_code == 0 From dbd26b5aff5942b0da10438ce22ef1dc2c97b011 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Sun, 9 Apr 2023 11:00:05 -0700 Subject: [PATCH 40/50] more tests Signed-off-by: Kevin Su --- .github/workflows/pythonbuild.yml | 1 + flytekit/extend/backend/base_plugin.py | 2 +- .../bigquery/backend_plugin.py | 33 +++---- .../tests/test_backend_plugin.py | 88 +++++++++++++++++-- .../pyflyte/{test_build.py => test_serve.py} | 0 5 files changed, 100 insertions(+), 24 deletions(-) rename tests/flytekit/unit/cli/pyflyte/{test_build.py => test_serve.py} (100%) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 9c9fb1b917..66111f2733 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -42,6 +42,7 @@ jobs: pip freeze - name: Test with coverage run: | + export GRPC_VERBOSITY=debug make unit_test_codecov - name: Codecov uses: codecov/codecov-action@v3.1.0 diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index edafc7dbf6..7146113412 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -63,4 +63,4 @@ def convert_to_flyte_state(state: str) -> State: return SUCCEEDED elif state in ["running"]: return RUNNING - raise ValueError("Unrecognize state") + raise ValueError(f"Unrecognize state: {state}") diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index bb4e64da56..0df7939eda 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -25,7 +25,7 @@ class BigQueryPlugin(BackendPluginBase): def __init__(self): - super().__init__(task_type="bigquery_query_job_task1") + super().__init__(task_type="bigquery_query_job_task") def create( self, @@ -34,25 +34,26 @@ def create( task_template: TaskTemplate, inputs: Optional[LiteralMap] = None, ) -> TaskCreateResponse: - - ctx = FlyteContextManager.current_context() - python_interface_inputs = { - name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() - } - native_inputs = TypeEngine.literal_map_to_kwargs(ctx, inputs, python_interface_inputs) - - job_config = bigquery.QueryJobConfig( - query_parameters=[ - bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val) - for name, val in native_inputs.items() - ] - ) + job_config = None + if inputs: + ctx = FlyteContextManager.current_context() + python_interface_inputs = { + name: TypeEngine.guess_python_type(lt.type) for name, lt in task_template.interface.inputs.items() + } + native_inputs = TypeEngine.literal_map_to_kwargs(ctx, inputs, python_interface_inputs) + + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val) + for name, val in native_inputs.items() + ] + ) custom = task_template.custom client = bigquery.Client(project=custom["ProjectID"], location=custom["Location"]) query_job = client.query(task_template.sql.statement, job_config=job_config) - return TaskCreateResponse(job_id=query_job.job_id) + return TaskCreateResponse(job_id=str(query_job.job_id)) def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: client = bigquery.Client() @@ -74,7 +75,7 @@ def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: } ) - return TaskGetResponse(state=cur_state, outputs=res) + return TaskGetResponse(state=cur_state, outputs=res.to_flyte_idl()) def delete(self, context: grpc.ServicerContext, job_id: str) -> TaskDeleteResponse: client = bigquery.Client() diff --git a/plugins/flytekit-bigquery/tests/test_backend_plugin.py b/plugins/flytekit-bigquery/tests/test_backend_plugin.py index 4a53f1614f..be763e61e9 100644 --- a/plugins/flytekit-bigquery/tests/test_backend_plugin.py +++ b/plugins/flytekit-bigquery/tests/test_backend_plugin.py @@ -1,17 +1,91 @@ +from datetime import timedelta from unittest import mock from unittest.mock import MagicMock import grpc -from flyteidl.service.external_plugin_service_pb2 import SUCCEEDED, TaskDeleteResponse +from flyteidl.service.external_plugin_service_pb2 import SUCCEEDED +import flytekit.models.interface as interface_models from flytekit.extend.backend.base_plugin import BackendPluginRegistry +from flytekit.models import literals, task, types +from flytekit.models.core.identifier import Identifier, ResourceType +from flytekit.models.task import Sql, TaskTemplate +@mock.patch("google.cloud.bigquery.job.QueryJob") @mock.patch("google.cloud.bigquery.Client") -def test_bigquery_plugin(client): - client.query.return_value = job.QueryJob("dummy_id", client) - p = BackendPluginRegistry.get_plugin("bigquery_query_job_task1") +def test_bigquery_plugin(mock_client, mock_query_job): + job_id = "dummy_id" + mock_instance = mock_client.return_value + mock_query_job_instance = mock_query_job.return_value + mock_query_job_instance.state.return_value = "SUCCEEDED" + mock_query_job_instance.job_id.return_value = job_id + + class MockDestination: + def __init__(self): + self.project = "dummy_project" + self.dataset_id = "dummy_dataset" + self.table_id = "dummy_table" + + class MockJob: + def __init__(self): + self.state = "SUCCEEDED" + self.job_id = job_id + self.destination = MockDestination() + + mock_instance.get_job.return_value = MockJob() + mock_instance.query.return_value = MockJob() + mock_instance.cancel_job.return_value = MockJob() + + task_id = Identifier( + resource_type=ResourceType.TASK, project="project", domain="domain", name="name", version="version" + ) + task_metadata = task.TaskMetadata( + True, + task.RuntimeMetadata(task.RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), + timedelta(days=1), + literals.RetryStrategy(3), + True, + "0.1.1b0", + "This is deprecated!", + True, + "A", + ) + task_config = { + "Location": "us-central1", + "ProjectID": "dummy_project", + } + + int_type = types.LiteralType(types.SimpleType.INTEGER) + interfaces = interface_models.TypedInterface( + { + "a": interface_models.Variable(int_type, "description1"), + "b": interface_models.Variable(int_type, "description2"), + }, + {}, + ) + inputs = literals.LiteralMap( + { + "a": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), + "b": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), + }, + ) + + p = BackendPluginRegistry.get_plugin("bigquery_query_job_task") ctx = MagicMock(spec=grpc.ServicerContext) - # assert p.create(ctx, "/tmp", None).job_id == "dummy_id" - assert p.get(ctx, "id").state == SUCCEEDED - assert p.delete(ctx, "id") == TaskDeleteResponse() + template = TaskTemplate( + id=task_id, + custom=task_config, + metadata=task_metadata, + interface=interfaces, + type="bigquery_query_job_task", + sql=Sql("SELECT 1"), + ) + assert p.create(ctx, "/tmp", template, inputs).job_id == job_id + res = p.get(ctx, job_id) + assert res.state == SUCCEEDED + assert ( + res.outputs.literals["results"].scalar.structured_dataset.uri == "bq://dummy_project:dummy_dataset.dummy_table" + ) + p.delete(ctx, job_id) + mock_instance.cancel_job.assert_called() diff --git a/tests/flytekit/unit/cli/pyflyte/test_build.py b/tests/flytekit/unit/cli/pyflyte/test_serve.py similarity index 100% rename from tests/flytekit/unit/cli/pyflyte/test_build.py rename to tests/flytekit/unit/cli/pyflyte/test_serve.py From 2c1cce878c8876b21f39b83b9f3dd92808a09e86 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Sun, 9 Apr 2023 11:31:05 -0700 Subject: [PATCH 41/50] more tests Signed-off-by: Kevin Su --- .github/workflows/pythonbuild.yml | 1 - .../extend/backend/external_plugin_service.py | 6 +- .../tests/test_backend_plugin.py | 52 +----------- .../unit/extend/test_backend_plugin.py | 85 +++++++++++++++++-- 4 files changed, 84 insertions(+), 60 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 66111f2733..9c9fb1b917 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -42,7 +42,6 @@ jobs: pip freeze - name: Test with coverage run: | - export GRPC_VERBOSITY=debug make unit_test_codecov - name: Codecov uses: codecov/codecov-action@v3.1.0 diff --git a/flytekit/extend/backend/external_plugin_service.py b/flytekit/extend/backend/external_plugin_service.py index f3504e0f81..2e53232d09 100644 --- a/flytekit/extend/backend/external_plugin_service.py +++ b/flytekit/extend/backend/external_plugin_service.py @@ -14,7 +14,7 @@ class BackendPluginServer(ExternalPluginServiceServicer): - def CreateTask(self, request: TaskCreateRequest, context) -> TaskCreateResponse: + def CreateTask(self, context: grpc.ServicerContext, request: TaskCreateRequest) -> TaskCreateResponse: try: req = model.TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) @@ -25,7 +25,7 @@ def CreateTask(self, request: TaskCreateRequest, context) -> TaskCreateResponse: context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to create task with error {e}") - def GetTask(self, request: TaskGetRequest, context) -> TaskGetResponse: + def GetTask(self, context: grpc.ServicerContext, request: TaskGetRequest) -> TaskGetResponse: try: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.get(context=context, job_id=request.job_id) @@ -33,7 +33,7 @@ def GetTask(self, request: TaskGetRequest, context) -> TaskGetResponse: context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to get task with error {e}") - def DeleteTask(self, request: TaskDeleteRequest, context) -> TaskDeleteResponse: + def DeleteTask(self, context: grpc.ServicerContext, request: TaskDeleteRequest) -> TaskDeleteResponse: try: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.delete(context=context, job_id=request.job_id) diff --git a/plugins/flytekit-bigquery/tests/test_backend_plugin.py b/plugins/flytekit-bigquery/tests/test_backend_plugin.py index be763e61e9..8af6582b1a 100644 --- a/plugins/flytekit-bigquery/tests/test_backend_plugin.py +++ b/plugins/flytekit-bigquery/tests/test_backend_plugin.py @@ -1,15 +1,11 @@ -from datetime import timedelta from unittest import mock from unittest.mock import MagicMock import grpc from flyteidl.service.external_plugin_service_pb2 import SUCCEEDED -import flytekit.models.interface as interface_models from flytekit.extend.backend.base_plugin import BackendPluginRegistry -from flytekit.models import literals, task, types -from flytekit.models.core.identifier import Identifier, ResourceType -from flytekit.models.task import Sql, TaskTemplate +from tests.flytekit.unit.extend.test_backend_plugin import dummy_template, task_inputs @mock.patch("google.cloud.bigquery.job.QueryJob") @@ -37,51 +33,11 @@ def __init__(self): mock_instance.query.return_value = MockJob() mock_instance.cancel_job.return_value = MockJob() - task_id = Identifier( - resource_type=ResourceType.TASK, project="project", domain="domain", name="name", version="version" - ) - task_metadata = task.TaskMetadata( - True, - task.RuntimeMetadata(task.RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), - timedelta(days=1), - literals.RetryStrategy(3), - True, - "0.1.1b0", - "This is deprecated!", - True, - "A", - ) - task_config = { - "Location": "us-central1", - "ProjectID": "dummy_project", - } - - int_type = types.LiteralType(types.SimpleType.INTEGER) - interfaces = interface_models.TypedInterface( - { - "a": interface_models.Variable(int_type, "description1"), - "b": interface_models.Variable(int_type, "description2"), - }, - {}, - ) - inputs = literals.LiteralMap( - { - "a": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), - "b": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), - }, - ) - p = BackendPluginRegistry.get_plugin("bigquery_query_job_task") ctx = MagicMock(spec=grpc.ServicerContext) - template = TaskTemplate( - id=task_id, - custom=task_config, - metadata=task_metadata, - interface=interfaces, - type="bigquery_query_job_task", - sql=Sql("SELECT 1"), - ) - assert p.create(ctx, "/tmp", template, inputs).job_id == job_id + dummy_template.type = "bigquery_query_job_task" + + assert p.create(ctx, "/tmp", dummy_template, task_inputs).job_id == job_id res = p.get(ctx, job_id) assert res.state == SUCCEEDED assert ( diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index c594026ab0..1db725e840 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -1,17 +1,27 @@ import typing +from datetime import timedelta from unittest.mock import MagicMock import grpc from flyteidl.service.external_plugin_service_pb2 import ( SUCCEEDED, + TaskCreateRequest, TaskCreateResponse, + TaskDeleteRequest, TaskDeleteResponse, + TaskGetRequest, TaskGetResponse, ) +import flytekit.models.interface as interface_models from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry +from flytekit.extend.backend.external_plugin_service import BackendPluginServer +from flytekit.models import literals, task, types +from flytekit.models.core.identifier import Identifier, ResourceType from flytekit.models.literals import LiteralMap -from flytekit.models.task import TaskTemplate +from flytekit.models.task import Sql, TaskTemplate + +dummy_id = "dummy_id" class DummyPlugin(BackendPluginBase): @@ -28,7 +38,7 @@ def create( task_template: TaskTemplate, inputs: typing.Optional[LiteralMap] = None, ) -> TaskCreateResponse: - return TaskCreateResponse(job_id="dummy_id") + return TaskCreateResponse(job_id=dummy_id) def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: return TaskGetResponse(state=SUCCEEDED) @@ -40,18 +50,77 @@ def delete(self, context: grpc.ServicerContext, job_id) -> TaskDeleteResponse: BackendPluginRegistry.register(DummyPlugin()) +task_id = Identifier( + resource_type=ResourceType.TASK, project="project", domain="domain", name="name", version="version" +) +task_metadata = task.TaskMetadata( + True, + task.RuntimeMetadata(task.RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), + timedelta(days=1), + literals.RetryStrategy(3), + True, + "0.1.1b0", + "This is deprecated!", + True, + "A", +) +task_config = { + "Location": "us-central1", + "ProjectID": "dummy_project", +} + +int_type = types.LiteralType(types.SimpleType.INTEGER) +interfaces = interface_models.TypedInterface( + { + "a": interface_models.Variable(int_type, "description1"), + "b": interface_models.Variable(int_type, "description2"), + }, + {}, +) +task_inputs = literals.LiteralMap( + { + "a": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), + "b": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), + }, +) + +dummy_template = TaskTemplate( + id=task_id, + custom=task_config, + metadata=task_metadata, + interface=interfaces, + type="dummy", + sql=Sql("SELECT 1"), +) + + def test_base_plugin(): p = BackendPluginBase(task_type="dummy") assert p.task_type == "dummy" ctx = MagicMock(spec=grpc.ServicerContext) - p.create(ctx, "/tmp", None) - p.get(ctx, "id") - p.delete(ctx, "id") + p.create(ctx, "/tmp", dummy_template, task_inputs) + p.get(ctx, dummy_id) + p.delete(ctx, dummy_id) def test_dummy_plugin(): p = BackendPluginRegistry.get_plugin("dummy") ctx = MagicMock(spec=grpc.ServicerContext) - assert p.create(ctx, "/tmp", None).job_id == "dummy_id" - assert p.get(ctx, "id").state == SUCCEEDED - assert p.delete(ctx, "id") == TaskDeleteResponse() + assert p.create(ctx, "/tmp", dummy_template, task_inputs).job_id == dummy_id + assert p.get(ctx, dummy_id).state == SUCCEEDED + assert p.delete(ctx, dummy_id) == TaskDeleteResponse() + + +def test_backend_plugin_server(): + server = BackendPluginServer() + ctx = MagicMock(spec=grpc.ServicerContext) + request = TaskCreateRequest( + inputs=task_inputs.to_flyte_idl(), output_prefix="/tmp", template=dummy_template.to_flyte_idl() + ) + + assert server.CreateTask(ctx, request).job_id == dummy_id + assert server.GetTask(ctx, TaskGetRequest(task_type="dummy", job_id=dummy_id)).state == SUCCEEDED + assert server.DeleteTask(ctx, TaskDeleteRequest(task_type="dummy", job_id=dummy_id)) == TaskDeleteResponse() + + res = server.GetTask(ctx, TaskGetRequest(task_type="fake", job_id=dummy_id)) + assert res is None From 5a2bdc4242515e7044754db5c32a924d2f9285c8 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Sun, 9 Apr 2023 12:00:20 -0700 Subject: [PATCH 42/50] more tests Signed-off-by: Kevin Su --- flytekit/models/task.py | 2 +- .../tests/test_backend_plugin.py | 51 ++++++++++++++++++- .../unit/extend/test_backend_plugin.py | 13 +---- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/flytekit/models/task.py b/flytekit/models/task.py index f7f1d710c9..eabfc6cb01 100644 --- a/flytekit/models/task.py +++ b/flytekit/models/task.py @@ -329,7 +329,7 @@ def __init__( type, metadata, interface, - custom, + custom=None, container=None, task_type_version=0, security_context=None, diff --git a/plugins/flytekit-bigquery/tests/test_backend_plugin.py b/plugins/flytekit-bigquery/tests/test_backend_plugin.py index 8af6582b1a..63e5077045 100644 --- a/plugins/flytekit-bigquery/tests/test_backend_plugin.py +++ b/plugins/flytekit-bigquery/tests/test_backend_plugin.py @@ -1,11 +1,16 @@ +from datetime import timedelta from unittest import mock from unittest.mock import MagicMock import grpc from flyteidl.service.external_plugin_service_pb2 import SUCCEEDED +import flytekit.models.interface as interface_models from flytekit.extend.backend.base_plugin import BackendPluginRegistry -from tests.flytekit.unit.extend.test_backend_plugin import dummy_template, task_inputs +from flytekit.interfaces.cli_identifiers import Identifier +from flytekit.models import literals, task, types +from flytekit.models.core.identifier import ResourceType +from flytekit.models.task import Sql, TaskTemplate @mock.patch("google.cloud.bigquery.job.QueryJob") @@ -35,7 +40,49 @@ def __init__(self): p = BackendPluginRegistry.get_plugin("bigquery_query_job_task") ctx = MagicMock(spec=grpc.ServicerContext) - dummy_template.type = "bigquery_query_job_task" + + task_id = Identifier( + resource_type=ResourceType.TASK, project="project", domain="domain", name="name", version="version" + ) + task_metadata = task.TaskMetadata( + True, + task.RuntimeMetadata(task.RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), + timedelta(days=1), + literals.RetryStrategy(3), + True, + "0.1.1b0", + "This is deprecated!", + True, + "A", + ) + task_config = { + "Location": "us-central1", + "ProjectID": "dummy_project", + } + + int_type = types.LiteralType(types.SimpleType.INTEGER) + interfaces = interface_models.TypedInterface( + { + "a": interface_models.Variable(int_type, "description1"), + "b": interface_models.Variable(int_type, "description2"), + }, + {}, + ) + task_inputs = literals.LiteralMap( + { + "a": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), + "b": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), + }, + ) + + dummy_template = TaskTemplate( + id=task_id, + custom=task_config, + metadata=task_metadata, + interface=interfaces, + type="bigquery_query_job_task", + sql=Sql("SELECT 1"), + ) assert p.create(ctx, "/tmp", dummy_template, task_inputs).job_id == job_id res = p.get(ctx, job_id) diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 1db725e840..78e6712694 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -49,10 +49,7 @@ def delete(self, context: grpc.ServicerContext, job_id) -> TaskDeleteResponse: BackendPluginRegistry.register(DummyPlugin()) - -task_id = Identifier( - resource_type=ResourceType.TASK, project="project", domain="domain", name="name", version="version" -) +task_id = Identifier(resource_type=ResourceType.TASK, project="project", domain="domain", name="t1", version="version") task_metadata = task.TaskMetadata( True, task.RuntimeMetadata(task.RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), @@ -64,33 +61,25 @@ def delete(self, context: grpc.ServicerContext, job_id) -> TaskDeleteResponse: True, "A", ) -task_config = { - "Location": "us-central1", - "ProjectID": "dummy_project", -} int_type = types.LiteralType(types.SimpleType.INTEGER) interfaces = interface_models.TypedInterface( { "a": interface_models.Variable(int_type, "description1"), - "b": interface_models.Variable(int_type, "description2"), }, {}, ) task_inputs = literals.LiteralMap( { "a": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), - "b": literals.Literal(scalar=literals.Scalar(primitive=literals.Primitive(integer=1))), }, ) dummy_template = TaskTemplate( id=task_id, - custom=task_config, metadata=task_metadata, interface=interfaces, type="dummy", - sql=Sql("SELECT 1"), ) From ae8c37ede2aa4139af6b59a4ca8864be2702c253 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Sun, 9 Apr 2023 12:36:15 -0700 Subject: [PATCH 43/50] lint Signed-off-by: Kevin Su --- tests/flytekit/unit/extend/test_backend_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 78e6712694..4646257aa1 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -19,7 +19,7 @@ from flytekit.models import literals, task, types from flytekit.models.core.identifier import Identifier, ResourceType from flytekit.models.literals import LiteralMap -from flytekit.models.task import Sql, TaskTemplate +from flytekit.models.task import TaskTemplate dummy_id = "dummy_id" From 0b151cfb5317d3e04fe88d89d14313a16d8a587a Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 12 Apr 2023 12:37:11 -0700 Subject: [PATCH 44/50] nit Signed-off-by: Kevin Su --- flytekit/extend/backend/external_plugin_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flytekit/extend/backend/external_plugin_service.py b/flytekit/extend/backend/external_plugin_service.py index 2e53232d09..8996c53ef3 100644 --- a/flytekit/extend/backend/external_plugin_service.py +++ b/flytekit/extend/backend/external_plugin_service.py @@ -14,7 +14,7 @@ class BackendPluginServer(ExternalPluginServiceServicer): - def CreateTask(self, context: grpc.ServicerContext, request: TaskCreateRequest) -> TaskCreateResponse: + def CreateTask(self, request: TaskCreateRequest, context: grpc.ServicerContext) -> TaskCreateResponse: try: req = model.TaskCreateRequest.from_flyte_idl(request) plugin = BackendPluginRegistry.get_plugin(req.template.type) @@ -25,7 +25,7 @@ def CreateTask(self, context: grpc.ServicerContext, request: TaskCreateRequest) context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to create task with error {e}") - def GetTask(self, context: grpc.ServicerContext, request: TaskGetRequest) -> TaskGetResponse: + def GetTask(self, request: TaskGetRequest, context: grpc.ServicerContext) -> TaskGetResponse: try: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.get(context=context, job_id=request.job_id) @@ -33,7 +33,7 @@ def GetTask(self, context: grpc.ServicerContext, request: TaskGetRequest) -> Tas context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to get task with error {e}") - def DeleteTask(self, context: grpc.ServicerContext, request: TaskDeleteRequest) -> TaskDeleteResponse: + def DeleteTask(self, request: TaskDeleteRequest, context: grpc.ServicerContext) -> TaskDeleteResponse: try: plugin = BackendPluginRegistry.get_plugin(request.task_type) return plugin.delete(context=context, job_id=request.job_id) From 9f8337d578f11226d300143298ff1a0ede3ccf53 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 13 Apr 2023 10:28:40 -0700 Subject: [PATCH 45/50] lint Signed-off-by: Kevin Su --- tests/flytekit/unit/extend/test_backend_plugin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 4646257aa1..44fdfd6b78 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -107,9 +107,9 @@ def test_backend_plugin_server(): inputs=task_inputs.to_flyte_idl(), output_prefix="/tmp", template=dummy_template.to_flyte_idl() ) - assert server.CreateTask(ctx, request).job_id == dummy_id - assert server.GetTask(ctx, TaskGetRequest(task_type="dummy", job_id=dummy_id)).state == SUCCEEDED - assert server.DeleteTask(ctx, TaskDeleteRequest(task_type="dummy", job_id=dummy_id)) == TaskDeleteResponse() + assert server.CreateTask(request, ctx).job_id == dummy_id + assert server.GetTask(TaskGetRequest(task_type="dummy", job_id=dummy_id), ctx).state == SUCCEEDED + assert server.DeleteTask(TaskDeleteRequest(task_type="dummy", job_id=dummy_id), ctx) == TaskDeleteResponse() - res = server.GetTask(ctx, TaskGetRequest(task_type="fake", job_id=dummy_id)) + res = server.GetTask(TaskGetRequest(task_type="fake", job_id=dummy_id), ctx) assert res is None From 4b92275eec1d2ead1de9ec6222e93749c80bbf98 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 13 Apr 2023 13:02:21 -0700 Subject: [PATCH 46/50] nit Signed-off-by: Kevin Su --- flytekit/clis/sdk_in_container/pyflyte.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flytekit/clis/sdk_in_container/pyflyte.py b/flytekit/clis/sdk_in_container/pyflyte.py index 7c3ced7bb3..3fde915318 100644 --- a/flytekit/clis/sdk_in_container/pyflyte.py +++ b/flytekit/clis/sdk_in_container/pyflyte.py @@ -134,6 +134,7 @@ def main(ctx, pkgs: typing.List[str], config: str, verbose: bool): main.add_command(register) main.add_command(backfill) main.add_command(serve) +main.epilog if __name__ == "__main__": main() From e07c72dea3ed8f62d4b28051df11c97dae18153a Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Thu, 4 May 2023 16:21:14 -0700 Subject: [PATCH 47/50] update Signed-off-by: Kevin Su --- .github/workflows/pythonpublish.yml | 41 +++++++++++++++++++ Dockerfile.external-plugin-service | 10 +++++ flytekit/clis/sdk_in_container/serve.py | 14 +++++-- flytekit/extend/backend/base_plugin.py | 6 +-- flytekit/models/task.py | 2 +- .../bigquery/backend_plugin.py | 9 +++- .../unit/extend/test_backend_plugin.py | 10 +---- 7 files changed, 75 insertions(+), 17 deletions(-) create mode 100644 Dockerfile.external-plugin-service diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml index 6a94affc68..6f0060970c 100644 --- a/.github/workflows/pythonpublish.yml +++ b/.github/workflows/pythonpublish.yml @@ -141,3 +141,44 @@ jobs: file: ./plugins/flytekit-sqlalchemy/Dockerfile cache-from: type=gha cache-to: type=gha,mode=max + build-and-push-external-plugin-service-images: + runs-on: ubuntu-latest + needs: deploy + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: "0" + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + - name: Login to GitHub Container Registry + if: ${{ github.event_name == 'release' }} + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: "${{ secrets.FLYTE_BOT_USERNAME }}" + password: "${{ secrets.FLYTE_BOT_PAT }}" + - name: Prepare External Plugin Service Image Names + id: external-plugin-service-names + uses: docker/metadata-action@v3 + with: + images: | + ghcr.io/${{ github.repository_owner }}/flytekit + tags: | + external-plugin-service-latest + external-plugin-service-${{ github.sha }} + external-plugin-service-${{ needs.deploy.outputs.version }} + - name: Push External Plugin Service Image to GitHub Registry + uses: docker/build-push-action@v2 + with: + context: "." + platforms: linux/arm64, linux/amd64 + push: ${{ github.event_name == 'release' }} + tags: ${{ steps.external-plugin-service-names.outputs.tags }} + build-args: | + VERSION=${{ needs.deploy.outputs.version }} + file: ./Dockerfile + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/Dockerfile.external-plugin-service b/Dockerfile.external-plugin-service new file mode 100644 index 0000000000..435e15107e --- /dev/null +++ b/Dockerfile.external-plugin-service @@ -0,0 +1,10 @@ +FROM python:3.9-slim-buster + +MAINTAINER Flyte Team +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytekit + +ARG VERSION +RUN pip install -U flytekit==$VERSION \ + flytekitplugins-bigquery==$VERSION \ + +CMD pyflyte serve --port 80 diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index d44f273125..64543406eb 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -17,20 +17,28 @@ type=int, help="Grpc port for the external plugin service", ) +@click.option( + "--worker", + default="10", + is_flag=False, + type=int, + help="Number of workers for the grpc server", +) @click.option( "--timeout", default=None, is_flag=False, type=int, - help="It will wait for the specified number of seconds before shutting down grpc server. It should only be used for testing.", + help="It will wait for the specified number of seconds before shutting down grpc server. It should only be used " + "for testing.", ) @click.pass_context -def serve(_: click.Context, port, timeout): +def serve(_: click.Context, port, worker, timeout): """ Start a grpc server for the external plugin service. """ click.secho("Starting the external plugin service...", fg="blue") - server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + server = grpc.server(futures.ThreadPoolExecutor(max_workers=worker)) add_ExternalPluginServiceServicer_to_server(BackendPluginServer(), server) server.add_insecure_port(f"[::]:{port}") diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index 7146113412..f6ec295823 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -1,5 +1,5 @@ import typing -from abc import abstractmethod +from abc import ABC, abstractmethod import grpc from flyteidl.core.tasks_pb2 import TaskTemplate @@ -16,7 +16,7 @@ from flytekit.models.literals import LiteralMap -class BackendPluginBase: +class BackendPluginBase(ABC): def __init__(self, task_type: str): self._task_type = task_type @@ -63,4 +63,4 @@ def convert_to_flyte_state(state: str) -> State: return SUCCEEDED elif state in ["running"]: return RUNNING - raise ValueError(f"Unrecognize state: {state}") + raise ValueError(f"Unrecognized state: {state}") diff --git a/flytekit/models/task.py b/flytekit/models/task.py index eabfc6cb01..f7f1d710c9 100644 --- a/flytekit/models/task.py +++ b/flytekit/models/task.py @@ -329,7 +329,7 @@ def __init__( type, metadata, interface, - custom=None, + custom, container=None, task_type_version=0, security_context=None, diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 0df7939eda..95aab9b150 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -1,3 +1,4 @@ +import datetime from typing import Dict, Optional import grpc @@ -18,8 +19,14 @@ from flytekit.models.types import LiteralType, StructuredDatasetType pythonTypeToBigQueryType: Dict[type, str] = { - str: "STRING", + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data_type_sizes + list: "ARRAY", + bool: "BOOL", + bytes: "BYTES", + datetime.datetime: "DATETIME", + float: "FLOAT64", int: "INT64", + str: "STRING", } diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 44fdfd6b78..3881a6de49 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -80,18 +80,10 @@ def delete(self, context: grpc.ServicerContext, job_id) -> TaskDeleteResponse: metadata=task_metadata, interface=interfaces, type="dummy", + custom={}, ) -def test_base_plugin(): - p = BackendPluginBase(task_type="dummy") - assert p.task_type == "dummy" - ctx = MagicMock(spec=grpc.ServicerContext) - p.create(ctx, "/tmp", dummy_template, task_inputs) - p.get(ctx, dummy_id) - p.delete(ctx, dummy_id) - - def test_dummy_plugin(): p = BackendPluginRegistry.get_plugin("dummy") ctx = MagicMock(spec=grpc.ServicerContext) From 03578065d856696717465b20f489f4c1c5401d71 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 5 May 2023 00:46:08 -0700 Subject: [PATCH 48/50] update Signed-off-by: Kevin Su --- .github/workflows/pythonpublish.yml | 8 ++-- flytekit/extend/backend/base_plugin.py | 43 ++++++++++++++++++- .../extend/backend/external_plugin_service.py | 27 ++++++++---- flytekit/extend/backend/model.py | 40 ----------------- .../bigquery/backend_plugin.py | 3 +- .../tests/test_backend_plugin.py | 2 +- .../unit/extend/test_backend_plugin.py | 8 ++-- tests/flytekit/unit/extend/test_model.py | 43 ------------------- 8 files changed, 71 insertions(+), 103 deletions(-) delete mode 100644 flytekit/extend/backend/model.py delete mode 100644 tests/flytekit/unit/extend/test_model.py diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml index 00bba1eaf4..63e888e6a1 100644 --- a/.github/workflows/pythonpublish.yml +++ b/.github/workflows/pythonpublish.yml @@ -166,11 +166,11 @@ jobs: uses: docker/metadata-action@v3 with: images: | - ghcr.io/${{ github.repository_owner }}/flytekit + ghcr.io/${{ github.repository_owner }}/external-plugin-service tags: | - external-plugin-service-latest - external-plugin-service-${{ github.sha }} - external-plugin-service-${{ needs.deploy.outputs.version }} + latest + ${{ github.sha }} + ${{ needs.deploy.outputs.version }} - name: Push External Plugin Service Image to GitHub Registry uses: docker/build-push-action@v2 with: diff --git a/flytekit/extend/backend/base_plugin.py b/flytekit/extend/backend/base_plugin.py index f6ec295823..9fc1bc206b 100644 --- a/flytekit/extend/backend/base_plugin.py +++ b/flytekit/extend/backend/base_plugin.py @@ -13,15 +13,29 @@ TaskGetResponse, ) +from flytekit import logger from flytekit.models.literals import LiteralMap class BackendPluginBase(ABC): + """ + This is the base class for all backend plugins. It defines the interface that all plugins must implement. + The external plugins service will be run either locally or in a pod, and will be responsible for + invoking backend plugins. The propeller will communicate with the external plugins service + to create tasks, get the status of tasks, and delete tasks. + + All the backend plugins should be registered in the BackendPluginRegistry. External plugins service + will look up the plugin based on the task type. Every task type can only have one plugin. + """ + def __init__(self, task_type: str): self._task_type = task_type @property def task_type(self) -> str: + """ + task_type is the name of the task type that this plugin supports. + """ return self._task_type @abstractmethod @@ -32,30 +46,57 @@ def create( task_template: TaskTemplate, inputs: typing.Optional[LiteralMap] = None, ) -> TaskCreateResponse: + """ + Return a Unique ID for the task that was created. It should return error code if the task creation failed. + """ pass @abstractmethod def get(self, context: grpc.ServicerContext, job_id: str) -> TaskGetResponse: + """ + Return the status of the task, and return the outputs in some cases. For example, bigquery job + can't write the structured dataset to the output location, so it returns the output literals to the propeller, + and the propeller will write the structured dataset to the blob store. + """ pass @abstractmethod def delete(self, context: grpc.ServicerContext, job_id: str) -> TaskDeleteResponse: + """ + Delete the task. This call should be idempotent. + """ pass class BackendPluginRegistry(object): + """ + This is the registry for all backend plugins. The external plugins service will look up the plugin + based on the task type. + """ + _REGISTRY: typing.Dict[str, BackendPluginBase] = {} @staticmethod def register(plugin: BackendPluginBase): + if plugin.task_type in BackendPluginRegistry._REGISTRY: + raise ValueError(f"Duplicate plugin for task type {plugin.task_type}") BackendPluginRegistry._REGISTRY[plugin.task_type] = plugin + logger.info(f"Registering backend plugin for task type {plugin.task_type}") @staticmethod - def get_plugin(task_type: str) -> BackendPluginBase: + def get_plugin(context: grpc.ServicerContext, task_type: str) -> typing.Optional[BackendPluginBase]: + if task_type not in BackendPluginRegistry._REGISTRY: + logger.error(f"Cannot find backend plugin for task type [{task_type}]") + context.set_code(grpc.StatusCode.NOT_FOUND) + context.set_details(f"Cannot find backend plugin for task type [{task_type}]") + return None return BackendPluginRegistry._REGISTRY[task_type] def convert_to_flyte_state(state: str) -> State: + """ + Convert the state from the backend plugin to the state in flyte. + """ state = state.lower() if state in ["failed"]: return RETRYABLE_FAILURE diff --git a/flytekit/extend/backend/external_plugin_service.py b/flytekit/extend/backend/external_plugin_service.py index 8996c53ef3..e820a320b1 100644 --- a/flytekit/extend/backend/external_plugin_service.py +++ b/flytekit/extend/backend/external_plugin_service.py @@ -1,5 +1,6 @@ import grpc from flyteidl.service.external_plugin_service_pb2 import ( + PERMANENT_FAILURE, TaskCreateRequest, TaskCreateResponse, TaskDeleteRequest, @@ -9,34 +10,44 @@ ) from flyteidl.service.external_plugin_service_pb2_grpc import ExternalPluginServiceServicer -from flytekit.extend.backend import model +from flytekit import logger from flytekit.extend.backend.base_plugin import BackendPluginRegistry +from flytekit.models.literals import LiteralMap +from flytekit.models.task import TaskTemplate class BackendPluginServer(ExternalPluginServiceServicer): def CreateTask(self, request: TaskCreateRequest, context: grpc.ServicerContext) -> TaskCreateResponse: try: - req = model.TaskCreateRequest.from_flyte_idl(request) - plugin = BackendPluginRegistry.get_plugin(req.template.type) - return plugin.create( - context=context, inputs=req.inputs, output_prefix=req.output_prefix, task_template=req.template - ) + tmp = TaskTemplate.from_flyte_idl(request.template) + inputs = LiteralMap.from_flyte_idl(request.inputs) if request.inputs else None + plugin = BackendPluginRegistry.get_plugin(context, tmp.type) + if plugin is None: + return TaskCreateResponse() + return plugin.create(context=context, inputs=inputs, output_prefix=request.output_prefix, task_template=tmp) except Exception as e: + logger.error(f"failed to create task with error {e}") context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to create task with error {e}") def GetTask(self, request: TaskGetRequest, context: grpc.ServicerContext) -> TaskGetResponse: try: - plugin = BackendPluginRegistry.get_plugin(request.task_type) + plugin = BackendPluginRegistry.get_plugin(context, request.task_type) + if plugin is None: + return TaskGetResponse(state=PERMANENT_FAILURE) return plugin.get(context=context, job_id=request.job_id) except Exception as e: + logger.error(f"failed to get task with error {e}") context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to get task with error {e}") def DeleteTask(self, request: TaskDeleteRequest, context: grpc.ServicerContext) -> TaskDeleteResponse: try: - plugin = BackendPluginRegistry.get_plugin(request.task_type) + plugin = BackendPluginRegistry.get_plugin(context, request.task_type) + if plugin is None: + return TaskDeleteResponse() return plugin.delete(context=context, job_id=request.job_id) except Exception as e: + logger.error(f"failed to delete task with error {e}") context.set_code(grpc.StatusCode.INTERNAL) context.set_details(f"failed to delete task with error {e}") diff --git a/flytekit/extend/backend/model.py b/flytekit/extend/backend/model.py deleted file mode 100644 index 7daa513cfe..0000000000 --- a/flytekit/extend/backend/model.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Optional - -from flyteidl.service import external_plugin_service_pb2 - -from flytekit.models import common, task -from flytekit.models.literals import LiteralMap - - -class TaskCreateRequest(common.FlyteIdlEntity): - def __init__(self, output_prefix: str, template: task.TaskTemplate, inputs: Optional[LiteralMap] = None): - self._output_prefix = output_prefix - self._template = template - self._inputs = inputs - - @property - def output_prefix(self) -> str: - return self._output_prefix - - @property - def template(self) -> task.TaskTemplate: - return self._template - - @property - def inputs(self) -> Optional[LiteralMap]: - return self._inputs - - def to_flyte_idl(self) -> external_plugin_service_pb2.TaskCreateRequest: - return external_plugin_service_pb2.TaskCreateRequest( - output_prefix=self.output_prefix, - template=self.template.to_flyte_idl(), - inputs=self.inputs.to_flyte_idl(), - ) - - @classmethod - def from_flyte_idl(cls, proto): - return cls( - output_prefix=proto.output_prefix, - template=task.TaskTemplate.from_flyte_idl(proto.template), - inputs=LiteralMap.from_flyte_idl(proto.inputs) if proto.inputs is not None else None, - ) diff --git a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py index 95aab9b150..acd5ece430 100644 --- a/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py +++ b/plugins/flytekit-bigquery/flytekitplugins/bigquery/backend_plugin.py @@ -10,7 +10,7 @@ ) from google.cloud import bigquery -from flytekit import FlyteContextManager, StructuredDataset +from flytekit import FlyteContextManager, StructuredDataset, logger from flytekit.core.type_engine import TypeEngine from flytekit.extend.backend.base_plugin import BackendPluginBase, BackendPluginRegistry, convert_to_flyte_state from flytekit.models import literals @@ -49,6 +49,7 @@ def create( } native_inputs = TypeEngine.literal_map_to_kwargs(ctx, inputs, python_interface_inputs) + logger.info(f"Create BigQuery job config with inputs: {native_inputs}") job_config = bigquery.QueryJobConfig( query_parameters=[ bigquery.ScalarQueryParameter(name, pythonTypeToBigQueryType[python_interface_inputs[name]], val) diff --git a/plugins/flytekit-bigquery/tests/test_backend_plugin.py b/plugins/flytekit-bigquery/tests/test_backend_plugin.py index 63e5077045..c95cf308a7 100644 --- a/plugins/flytekit-bigquery/tests/test_backend_plugin.py +++ b/plugins/flytekit-bigquery/tests/test_backend_plugin.py @@ -38,8 +38,8 @@ def __init__(self): mock_instance.query.return_value = MockJob() mock_instance.cancel_job.return_value = MockJob() - p = BackendPluginRegistry.get_plugin("bigquery_query_job_task") ctx = MagicMock(spec=grpc.ServicerContext) + p = BackendPluginRegistry.get_plugin(ctx, "bigquery_query_job_task") task_id = Identifier( resource_type=ResourceType.TASK, project="project", domain="domain", name="name", version="version" diff --git a/tests/flytekit/unit/extend/test_backend_plugin.py b/tests/flytekit/unit/extend/test_backend_plugin.py index 3881a6de49..9dfd20d99e 100644 --- a/tests/flytekit/unit/extend/test_backend_plugin.py +++ b/tests/flytekit/unit/extend/test_backend_plugin.py @@ -4,6 +4,7 @@ import grpc from flyteidl.service.external_plugin_service_pb2 import ( + PERMANENT_FAILURE, SUCCEEDED, TaskCreateRequest, TaskCreateResponse, @@ -28,9 +29,6 @@ class DummyPlugin(BackendPluginBase): def __init__(self): super().__init__(task_type="dummy") - def initialize(self): - pass - def create( self, context: grpc.ServicerContext, @@ -85,8 +83,8 @@ def delete(self, context: grpc.ServicerContext, job_id) -> TaskDeleteResponse: def test_dummy_plugin(): - p = BackendPluginRegistry.get_plugin("dummy") ctx = MagicMock(spec=grpc.ServicerContext) + p = BackendPluginRegistry.get_plugin(ctx, "dummy") assert p.create(ctx, "/tmp", dummy_template, task_inputs).job_id == dummy_id assert p.get(ctx, dummy_id).state == SUCCEEDED assert p.delete(ctx, dummy_id) == TaskDeleteResponse() @@ -104,4 +102,4 @@ def test_backend_plugin_server(): assert server.DeleteTask(TaskDeleteRequest(task_type="dummy", job_id=dummy_id), ctx) == TaskDeleteResponse() res = server.GetTask(TaskGetRequest(task_type="fake", job_id=dummy_id), ctx) - assert res is None + assert res.state == PERMANENT_FAILURE diff --git a/tests/flytekit/unit/extend/test_model.py b/tests/flytekit/unit/extend/test_model.py deleted file mode 100644 index 0f154450d3..0000000000 --- a/tests/flytekit/unit/extend/test_model.py +++ /dev/null @@ -1,43 +0,0 @@ -from datetime import timedelta - -from flytekit.extend.backend.model import TaskCreateRequest -from flytekit.models import literals -from flytekit.models.core import identifier -from flytekit.models.interface import TypedInterface -from flytekit.models.literals import Literal, LiteralMap, Primitive, Scalar -from flytekit.models.task import Container, Resources, RuntimeMetadata, TaskMetadata, TaskTemplate - - -def test_create_request(): - inputs = LiteralMap({"foo": Literal(scalar=Scalar(primitive=Primitive(integer=2)))}) - resource = [Resources.ResourceEntry(Resources.ResourceName.CPU, "1")] - resources = Resources(resource, resource) - template = TaskTemplate( - identifier.Identifier(identifier.ResourceType.TASK, "project", "domain", "name", "version"), - "python", - TaskMetadata( - True, - RuntimeMetadata(RuntimeMetadata.RuntimeType.FLYTE_SDK, "1.0.0", "python"), - timedelta(days=1), - literals.RetryStrategy(3), - True, - "0.1.1b0", - "This is deprecated!", - True, - "A", - ), - TypedInterface(inputs={}, outputs={}), - {"a": 1, "b": {"c": 2, "d": 3}}, - container=Container( - "my_image", - ["this", "is", "a", "cmd"], - ["this", "is", "an", "arg"], - resources, - {}, - {}, - ), - ) - req = TaskCreateRequest(output_prefix="s3://bucket/key", template=template, inputs=inputs) - assert req.inputs == inputs - assert req.template == template - assert req == TaskCreateRequest.from_flyte_idl(req.to_flyte_idl()) From f594df965eee78d0c76ec757c0833f9663898515 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 5 May 2023 10:56:45 -0700 Subject: [PATCH 49/50] nit Signed-off-by: Kevin Su --- Dockerfile.external-plugin-service | 2 +- flytekit/clis/sdk_in_container/serve.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.external-plugin-service b/Dockerfile.external-plugin-service index 435e15107e..7ea7c8ce5d 100644 --- a/Dockerfile.external-plugin-service +++ b/Dockerfile.external-plugin-service @@ -7,4 +7,4 @@ ARG VERSION RUN pip install -U flytekit==$VERSION \ flytekitplugins-bigquery==$VERSION \ -CMD pyflyte serve --port 80 +CMD pyflyte serve --port 30087 diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 64543406eb..7a51fc179c 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -12,7 +12,7 @@ @click.command("serve", help=_serve_help) @click.option( "--port", - default="80", + default="30087", is_flag=False, type=int, help="Grpc port for the external plugin service", From c5245609fd14a2fed4fe85f3190fc6699364b699 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Fri, 5 May 2023 11:43:18 -0700 Subject: [PATCH 50/50] port Signed-off-by: Kevin Su --- Dockerfile.external-plugin-service | 2 +- flytekit/clis/sdk_in_container/serve.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.external-plugin-service b/Dockerfile.external-plugin-service index 7ea7c8ce5d..8a79a31720 100644 --- a/Dockerfile.external-plugin-service +++ b/Dockerfile.external-plugin-service @@ -7,4 +7,4 @@ ARG VERSION RUN pip install -U flytekit==$VERSION \ flytekitplugins-bigquery==$VERSION \ -CMD pyflyte serve --port 30087 +CMD pyflyte serve --port 8000 diff --git a/flytekit/clis/sdk_in_container/serve.py b/flytekit/clis/sdk_in_container/serve.py index 7a51fc179c..71b539d36c 100644 --- a/flytekit/clis/sdk_in_container/serve.py +++ b/flytekit/clis/sdk_in_container/serve.py @@ -12,7 +12,7 @@ @click.command("serve", help=_serve_help) @click.option( "--port", - default="30087", + default="8000", is_flag=False, type=int, help="Grpc port for the external plugin service",