## Overview
Introduces the Ray framework for distributed task execution and multiprocessing support in Datamimic. This change moves us from single-process execution to a distributed model where:
- Parent process handles model parsing and orchestration -> currently the parent process handles model parsing but does little orchestration beyond gathering the generated results
- Ray workers handle parallel data generation and processing -> done
- Improved resource utilization across multiple cores -> performance is currently on par with the previous pure-multiprocessing approach; we will check whether scaling across Ray clusters improves it

## Key Features
- New GenerateWorker class using Ray for distributed execution -> done
- Parent/worker process separation for better resource management -> done
- Automatic scaling across available CPU cores -> done
- Proper context isolation between workers -> done
- Efficient cleanup of resources -> done

## Technical Implementation
- Ray workers handle chunks of data generation tasks -> done
- Parent process manages task distribution and result aggregation -> done
- Context copying ensures proper isolation between workers -> done
- Resource cleanup in finally blocks -> done
- Error propagation from workers to parent process -> needs further checking

## Architecture
Parent Process -> done
- Parse models
- Initialize Ray
- Distribute tasks to workers
- Aggregate results

Ray Workers (@ray.remote) -> we use Ray tasks (`ray.remote` on a function) instead of Ray actors (class objects) to avoid the unnecessary overhead of creating and managing actors; a minimal sketch of this pattern follows this description.
- Handle data generation
- Process data chunks
- Manage database connections -> not needed
- Clean up resources

## Notes
This is our first implementation using Ray for distributed processing. The architecture separates concerns between parent orchestration and worker execution while maintaining our existing model parsing and validation.

---------

Co-authored-by: Dang Ly <[email protected]>
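A minimal, self-contained sketch of the Ray-task pattern described above. The names `generate_chunk` and `run` are hypothetical stand-ins, not the actual GenerateWorker API; the real implementation additionally copies context and manages exporter state per worker.

```python
import ray

ray.init(ignore_reinit_error=True)


# Hypothetical worker task: a plain function decorated with @ray.remote
# (a Ray task) rather than a Ray actor, mirroring the choice described above.
@ray.remote
def generate_chunk(chunk_index: int, chunk_size: int) -> list[dict]:
    # Stand-in for the real data generation each worker performs on its own
    # copy of the parsed context.
    return [{"chunk": chunk_index, "row": i} for i in range(chunk_size)]


def run(total_count: int, num_workers: int) -> list[dict]:
    # Parent process: split the requested record count into one chunk per worker,
    # submit the tasks, then gather and merge the generated results.
    base, remainder = divmod(total_count, num_workers)
    futures = [
        generate_chunk.remote(i, base + (1 if i < remainder else 0))
        for i in range(num_workers)
    ]
    results: list[dict] = []
    for chunk in ray.get(futures):
        results.extend(chunk)
    return results


if __name__ == "__main__":
    print(len(run(total_count=10_000, num_workers=4)))
```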
Showing 50 changed files with 1,387 additions and 1,704 deletions.
@@ -5,10 +5,16 @@
 # For questions and support, contact: [email protected]
 import argparse
 import logging
+import os
 import traceback
 import uuid
 from pathlib import Path

+# Avoid deduplication of logs in Ray, MUST be set before importing ray
+os.environ["RAY_DEDUP_LOGS"] = "0"
+
+import ray
+
 from datamimic_ce.config import settings
 from datamimic_ce.exporters.test_result_exporter import TestResultExporter
 from datamimic_ce.logger import logger, setup_logger

@@ -20,30 +26,32 @@
 LOG_FILE = "datamimic.log"

+ray.init(ignore_reinit_error=True, local_mode=settings.RAY_DEBUG, include_dashboard=False)
+

 class DataMimic:
     def __init__(
         self,
         descriptor_path: Path,
         task_id: str | None = None,
         platform_props: dict[str, str] | None = None,
         platform_configs: dict | None = None,
         test_mode: bool = False,
         args: argparse.Namespace | None = None,
     ):
         """
         Initialize DataMimic with descriptor_path.
         """
         # Set up logger
         log_level = getattr(logging, args.log_level.upper(), logging.INFO) if args else logging.INFO
-        setup_logger(logger_name=settings.DEFAULT_LOGGER, task_id=task_id, level=log_level)
+        setup_logger(logger_name=settings.DEFAULT_LOGGER, worker_name="MAIN", level=log_level)

         self._task_id = task_id or uuid.uuid4().hex
         self._descriptor_path = descriptor_path
         self._platform_props = platform_props
         self._platform_configs = platform_configs
         self._test_mode = test_mode
-        self._test_result_storage = TestResultExporter() if test_mode else None
+        self._test_result_storage = TestResultExporter()

         # Initialize logging
         log_system_info()
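Two notes on the Ray setup introduced here: `RAY_DEDUP_LOGS=0` turns off Ray's log deduplication, so log lines from every worker are printed instead of being collapsed into "repeated N times" summaries, and, as the comment states, it must be set before `import ray` because Ray reads the variable at import time. In `ray.init`, `ignore_reinit_error=True` makes a repeated initialization a no-op instead of an error, `local_mode=settings.RAY_DEBUG` runs tasks serially in the driver process for easier debugging when the debug flag is on, and `include_dashboard=False` skips starting the Ray dashboard.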
@@ -4,10 +4,6 @@
 # See LICENSE file for the full text of the license.
 # For questions and support, contact: [email protected]

-from abc import ABC, abstractmethod
-
-
-class Exporter(ABC):
-    @abstractmethod
-    def consume(self, product: tuple):
-        pass
+class Exporter:
+    pass
@@ -0,0 +1,84 @@
# DATAMIMIC
# Copyright (c) 2023-2024 Rapiddweller Asia Co., Ltd.
# This software is licensed under the MIT License.
# See LICENSE file for the full text of the license.
# For questions and support, contact: [email protected]


class ExporterStateManager:
    """
    Manages the state of exporters for each worker.
    """

    def __init__(self, worker_id):
        self._worker_id = worker_id
        self._storage_dict = {}

    @property
    def worker_id(self):
        return self._worker_id

    def add_storage(self, key: str, chunk_size: int):
        self._storage_dict[key] = ExporterStateStorage(chunk_size)

    def get_storage(self, key: str):
        if key not in self._storage_dict:
            self._storage_dict[key] = ExporterStateStorage(None)
        return self._storage_dict[key]

    def load_exporter_state(self, key: str):
        storage = self.get_storage(key)

        return storage.global_counter, storage.current_counter, storage.chunk_index, storage.chunk_size

    def rotate_chunk(self, key: str):
        storage = self.get_storage(key)

        storage.chunk_index = storage.chunk_index + 1
        storage.current_counter = 0

    def save_state(self, key: str, global_counter: int, current_counter: int):
        storage = self.get_storage(key)

        storage.global_counter = global_counter
        storage.current_counter = current_counter


class ExporterStateStorage:
    """
    Stores the state of an exporter for a worker.
    """

    def __init__(self, chunk_size: int | None):
        self._global_counter = 0
        self._current_counter = 0
        self._chunk_index = 0
        self._chunk_size = chunk_size

    @property
    def global_counter(self):
        return self._global_counter

    @global_counter.setter
    def global_counter(self, value):
        self._global_counter = value

    @property
    def current_counter(self):
        return self._current_counter

    @current_counter.setter
    def current_counter(self, value):
        self._current_counter = value

    @property
    def chunk_size(self):
        return self._chunk_size

    @property
    def chunk_index(self):
        return self._chunk_index

    @chunk_index.setter
    def chunk_index(self, value):
        self._chunk_index = value
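For illustration, a short usage sketch of the state manager above. The exporter key, chunk size, and `on_batch_exported` helper are hypothetical and not part of this commit; the real exporters wire this into their write paths.

```python
# Assumes ExporterStateManager from the new module above is importable.
# One ExporterStateManager per worker, one storage entry per exporter key.
manager = ExporterStateManager(worker_id=1)
manager.add_storage("csv_products", chunk_size=1000)


def on_batch_exported(key: str, batch_size: int) -> None:
    # Load the counters for this exporter, advance them by the exported batch,
    # and rotate to a new chunk once the configured chunk size is reached.
    global_counter, current_counter, _chunk_index, chunk_size = manager.load_exporter_state(key)
    global_counter += batch_size
    current_counter += batch_size
    if chunk_size is not None and current_counter >= chunk_size:
        manager.rotate_chunk(key)  # bumps chunk_index and resets the per-chunk counter
        current_counter = 0
    manager.save_state(key, global_counter, current_counter)


on_batch_exported("csv_products", batch_size=250)
```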