Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runtime reorg #230

Merged
merged 6 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ __check_defined = \
.defaults.ray-lib-src-venv:: .defaults.create-venv .defaults.install-ray-lib-src-venv .defaults.install-local-requirements-venv

# Install all source from the repo for a ray runtime transform into an existing venv
# And if there is an adjacent python dir (as for transforms), then also install that source
.PHONY: .defaults.install-ray-lib-src-venv
.defaults.install-ray-lib-src-venv::
@# Help: Install Ray and Python data processing library source into existing venv
Expand All @@ -285,6 +286,9 @@ __check_defined = \
pip uninstall -y data-prep-toolkit-ray; \
$(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \
$(MAKE) PYTHON_PROJECT_DIR=$(DPK_RAY_LIB_DIR) .defaults.install-src-venv; \
if [ -d ../python ]; then \
$(MAKE) PYTHON_PROJECT_DIR=../python .defaults.install-src-venv; \
fi
echo Installed source from Ray data processing library for `which $(PYTHON)`

# Install local requirements last as it generally includes our lib source
Expand Down
2 changes: 2 additions & 0 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ DOC_ID_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
DOC_ID_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
EDEDUP_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FDEDUP_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX)
FILTER_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX)
NOOP_PYTHON_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX)
Expand Down
9 changes: 3 additions & 6 deletions transforms/.make.transforms
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,6 @@ extra-help:

.PHONY: .transforms.ray-venv
.transforms.ray-venv:: .defaults.ray-lib-src-venv
if [ -d ../python ]; then \
$(MAKE) PYTHON_PROJECT_DIR=../python .defaults.install-src-venv; \
fi

# For now we do NOT install ../python source as we do for ray, since for now
# spark implementations do not use the pure python transform. If/when
Expand Down Expand Up @@ -153,13 +150,13 @@ extra-help:
@# Help: Run the transform's tests and any '*local' .py files

.PHONY: .transforms.python-test-image
.transforms.python-test-image:: .transforms.python-image .defaults.test-image-pytest
.transforms.python-test-image:: .transforms.python-image .transforms.test-image-help .defaults.test-image-pytest

.PHONY: .transforms.ray-test-image
.transforms.ray-test-image:: .transforms.ray-image .defaults.test-image-pytest
.transforms.ray-test-image:: .transforms.ray-image .transforms.test-image-help .defaults.test-image-pytest

.PHONY: .transforms.spark-test-image
.transforms.spark-test-image:: .transforms.spark-image .defaults.test-image-pytest
.transforms.spark-test-image:: .transforms.spark-image .transforms.test-image-help .defaults.test-image-pytest

.PHONY: .transforms.test-image-pytest
.transforms.test-image-pytest:: .defaults.test-image-pytest
Expand Down
11 changes: 0 additions & 11 deletions transforms/fix.sh

This file was deleted.

1 change: 1 addition & 0 deletions transforms/universal/filter/python/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
venv/
38 changes: 38 additions & 0 deletions transforms/universal/filter/python/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Output directories (under test-data/ and at this directory's top level)
test-data/output
output/*
/output/
# NOTE(review): presumably a local copy of the data processing library staged
# for image builds -- confirm; the Dockerfile copies data-processing-lib-python/
data-processing-lib/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class


# Distribution / packaging
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
.tox/
htmlcov
.coverage
.cache
nosetests.xml
coverage.xml
43 changes: 43 additions & 0 deletions transforms/universal/filter/python/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Image for the pure-python filter transform: installs the data processing
# library and the transform source, then adds the tests and test data.
FROM docker.io/python:3.10.14-slim-bullseye

# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir --upgrade pip

# install pytest
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform
RUN useradd -ms /bin/bash dpk
USER dpk
WORKDIR /home/dpk

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .

# END OF STEPS destined for a data-prep-kit base image

# Install the transform itself (editable install from src/ + pyproject.toml)
COPY --chown=dpk:root src/ src/
COPY --chown=dpk:root pyproject.toml pyproject.toml
RUN pip install --no-cache-dir -e .

#COPY requirements.txt requirements.txt
#RUN pip install --no-cache-dir -r requirements.txt

# copy source data
# COPY defaults to root ownership regardless of USER, so --chown is repeated
# here to keep these files owned by the dpk user like the copies above.
COPY --chown=dpk:root ./src/filter_transform.py .
COPY --chown=dpk:root ./src/filter_transform_python.py .
COPY --chown=dpk:root ./src/filter_local.py local/

# copy test
COPY --chown=dpk:root test/ test/
COPY --chown=dpk:root test-data/ test-data/

# Set environment (key=value form; the whitespace-separated form is legacy)
ENV PYTHONPATH=/home/dpk

# Put these at the end since they seem to upset the docker cache.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
55 changes: 55 additions & 0 deletions transforms/universal/filter/python/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Define the root of the local git clone so that the common rules are able
# to know where they are running from.
REPOROOT=../../../..
# Include a library of common .transforms.* targets which most
# transforms should be able to reuse. However, feel free
# to override/redefine the rules below.

# $(REPOROOT)/.make.versions file contains the versions

TRANSFORM_NAME=filter
# Kept as a recursive (=) assignment on purpose: FILTER_PYTHON_VERSION is not
# defined in this file, so it must be expanded lazily, after the include below.
# NOTE(review): presumably .make.versions is pulled in via that include -- confirm.
DOCKER_IMAGE_VERSION=${FILTER_PYTHON_VERSION}

include $(REPOROOT)/transforms/.make.transforms

# None of the targets below produce a file of the same name, so declare them
# phony here rather than relying on the included rules; this keeps e.g.
# 'make test' working even though a test/ directory sits next to this Makefile.
.PHONY: venv test clean image test-src setup build publish publish-image \
	set-versions build-dist publish-dist test-image run-cli-sample \
	run-local-sample run-local-python-sample minio-start load-image

venv:: .transforms.python-venv

test:: .transforms.python-test

clean:: .transforms.clean

image:: .transforms.python-image

test-src:: .transforms.test-src

setup:: .transforms.setup

build:: build-dist image

publish:: publish-dist publish-image

publish-image:: .transforms.publish-image-python

# The distribution version is the same as the image version.
set-versions:
	$(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml

build-dist:: set-versions .defaults.build-dist

publish-dist:: .defaults.publish-dist

test-image:: .transforms.python-test-image

run-cli-sample: .transforms.run-cli-python-sample

run-local-sample: .transforms.run-local-sample

run-local-python-sample: .transforms.run-local-python-sample

#run-s3-ray-sample: .transforms.run-s3-ray-sample

minio-start: .minio-start

load-image:: .transforms.load-image
Loading