Skip to content

Commit

Permalink
readme and makefile fixes
Browse files Browse the repository at this point in the history
Signed-off-by: blublinsky <[email protected]>
  • Loading branch information
blublinsky committed Apr 25, 2024
1 parent f508b0d commit e43ed08
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class KFPUtils:

@staticmethod
def credentials(
access_key: str = "S3_KEY", secret_key: str = "S3_SECRET", endpoint: str = "ENDPOINT"
access_key: str = "S3_KEY", secret_key: str = "S3_SECRET", endpoint: str = "ENDPOINT"
) -> tuple[str, str, str]:
"""
Get credentials from the environment
Expand Down Expand Up @@ -144,10 +144,10 @@ def __init__(self, host: str = "http://localhost:8080"):
self.kfp_client = Client(host=host)

def start_pipeline(
self,
pipeline: models.api_pipeline.ApiPipeline,
experiment: models.api_experiment.ApiExperiment,
params: Optional[dict[str, Any]],
self,
pipeline: models.api_pipeline.ApiPipeline,
experiment: models.api_experiment.ApiExperiment,
params: Optional[dict[str, Any]],
) -> str:
"""
Start a specified pipeline.
Expand Down Expand Up @@ -240,11 +240,11 @@ class supporting Ray remote jobs
ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")

def __init__(
self,
server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888",
default_image: str = "rayproject/ray:2.9.3-py310",
http_retries: int = 5,
wait_interval: int = 2,
self,
server_url: str = "http://kuberay-apiserver-service.kuberay.svc.cluster.local:8888",
default_image: str = "rayproject/ray:2.9.3-py310",
http_retries: int = 5,
wait_interval: int = 2,
):
"""
Initialization
Expand All @@ -259,12 +259,12 @@ def __init__(
self.default_image = default_image

def create_ray_cluster(
self,
name: str,
namespace: str,
head_node: dict[str, Any],
worker_nodes: list[dict[str, Any]],
wait_cluster_ready: int = -1,
self,
name: str,
namespace: str,
head_node: dict[str, Any],
worker_nodes: list[dict[str, Any]],
wait_cluster_ready: int = -1,
) -> tuple[int, str]:
"""
Create Ray cluster
Expand Down Expand Up @@ -423,7 +423,7 @@ def create_ray_cluster(
# Build cluster spec
cluster_spec = ClusterSpec(head_node=head_node_spec, worker_groups=worker_groups)
# Build cluster
cluster = Cluster(name=name, namespace=namespace, user="goofy", version="2.9.0", cluster_spec=cluster_spec)
cluster = Cluster(name=name, namespace=namespace, user="dataprep", version="2.9.3", cluster_spec=cluster_spec)
status, error = self.api_server_client.create_cluster(cluster)
if status != 200:
return status, error
Expand Down Expand Up @@ -455,12 +455,12 @@ def delete_ray_cluster(self, name: str, namespace: str) -> tuple[int, str]:
return status, error

def submit_job(
self,
name: str,
namespace: str,
request: dict[str, Any],
runtime_env: str = None,
executor: str = "transformer_launcher.py",
self,
name: str,
namespace: str,
request: dict[str, Any],
runtime_env: str = None,
executor: str = "transformer_launcher.py",
) -> tuple[int, str, str]:
"""
Submit job for execution
Expand Down Expand Up @@ -511,13 +511,13 @@ def _print_log(log: str, previous_log_len: int) -> None:
print(l_to_print)

def follow_execution(
self,
name: str,
namespace: str,
submission_id: str,
data_access: DataAccess = None,
job_ready_timeout: int = 600,
print_timeout: int = 120,
self,
name: str,
namespace: str,
submission_id: str,
data_access: DataAccess = None,
job_ready_timeout: int = 600,
print_timeout: int = 120,
) -> None:
"""
Follow remote job execution
Expand Down Expand Up @@ -595,10 +595,10 @@ class ComponentUtils:

@staticmethod
def add_settings_to_component(
component: dsl.ContainerOp,
timeout: int,
image_pull_policy: str = "Always",
cache_strategy: str = "P0D",
component: dsl.ContainerOp,
timeout: int,
image_pull_policy: str = "Always",
cache_strategy: str = "P0D",
) -> None:
"""
Add settings to kfp component
Expand All @@ -616,10 +616,10 @@ def add_settings_to_component(

@staticmethod
def set_s3_env_vars_to_component(
component: dsl.ContainerOp,
secret: str,
env2key: dict[str, str] = {"S3_KEY": "s3-key", "S3_SECRET": "s3-secret", "ENDPOINT": "s3-endpoint"},
prefix: str = None,
component: dsl.ContainerOp,
secret: str,
env2key: dict[str, str] = {"S3_KEY": "s3-key", "S3_SECRET": "s3-secret", "ENDPOINT": "s3-endpoint"},
prefix: str = None,
) -> None:
"""
Set S3 env variables to KFP component
Expand All @@ -642,8 +642,8 @@ def set_s3_env_vars_to_component(

@staticmethod
def default_compute_execution_params(
worker_options: str, # ray worker configuration
actor_options: str, # cpus per actor
worker_options: str, # ray worker configuration
actor_options: str, # cpus per actor
) -> str:
"""
This is the most simplistic transform execution parameters computation
Expand Down Expand Up @@ -685,4 +685,4 @@ def default_compute_execution_params(
)
sys.exit(1)

return str(n_actors)
return str(n_actors)
2 changes: 1 addition & 1 deletion kfp/transform_workflows/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ clean::
make -C $(shell (dirname $<)) .reconcile-requirement PIPELINE_FILE=${CURR_DIR}/$<
make .compile-pipeline WF_NAME=$(shell (basename $< .py)) DIR=$(shell (dirname $<))

build::
build:: venv
find . -iname "*_wf.yaml" | xargs rm -f || true
@# Help: Compile the pipelines
${PYTHON} -m pip install --upgrade build
Expand Down
21 changes: 11 additions & 10 deletions kind/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@ Ensure that you have the following:
- [Kind](https://kind.sigs.k8s.io/) tool for running local Kubernetes clusters 0.14.0 or newer must be installed on your machine.
- [Kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) 1.26 or newer must be installed on your machine.
- [wget](https://www.gnu.org/software/wget/) 1.21 must be installed on your machine.
- [lsof](https://formulae.brew.sh/formula/lsof) must be installed on your machine.
- [MinIO Client (mc)](https://min.io/docs/minio/linux/reference/minio-mc.html#quickstart) must be installed on your machine.
- ensure nothing is running on port 8080, which is used by the Kind cluster ingress.

## Preparing Kind cluster for testing

This is a manual build instruction. As an alternative, you can execute the `make setup` makefile rule in
the project `kind` directory instead. `make setup` performs complete installation, including creation of
the cluster, installing required software (NGNIX, KubeRay and KFP), creating ingresses and secrets and
loading local data to Minio.
This is a manual build instruction. As an alternative, you can execute the `make setup` makefile rule in
the project `kind` directory instead. `make setup` performs complete installation, including validation that port
8080 is available, creation of the cluster, installing required software (NGINX, KubeRay and KFP), creating
ingresses and secrets and loading local data to Minio.


### Create cluster
Expand All @@ -25,10 +26,10 @@ Run the following command to create the cluster:

```shell
cd /tmp
git clone [email protected]:IBM/data-prep-lab.git
cd data-prep-lab
git clone git@github.ibm.com:ai-models-data/fm-data-engineering.git
cd fm-data-engineering
ROOT_DIR=$PWD/kind/
kind create cluster --name goofy --config ${ROOT_DIR}/hack/kind-cluster-config.yaml
kind create cluster --name dataprep --config ${ROOT_DIR}/hack/kind-cluster-config.yaml
```

Note that by default this will create a kind cluster with 2 worker nodes. If you would like a different
Expand Down Expand Up @@ -103,7 +104,7 @@ kubectl apply -f $ROOT_DIR/hack/s3_secret.yaml
```

### Working with a Minio server instead of S3 storage
You can work with a real S3 storage, but for testing you can use the MinIO server which is deployed as part of the KFP
You can work with a real S3 storage, but for testing you can use the Mino server which is deployed as part of the KFP
installation.

#### Copy test data
Expand All @@ -114,7 +115,7 @@ Populating Minio server with test data can be done using `mc`. Use the following
./hack/populate_minio.sh
```

This file create an mc alias, create the test bucket and copy local data to Minio. If you need
This script creates an mc alias, creates the test bucket, and copies local data to Minio. If you need
to load additional data, please load it using additional `mc` commands, similar to the ones being
used by `populate_minio.sh`

Expand All @@ -126,7 +127,7 @@ as the secret. The secret to use for Minio access is located in `kubeflow` ns wi
delete the cluster:

```shell
kind delete cluster --name goofy
kind delete cluster --name dataprep
```

alternatively you can execute
Expand Down
30 changes: 29 additions & 1 deletion kind/hack/tools/kind_management.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,34 @@ kind_delete() {
}

kind_create() {
    # Verify every tool needed to stand up the cluster BEFORE doing any work,
    # so we never leave a half-created cluster behind.
    # A single loop replaces six copy-pasted checks and makes the error
    # message consistent for all tools.
    local tool
    for tool in lsof kind helm kubectl wget mc; do
        if ! command -v "${tool}" &> /dev/null ; then
            echo "${tool} could not be found. Please install it and try again"
            exit 1
        fi
    done
    # Port 8080 is used by the Kind cluster ingress; fail fast if something
    # is already listening on it (-sTCP:LISTEN restricts to listeners,
    # -t prints bare PIDs, -P/-i avoid slow name resolution).
    if lsof -Pi :8080 -sTCP:LISTEN -t >/dev/null ; then
        echo "port 8080 is in use, please clear the port and try again"
        exit 1
    fi
    # cluster_name and ROOT_DIR are expected to be set by the calling script.
    kind create cluster --name $cluster_name --config ${ROOT_DIR}/hack/kind-cluster-config.yaml
}

Expand All @@ -31,4 +59,4 @@ create_cluster)
*)
usage
;;
esac
esac

0 comments on commit e43ed08

Please sign in to comment.