From fb060db5fee08a7969e69ae5412b6943bf88f3da Mon Sep 17 00:00:00 2001 From: Minutis Date: Wed, 18 Jan 2023 23:25:48 +0200 Subject: [PATCH] Add Minikube quickstart guide --- README.md | 1 + docs/kubernetes.md | 151 +------------------ quickstart/README.md | 77 ++++++++++ quickstart/jupyterlab.yml | 191 +++++++++++++++++++++++++ quickstart/lighter.yml | 95 ++++++++++++ quickstart/lighter/lighter-ingress.yml | 28 ++++ quickstart/nginx/jupyterlab-nginx.yml | 19 +++ quickstart/nginx/lighter-nginx.yml | 19 +++ 8 files changed, 432 insertions(+), 149 deletions(-) create mode 100644 quickstart/README.md create mode 100644 quickstart/jupyterlab.yml create mode 100644 quickstart/lighter.yml create mode 100644 quickstart/lighter/lighter-ingress.yml create mode 100644 quickstart/nginx/jupyterlab-nginx.yml create mode 100644 quickstart/nginx/lighter-nginx.yml diff --git a/README.md b/README.md index 3b5b2270..8d389bf5 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Lighter supports: You can read a breaf description on how Lighter works [here](./docs/architecture.md). ## Using Lighter +- [Quickstart with Minikube](./quickstart/README.md) - [Installation on Kubernetes](./docs/kubernetes.md) - [Installation on Docker](./docs/docker.md) - [Configuration Properties](./docs/configuration.md) diff --git a/docs/kubernetes.md b/docs/kubernetes.md index 4a1511be..f793a7ac 100644 --- a/docs/kubernetes.md +++ b/docs/kubernetes.md @@ -1,153 +1,6 @@ # Running Lighter on Kubernetes You need to create multiple resources to add Lighter to your Kubernetes cluster. -On these examples we assume that you are running your spark related services on `spark` namespace. +On the following examples we assume that you are running your spark related services on `spark` namespace. 
-## ServiceAccount and RoleBinding - -```yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: spark - namespace: spark ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: lighter-spark - namespace: spark -rules: -- apiGroups: [""] - resources: ["pods", "services", "configmaps", "pods/log"] - verbs: ["*"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: lighter-spark - namespace: spark -subjects: -- kind: ServiceAccount - name: spark - namespace: spark -roleRef: - kind: Role - name: lighter-spark - apiGroup: rbac.authorization.k8s.io -``` - -## Service -```yaml -apiVersion: v1 -kind: Service -metadata: - name: lighter - namespace: spark - labels: - run: lighter -spec: - ports: - - name: api - port: 8080 - protocol: TCP - - name: javagw - port: 25333 - protocol: TCP - selector: - run: lighter -``` - -## Deployment - -Make sure to change `env` values to valid ones. -[Click here](./configuration.md) to see all possible configuration options. 
- -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - namespace: spark - name: lighter -spec: - selector: - matchLabels: - run: lighter - replicas: 1 - strategy: - rollingUpdate: - maxUnavailable: 0 - maxSurge: 1 - template: - metadata: - labels: - run: lighter - spec: - containers: - - image: ghcr.io/exacaster/lighter:0.0.41-spark3.3.1 - name: lighter - readinessProbe: - httpGet: - path: /health/readiness - port: 8080 - initialDelaySeconds: 15 - periodSeconds: 15 - resources: - requests: - cpu: "0.25" - memory: "512Mi" - ports: - - containerPort: 8080 - env: - - name: LIGHTER_KUBERNETES_ENABLED - value: "true" - - name: LIGHTER_STORAGE_JDBC_USERNAME - value: postgres_user - - name: LIGHTER_STORAGE_JDBC_PASSWORD - value: postgres_password - - name: LIGHTER_STORAGE_JDBC_URL - value: jdbc:postgresql://postgres_host_name:5432/lighter - - name: LIGHTER_STORAGE_JDBC_DRIVER_CLASS_NAME - value: org.postgresql.Driver - - name: LIGHTER_SPARK_HISTORY_SERVER_URL - value: https://address_to_spark_history/spark-history - - name: LIGHTER_MAX_RUNNING_JOBS - value: "15" - serviceAccountName: spark -``` - -## Ingress - -To make your Lighter UI accessible, you also need to add an Ingress component. -For example, if you are using Traefik Ingress controller, something like this should work: - -```yaml -apiVersion: traefik.containo.us/v1alpha1 -kind: IngressRoute -metadata: - name: lighter-ingress-route - namespace: spark -spec: - entryPoints: - - web - routes: - - match: PathPrefix(`/lighter`) - kind: Rule - services: - - name: lighter - port: 8080 - middlewares: - - name: lighter-custom-headers ---- -apiVersion: traefik.containo.us/v1alpha1 -kind: Middleware -metadata: - name: lighter-custom-headers - namespace: spark -spec: - headers: - customRequestHeaders: - X-Forwarded-Prefix: /lighter - X-Forwarded-Proto: https - X-Forwarded-Port: "443" -``` +Use [quickstart](../quickstart/) as a starting point and adjust the configuration to fit your environment. 
diff --git a/quickstart/README.md b/quickstart/README.md new file mode 100644 index 00000000..ca76e1ff --- /dev/null +++ b/quickstart/README.md @@ -0,0 +1,77 @@ +# Minikube quickstart + +### Install Minikube +Follow the instructions at https://minikube.sigs.k8s.io/docs/start/ to install Minikube. + +### Start Minikube +``` +minikube start +``` + +### Enable Minikube `ingress` addon +``` +minikube addons enable ingress +``` + +### Apply quickstart `.yml` configuration +Go to the directory where you cloned the repository, e.g. `cd ~/git/lighter`: +1. Create main resources +``` +minikube kubectl -- apply -f quickstart/lighter.yml +minikube kubectl -- apply -f quickstart/jupyterlab.yml +``` +2. Create `ingress` resources +``` +minikube kubectl -- apply -f quickstart/nginx/lighter-nginx.yml +minikube kubectl -- apply -f quickstart/nginx/jupyterlab-nginx.yml +``` + +### Verify deployment +**_NOTE:_** Jupyterlab can take a few minutes to start because the container is being altered at runtime. +``` +minikube kubectl -- -n spark get all +``` +``` +NAME READY STATUS RESTARTS AGE +pod/jupyterlab-5685dcf5c7-ggcpl 1/1 Running 0 4m34s +pod/lighter-5484769777-7r9ln 1/1 Running 0 5m12s + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/jupyterlab ClusterIP 10.100.99.140 <none> 8888/TCP 4m34s +service/lighter ClusterIP 10.98.245.141 <none> 8080/TCP,25333/TCP 5m12s + +NAME READY UP-TO-DATE AVAILABLE AGE +deployment.apps/jupyterlab 1/1 1 1 4m34s +deployment.apps/lighter 1/1 1 1 5m12s + +NAME DESIRED CURRENT READY AGE +replicaset.apps/jupyterlab-5685dcf5c7 1 1 1 4m34s +replicaset.apps/lighter-5484769777 1 1 1 5m12s +``` +Ingress resources (served by the `ingress` addon) are not listed by `get all` and need a separate call: +``` +minikube kubectl -- -n spark get ingress +``` +``` +NAME CLASS HOSTS ADDRESS PORTS AGE +jupyterlab nginx spark.local 192.168.49.2 80 87s +lighter nginx spark.local 192.168.49.2 80 87s +``` + +### Optional: Windows with WSL2 with Ubuntu +1. Run this in WSL2 +``` +minikube tunnel +``` +2. 
Add this entry to your `C:\Windows\System32\Drivers\etc\hosts` file on Windows: +``` +127.0.0.1 spark.local +``` + +### Try it out! + +Lighter URL: http://spark.local/lighter/ + +Jupyterlab URL: http://spark.local/jupyterlab/ + +**_NOTE:_** Jupyterlab will already have `quickstart.ipynb` notebook with two cells. Execute them and that's it. \ No newline at end of file diff --git a/quickstart/jupyterlab.yml b/quickstart/jupyterlab.yml new file mode 100644 index 00000000..607bdcc6 --- /dev/null +++ b/quickstart/jupyterlab.yml @@ -0,0 +1,191 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: jupyterlab + namespace: spark +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: jupyterlab + namespace: spark +rules: + - apiGroups: [ "" ] + resources: [ "pods", "services", "configmaps", "pods/log" ] + verbs: [ "*" ] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: jupyterlab + namespace: spark +subjects: + - kind: ServiceAccount + name: jupyterlab + namespace: spark +roleRef: + kind: Role + name: jupyterlab + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: Service +metadata: + name: jupyterlab + namespace: spark + labels: + run: jupyterlab +spec: + ports: + - port: 8888 + protocol: TCP + selector: + run: jupyterlab +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + namespace: spark + name: jupyterlab +spec: + selector: + matchLabels: + run: jupyterlab + replicas: 1 + strategy: + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + metadata: + labels: + run: jupyterlab + spec: + containers: + - image: jupyter/base-notebook:lab-3.5.2 + name: jupyterlab + resources: + requests: + cpu: "0.25" + memory: "512Mi" + ports: + - containerPort: 8888 + command: [ "start-notebook.sh" ] + args: + - "--NotebookApp.token=" + - "--NotebookApp.base_url=jupyterlab" + - "--KernelSpecManager.ensure_native_kernel=False" + - "--NotebookApp.notebook_dir=/home/jovyan/work" + lifecycle: + postStart: + 
exec: + command: + - /bin/sh + - -c + - | + conda install sparkmagic -y + jupyter kernelspec uninstall sparkkernel -y + jupyter kernelspec uninstall sparkrkernel -y + jupyter kernelspec uninstall python3 -y + mkdir -p /home/jovyan/.sparkmagic/ + cp /tmp/sparkmagic.json /home/jovyan/.sparkmagic/config.json + cp /tmp/quickstart.ipynb /home/jovyan/work/quickstart.ipynb + volumeMounts: + - name: sparkmagic + mountPath: /tmp/sparkmagic.json + subPath: sparkmagic + - name: quickstart + mountPath: /tmp/quickstart.ipynb + subPath: quickstart + serviceAccountName: jupyterlab + volumes: + - name: sparkmagic + configMap: + name: sparkmagic + - name: quickstart + configMap: + name: quickstart +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sparkmagic + namespace: spark +data: + sparkmagic: | + { + "kernel_python_credentials" : { + "username": "", + "password": "", + "url": "http://lighter.spark:8080/lighter/api", + "auth": "None" + }, + "livy_session_startup_timeout_seconds": 600, + "custom_headers": { + "X-Compatibility-Mode": "sparkmagic" + } + } +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: quickstart + namespace: spark +data: + quickstart: | + { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c72ef05c-13ee-4199-a002-b2be815ce419", + "metadata": {}, + "outputs": [], + "source": [ + "%%configure -f\n", + "{\n", + " \"name\": \"Test Lighter\",\n", + " \"conf\":{\n", + " \"spark.kubernetes.container.image\": \"apache/spark-py:v3.3.1\"\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94d68783-5c86-440e-84da-aa8be288cdd5", + "metadata": {}, + "outputs": [], + "source": [ + "df = spark.createDataFrame(\n", + " [\n", + " (1, \"foo\"),\n", + " (2, \"bar\"),\n", + " ],\n", + " [\"id\", \"label\"]\n", + ")\n", + "df.collect()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PySpark", + "language": "python", + "name": "pysparkkernel" + }, + "language_info": { + 
"codemirror_mode": { + "name": "python", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "pyspark", + "pygments_lexer": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 + } diff --git a/quickstart/lighter.yml b/quickstart/lighter.yml new file mode 100644 index 00000000..4ce158a1 --- /dev/null +++ b/quickstart/lighter.yml @@ -0,0 +1,95 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: spark +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: lighter + namespace: spark +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: lighter + namespace: spark +rules: + - apiGroups: [""] + resources: ["pods", "services", "configmaps", "pods/log"] + verbs: ["*"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: lighter + namespace: spark +subjects: + - kind: ServiceAccount + name: lighter + namespace: spark +roleRef: + kind: Role + name: lighter + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: Service +metadata: + name: lighter + namespace: spark + labels: + run: lighter +spec: + ports: + - name: api + port: 8080 + protocol: TCP + - name: javagw + port: 25333 + protocol: TCP + selector: + run: lighter +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + namespace: spark + name: lighter +spec: + selector: + matchLabels: + run: lighter + replicas: 1 + strategy: + rollingUpdate: + maxUnavailable: 0 + maxSurge: 1 + template: + metadata: + labels: + run: lighter + spec: + containers: + - image: ghcr.io/exacaster/lighter:0.0.41-spark3.3.1 + name: lighter + readinessProbe: + httpGet: + path: /health/readiness + port: 8080 + initialDelaySeconds: 15 + periodSeconds: 15 + resources: + requests: + cpu: "0.25" + memory: "512Mi" + ports: + - containerPort: 8080 + env: + - name: LIGHTER_KUBERNETES_ENABLED + value: "true" + - name: LIGHTER_MAX_RUNNING_JOBS + value: "15" + - name: LIGHTER_KUBERNETES_SERVICE_ACCOUNT + value: lighter + serviceAccountName: 
lighter diff --git a/quickstart/lighter/lighter-ingress.yml b/quickstart/lighter/lighter-ingress.yml new file mode 100644 index 00000000..e6acb116 --- /dev/null +++ b/quickstart/lighter/lighter-ingress.yml @@ -0,0 +1,28 @@ +apiVersion: traefik.containo.us/v1alpha1 +kind: IngressRoute +metadata: + name: lighter-ingress-route + namespace: spark +spec: + entryPoints: + - web + routes: + - match: PathPrefix(`/lighter`) + kind: Rule + services: + - name: lighter + port: 8080 + middlewares: + - name: lighter-custom-headers +--- +apiVersion: traefik.containo.us/v1alpha1 +kind: Middleware +metadata: + name: lighter-custom-headers + namespace: spark +spec: + headers: + customRequestHeaders: + X-Forwarded-Prefix: /lighter + X-Forwarded-Proto: https + X-Forwarded-Port: "443" \ No newline at end of file diff --git a/quickstart/nginx/jupyterlab-nginx.yml b/quickstart/nginx/jupyterlab-nginx.yml new file mode 100644 index 00000000..6c185aad --- /dev/null +++ b/quickstart/nginx/jupyterlab-nginx.yml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: jupyterlab + namespace: spark + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /jupyterlab/$2 +spec: + rules: + - host: spark.local + http: + paths: + - path: /jupyterlab(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: jupyterlab + port: + number: 8888 diff --git a/quickstart/nginx/lighter-nginx.yml b/quickstart/nginx/lighter-nginx.yml new file mode 100644 index 00000000..a988cd84 --- /dev/null +++ b/quickstart/nginx/lighter-nginx.yml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: lighter + namespace: spark + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /lighter/$2 +spec: + rules: + - host: spark.local + http: + paths: + - path: /lighter(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: lighter + port: + number: 8080