-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMakefile
146 lines (127 loc) · 6.96 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Running `make` with no target prints the auto-generated help listing.
.DEFAULT_GOAL := help

# Build arguments baked into the sparknetes builder image.
DOCKER_MVN_VERSION ?= 3.5.4
DOCKER_JDK_VERSION ?= 8
# Git ref of apache/spark checked out inside the builder image.
DOCKER_GIT_SPARK ?= v2.4.4

# Coordinates of the builder image on the Docker registry.
DOCKER_ORG ?= hypnosapos
DOCKER_IMAGE ?= sparknetes
DOCKER_TAG ?= 2.4

# Registry credentials used by `spark-image` to `docker login`.
# NOTE(review): a default password is committed here — always override both
# values from the environment, and consider dropping the defaults entirely.
DOCKER_USERNAME ?= engapa
DOCKER_PASSWORD ?= secretito

# Also used as the name of the Kubernetes service account created by
# `gke-spark-bootstrap`.
GKE_CLUSTER_NAME ?= spark

# Pick the command that opens a URL in the default browser:
# xdg-open on Linux, `open` elsewhere (macOS).
UNAME := $(shell uname -s)
ifeq ($(UNAME),Linux)
OPEN := xdg-open
else
OPEN := open
endif
.PHONY: help
# Self-documenting help: greps this Makefile for targets annotated with a
# trailing "## description" comment and prints them sorted and aligned
# (cyan target name, 30-column padding).
help: ## Show this help.
	@grep -E '^[0-9a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.PHONY: clean
# Best-effort cleanup: both commands are redirected to /dev/null and guarded
# with `|| true` so that "nothing to remove" is not an error.
# The `## ` annotation makes this target visible in `make help`, consistent
# with every other target in this file.
clean: ## Remove containers and the local image of the sparknetes builder.
	@docker rm -f $$(docker ps -a -f "ancestor=$(DOCKER_ORG)/$(DOCKER_IMAGE):$(DOCKER_TAG)" --format '{{.Names}}') > /dev/null 2>&1 || true
	@docker rmi -f $(DOCKER_ORG)/$(DOCKER_IMAGE):$(DOCKER_TAG) > /dev/null 2>&1 || true
.PHONY: sparknetes-build
# Produce the local builder image (Maven + JDK + a Spark checkout) that the
# other targets use to compile Spark and bake the runtime images.
sparknetes-build: ## Build the docker image of builder.
	@docker build \
		--build-arg MVN_VERSION="$(DOCKER_MVN_VERSION)" \
		--build-arg JDK_VERSION="$(DOCKER_JDK_VERSION)" \
		--build-arg GIT_SPARK="$(DOCKER_GIT_SPARK)" \
		--tag "$(DOCKER_ORG)/$(DOCKER_IMAGE):$(DOCKER_TAG)" \
		.
.PHONY: sparknetes-push
# Upload the previously built builder image to the Docker registry
# (requires `docker login` to have been done already).
sparknetes-push: ## Publish sparknetes docker image.
	@docker push "$(DOCKER_ORG)/$(DOCKER_IMAGE):$(DOCKER_TAG)"
.PHONY: spark-image
# Run the builder container (with the host docker socket mounted so it can
# drive the host daemon) to build and push the runtime image used by Spark
# driver/executor pods, via Spark's own docker-image-tool.sh.
# Fix: the password is piped to `docker login --password-stdin` instead of
# being passed with `-p`, which docker itself warns is insecure (the -p value
# lands in shell history and `ps` output inside the container).
spark-image: ## Build and push a docker image for spark pods.
	@docker pull $(DOCKER_ORG)/$(DOCKER_IMAGE):$(DOCKER_TAG)
	@docker run -it --rm \
		-v /var/run/docker.sock:/var/run/docker.sock \
		$(DOCKER_ORG)/$(DOCKER_IMAGE):$(DOCKER_TAG) \
		bash -c "printf '%s' '$(DOCKER_PASSWORD)' | docker login -u $(DOCKER_USERNAME) --password-stdin \
		&& ./bin/docker-image-tool.sh -r docker.io/$(DOCKER_ORG) -t $(DOCKER_TAG) build \
		&& ./bin/docker-image-tool.sh -r docker.io/$(DOCKER_ORG) -t $(DOCKER_TAG) push"
.PHONY: gke-spark-bootstrap
# One-time cluster setup, run inside the gke-bastion container (provided by
# the companion k8s-gke repo): a service account for Spark, an edit-role
# binding for it in the default namespace, and a secret holding the GCP
# credentials file (used by the GCS example).
# NOTE(review): the service account is named $(GKE_CLUSTER_NAME), but the
# example targets below submit with a hard-coded --serviceaccount=spark;
# they only agree under the default GKE_CLUSTER_NAME=spark — confirm before
# overriding that variable.
# NOTE(review): assumes /tmp/gcp.json already exists inside gke-bastion.
gke-spark-bootstrap: ## Setup kubernetes cluster for spark examples. Visit repo k8s-gke
	@docker exec gke-bastion \
		sh -c "kubectl create serviceaccount $(GKE_CLUSTER_NAME) \
		&& kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=default:$(GKE_CLUSTER_NAME) --namespace=default \
		&& kubectl create secret generic gcloud-creds --from-file=key.json=/tmp/gcp.json"
.PHONY: gke-spark-clean
# Delete every Kubernetes resource carrying the sparknetes=true label — the
# label each example target attaches to the jobs/drivers it creates.
gke-spark-clean: ## Clean spark resources on kubernetes cluster.
	@docker exec gke-bastion sh -c "kubectl delete all -l sparknetes=true"
.PHONY: gke-job-logs
# Stream the logs of a Kubernetes job from inside the gke-bastion container.
# Fix: JOB_NAME is documented as required but was never validated — without
# the guard the recipe ran `kubectl logs -f job/` and failed with a cryptic
# kubectl error. Now it aborts with an explicit message.
gke-job-logs: ## Follow logs of jobs. JOB_NAME variable is required.
	@$(if $(strip $(JOB_NAME)),,$(error JOB_NAME is required, e.g. make gke-job-logs JOB_NAME=spark-basic-job))
	@docker exec -t gke-bastion \
		sh -c "kubectl logs -f job/$(JOB_NAME)"
.PHONY: spark-basic-example
# Submit the classic SparkPi example: a throwaway `kubectl run` pod (labelled
# sparknetes=true so `gke-spark-clean` can find it) invokes spark-submit in
# cluster deploy-mode against the in-cluster API server, spawning 3 executors
# from the $(DOCKER_ORG)/spark image built by `spark-image`.
# NOTE(review): --serviceaccount is hard-coded to `spark`; see the note on
# gke-spark-bootstrap about $(GKE_CLUSTER_NAME).
# NOTE(review): the local:// jar path contains a glob (*) — relies on it
# being resolved inside the image; verify against the builder's layout.
spark-basic-example: ## Launch basic example (SparkPi) from a kubernetes pod.
	@docker exec gke-bastion \
		sh -c "kubectl run spark-basic-job -l sparknetes=true --image=$(DOCKER_ORG)/spark:$(DOCKER_TAG) --restart=OnFailure \
		--serviceaccount=spark --command -- /opt/spark/bin/spark-submit \
		--master k8s://https://kubernetes.default.svc.cluster.local \
		--deploy-mode cluster \
		--name spark-basic-pi \
		--class org.apache.spark.examples.SparkPi \
		--conf spark.executor.instances=3 \
		--conf spark.kubernetes.container.image=$(DOCKER_ORG)/spark:$(DOCKER_TAG) \
		--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
		--conf spark.kubernetes.driver.label.sparknetes=true \
		local:///opt/spark/examples/target/original-spark-examples_2.11-*.jar"
.PHONY: spark-ml-example
# Same submission pattern as spark-basic-example, but runs the SparkLR
# (logistic regression) example class with 3 executors.
# NOTE(review): unlike the basic example this glob targets the *-SNAPSHOT
# examples jar — confirm which variant the builder image actually produces.
spark-ml-example: ## Launch ml example from a kubernetes pod.
	@docker exec gke-bastion \
		sh -c "kubectl run spark-ml-job -l sparknetes=true --image=$(DOCKER_ORG)/spark:$(DOCKER_TAG) --restart=OnFailure \
		--serviceaccount=spark --command -- /opt/spark/bin/spark-submit \
		--master k8s://https://kubernetes.default.svc.cluster.local \
		--deploy-mode cluster \
		--name spark-ml-LR \
		--class org.apache.spark.examples.SparkLR \
		--conf spark.executor.instances=3 \
		--conf spark.kubernetes.container.image=$(DOCKER_ORG)/spark:$(DOCKER_TAG) \
		--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
		--conf spark.kubernetes.driver.label.sparknetes=true \
		local:///opt/spark/examples/target/original-spark-examples_2.11-*-SNAPSHOT.jar"
.PHONY: spark-gcs-example
# Submit a job that reads/writes Google Cloud Storage: the gcs-connector jar
# is fetched via --jars, the gcloud-creds secret (created by
# gke-spark-bootstrap) is mounted into the driver at /tmp/gcp, and
# GOOGLE_APPLICATION_CREDENTIALS points the driver at the mounted key.
# NOTE(review): only the *driver* gets the secret and the credentials env —
# executors are not configured; confirm the job does all GCS I/O from the
# driver or via delegated tokens.
# NOTE(review): class name, bucket paths and the application jar URL are
# example-specific — adjust them for your own job.
spark-gcs-example: ## Launch an example with GCS as data source. Adjust class and other confs
	@docker exec gke-bastion \
		sh -c "kubectl run spark-gcs-job -l sparknetes=true --image=$(DOCKER_ORG)/spark:$(DOCKER_TAG) --restart=OnFailure \
		--serviceaccount=spark --command -- /opt/spark/bin/spark-submit \
		--master k8s://https://kubernetes.default.svc.cluster.local \
		--deploy-mode cluster \
		--name spark-gcs \
		--class com.bbva.fraud.ParseOp \
		--conf spark.executor.instances=4 \
		--conf spark.ui.enabled=true \
		--jars https://storage.googleapis.com/sparknetes/libs/gcs-connector-1.6.6-hadoop2.jar \
		--conf spark.kubernetes.container.image=$(DOCKER_ORG)/spark:$(DOCKER_TAG) \
		--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
		--conf spark.kubernetes.driverEnv.GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp/key.json \
		--conf spark.executor.memory=4g \
		--conf spark.kubernetes.executor.limit.cores=3 \
		--conf spark.executor.cores=3 \
		--conf spark.kubernetes.driver.secrets.gcloud-creds=/tmp/gcp \
		--conf spark.kubernetes.driver.label.sparknetes=true \
		https://storage.googleapis.com/sparknetes/fraud-joiner-assembly-1.1.jar gs://fraud-dataset/dataset/bkOn gs://sparknetes/fraud.parquet"
.PHONY: gke-spark-expose-ui
# Find the driver pod whose spark-app-name annotation matches
# $(SPARK_APP_NAME) (required variable) and expose its 4040 UI port through a
# LoadBalancer service named $(SPARK_APP_NAME)-svc.
# The three shell steps run as ONE recipe line (joined with `; \`) so the
# SPARK_PODS/SPARK_POD shell variables survive between them; `$$` escapes
# them from make's own expansion.
# NOTE(review): `kubectl get pods --show-all` was removed in kubectl 1.14 —
# this assumes an older kubectl inside gke-bastion; confirm.
gke-spark-expose-ui: ## Expose a spark UI by a public IP
	@SPARK_PODS=`docker exec gke-bastion \
		sh -c "kubectl get pods --show-all -l spark-role=driver -o jsonpath='{range .items[*]}{.metadata.name}{\"\\t\"}{.metadata.annotations.spark-app-name}{\"\\n\"}{end}'"`; \
	SPARK_POD=`echo $$SPARK_PODS | grep $(SPARK_APP_NAME) | awk '{print $$1}'`; \
	docker exec gke-bastion \
		sh -c "kubectl expose pod $$SPARK_POD --name $(SPARK_APP_NAME)-svc --port=4040 --target-port=4040 --type=LoadBalancer"
## Use pod port-forward or proxy url for an internal service
.PHONY: gke-spark-open-ui
# Resolve the LoadBalancer ingress IP of the service created by
# gke-spark-expose-ui and open the Spark UI in the local default browser
# ($(OPEN) is xdg-open on Linux, `open` elsewhere).
# Fix: typo in the user-visible help text ("atomatically" -> "automatically").
gke-spark-open-ui: ## Open spark UI automatically
	$(OPEN) http://$(shell docker exec gke-bastion \
		sh -c "kubectl get svc $(SPARK_APP_NAME)-svc -o jsonpath='{.status.loadBalancer.ingress[0].ip}'"):4040
.PHONY: gke-spark-operator-install
# Install the (incubator) sparkoperator helm chart into the default
# namespace, with the mutating admission webhook enabled, waiting until the
# release is ready.
# Fix: the chart repository was added over plaintext http:// — fetch the
# chart index over https instead (storage.googleapis.com serves TLS), so the
# charts cannot be tampered with in transit.
gke-spark-operator-install: ## Install spark operator helm chart
	@docker exec gke-bastion \
		sh -c "helm repo add incubator https://storage.googleapis.com/kubernetes-charts-incubator \
		&& helm install incubator/sparkoperator --namespace default --set enableWebhook=true --wait"
.PHONY: gke-spark-operator-example
# Copy the SparkApplication manifest into the gke-bastion container and apply
# it; the spark operator (installed by gke-spark-operator-install) picks up
# the resource and runs the job.
# NOTE(review): assumes spark-operator.yaml exists in the directory make is
# invoked from.
gke-spark-operator-example: ## Launch an example with GCS as data source. Adjust class and other confs. This example will use spark operator (incubator version)
	@docker cp spark-operator.yaml gke-bastion:/tmp/spark-operator.yaml
	@docker exec gke-bastion \
		sh -c "kubectl apply -f /tmp/spark-operator.yaml"