Skip to content
This repository was archived by the owner on Oct 24, 2022. It is now read-only.

Commit 0052a97

Browse files
author
John Aitchison
authored
v0.8.2 (#2)
* new version 0.8.2 * updating instructions changes * added info about static IP with NAT gateway * final notes * warning added to readme * note about optional nat gateway
1 parent a8c152b commit 0052a97

File tree

7 files changed

+370
-18
lines changed

7 files changed

+370
-18
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
networks:
2+
default:
3+
name: datahub_network
4+
services:
5+
broker:
6+
container_name: broker
7+
depends_on:
8+
- zookeeper
9+
environment:
10+
- KAFKA_BROKER_ID=1
11+
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
12+
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
13+
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
14+
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
15+
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
16+
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
17+
hostname: broker
18+
image: confluentinc/cp-kafka:5.4.0
19+
ports:
20+
- 29092:29092
21+
- 9092:9092
22+
datahub-actions:
23+
depends_on:
24+
- datahub-gms
25+
environment:
26+
- DATAHUB_GMS_HOST=datahub-gms
27+
- DATAHUB_GMS_PORT=8080
28+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
29+
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
30+
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
31+
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
32+
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
33+
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
34+
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
35+
hostname: actions
36+
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
37+
restart: on-failure:5
38+
datahub-frontend-react:
39+
container_name: datahub-frontend-react
40+
depends_on:
41+
- datahub-gms
42+
environment:
43+
- DATAHUB_GMS_HOST=datahub-gms
44+
- DATAHUB_GMS_PORT=8080
45+
- DATAHUB_SECRET=YouKnowNothing
46+
- DATAHUB_APP_VERSION=1.0
47+
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
48+
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf
49+
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
50+
-Dlogback.debug=false -Dpidfile.path=/dev/null
51+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
52+
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
53+
- ELASTIC_CLIENT_HOST=elasticsearch
54+
- ELASTIC_CLIENT_PORT=9200
55+
hostname: datahub-frontend-react
56+
image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head}
57+
ports:
58+
- 9002:9002
59+
volumes:
60+
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
61+
datahub-gms:
62+
container_name: datahub-gms
63+
depends_on:
64+
- mysql
65+
environment:
66+
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
67+
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
68+
- DATASET_ENABLE_SCSI=false
69+
- EBEAN_DATASOURCE_USERNAME=datahub
70+
- EBEAN_DATASOURCE_PASSWORD=datahub
71+
- EBEAN_DATASOURCE_HOST=mysql:3306
72+
- EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
73+
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
74+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
75+
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
76+
- ELASTICSEARCH_HOST=elasticsearch
77+
- ELASTICSEARCH_PORT=9200
78+
- GRAPH_SERVICE_IMPL=elasticsearch
79+
- JAVA_OPTS=-Xms1g -Xmx1g
80+
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
81+
- MAE_CONSUMER_ENABLED=true
82+
- MCE_CONSUMER_ENABLED=true
83+
- PE_CONSUMER_ENABLED=true
84+
- UI_INGESTION_ENABLED=true
85+
- UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41
86+
hostname: datahub-gms
87+
image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
88+
ports:
89+
- 8080:8080
90+
volumes:
91+
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
92+
- ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources
93+
elasticsearch:
94+
container_name: elasticsearch
95+
environment:
96+
- discovery.type=single-node
97+
- xpack.security.enabled=false
98+
- ES_JAVA_OPTS=-Xms256m -Xmx256m -Dlog4j2.formatMsgNoLookups=true
99+
healthcheck:
100+
retries: 4
101+
start_period: 2m
102+
test:
103+
- CMD-SHELL
104+
- curl -sS --fail 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=0s'
105+
|| exit 1
106+
hostname: elasticsearch
107+
image: elasticsearch:7.9.3
108+
mem_limit: 1g
109+
ports:
110+
- 9200:9200
111+
volumes:
112+
- esdata:/usr/share/elasticsearch/data
113+
elasticsearch-setup:
114+
container_name: elasticsearch-setup
115+
depends_on:
116+
- elasticsearch
117+
environment:
118+
- ELASTICSEARCH_HOST=elasticsearch
119+
- ELASTICSEARCH_PORT=9200
120+
- ELASTICSEARCH_PROTOCOL=http
121+
hostname: elasticsearch-setup
122+
image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
123+
kafka-setup:
124+
container_name: kafka-setup
125+
depends_on:
126+
- broker
127+
- schema-registry
128+
environment:
129+
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
130+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
131+
hostname: kafka-setup
132+
image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
133+
mysql:
134+
command: --character-set-server=utf8mb4 --collation-server=utf8mb4_bin
135+
container_name: mysql
136+
environment:
137+
- MYSQL_DATABASE=datahub
138+
- MYSQL_USER=datahub
139+
- MYSQL_PASSWORD=datahub
140+
- MYSQL_ROOT_PASSWORD=datahub
141+
hostname: mysql
142+
image: mysql:5.7
143+
ports:
144+
- 3306:3306
145+
volumes:
146+
- ../mysql/init.sql:/docker-entrypoint-initdb.d/init.sql
147+
- mysqldata:/var/lib/mysql
148+
mysql-setup:
149+
container_name: mysql-setup
150+
depends_on:
151+
- mysql
152+
environment:
153+
- MYSQL_HOST=mysql
154+
- MYSQL_PORT=3306
155+
- MYSQL_USERNAME=datahub
156+
- MYSQL_PASSWORD=datahub
157+
- DATAHUB_DB_NAME=datahub
158+
hostname: mysql-setup
159+
image: acryldata/datahub-mysql-setup:${DATAHUB_VERSION:-head}
160+
schema-registry:
161+
container_name: schema-registry
162+
depends_on:
163+
- zookeeper
164+
- broker
165+
environment:
166+
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
167+
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
168+
hostname: schema-registry
169+
image: confluentinc/cp-schema-registry:5.4.0
170+
ports:
171+
- 8081:8081
172+
zookeeper:
173+
container_name: zookeeper
174+
environment:
175+
- ZOOKEEPER_CLIENT_PORT=2181
176+
- ZOOKEEPER_TICK_TIME=2000
177+
hostname: zookeeper
178+
image: confluentinc/cp-zookeeper:5.4.0
179+
ports:
180+
- 2181:2181
181+
volumes:
182+
- zkdata:/var/opt/zookeeper
183+
version: '2.3'
184+
volumes:
185+
esdata: null
186+
mysqldata: null
187+
zkdata: null
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
networks:
2+
default:
3+
name: datahub_network
4+
services:
5+
broker:
6+
container_name: broker
7+
depends_on:
8+
- zookeeper
9+
environment:
10+
- KAFKA_BROKER_ID=1
11+
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
12+
- KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
13+
- KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
14+
- KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
15+
- KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
16+
- KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
17+
hostname: broker
18+
image: confluentinc/cp-kafka:5.4.0
19+
ports:
20+
- 29092:29092
21+
- 9092:9092
22+
datahub-actions:
23+
depends_on:
24+
- datahub-gms
25+
environment:
26+
- DATAHUB_GMS_HOST=datahub-gms
27+
- DATAHUB_GMS_PORT=8080
28+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
29+
- SCHEMA_REGISTRY_URL=http://schema-registry:8081
30+
- METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
31+
- METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
32+
- DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
33+
- DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
34+
- KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
35+
hostname: actions
36+
image: acryldata/datahub-actions:${ACTIONS_VERSION:-head}
37+
restart: on-failure:5
38+
datahub-frontend-react:
39+
container_name: datahub-frontend-react
40+
depends_on:
41+
- datahub-gms
42+
environment:
43+
- DATAHUB_GMS_HOST=datahub-gms
44+
- DATAHUB_GMS_PORT=8080
45+
- DATAHUB_SECRET=YouKnowNothing
46+
- DATAHUB_APP_VERSION=1.0
47+
- DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
48+
- JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=80 -Dconfig.file=datahub-frontend/conf/application.conf
49+
-Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml
50+
-Dlogback.debug=false -Dpidfile.path=/dev/null
51+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
52+
- DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
53+
- ELASTIC_CLIENT_HOST={{{ $elastic_host }}}
54+
- ELASTIC_CLIENT_PORT=443
55+
- ELASTIC_CLIENT_USE_SSL=true
56+
- ELASTIC_CLIENT_USERNAME={{{ $elastic_username }}}
57+
- ELASTIC_CLIENT_PASSWORD={{{ $elastic_password }}}
58+
hostname: datahub-frontend-react
59+
image: linkedin/datahub-frontend-react:v0.8.42
60+
ports:
61+
- 80:80
62+
volumes:
63+
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
64+
- frontend:/datahub-frontend/conf/
65+
datahub-gms:
66+
container_name: datahub-gms
67+
environment:
68+
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
69+
- DATAHUB_TELEMETRY_ENABLED=true
70+
- DATASET_ENABLE_SCSI=false
71+
- EBEAN_DATASOURCE_USERNAME={{{ $mysql_username }}}
72+
- EBEAN_DATASOURCE_PASSWORD={{{ $mysql_password }}}
73+
- EBEAN_DATASOURCE_HOST={{{ $mysql_host }}}:3306
74+
- EBEAN_DATASOURCE_URL=jdbc:mysql://{{{ $mysql_host }}}:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
75+
- EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
76+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
77+
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
78+
- ELASTICSEARCH_HOST={{{ $elastic_host }}}
79+
- ELASTICSEARCH_PORT=443
80+
- GRAPH_SERVICE_IMPL=elasticsearch
81+
- JAVA_OPTS=-Xms1g -Xmx1g
82+
- ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
83+
- MAE_CONSUMER_ENABLED=true
84+
- MCE_CONSUMER_ENABLED=true
85+
- PE_CONSUMER_ENABLED=true
86+
- UI_INGESTION_ENABLED=true
87+
- UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41
88+
- ELASTICSEARCH_USE_SSL=true
89+
- ELASTICSEARCH_USERNAME={{{ $elastic_username }}}
90+
- ELASTICSEARCH_PASSWORD={{{ $elastic_password }}}
91+
hostname: datahub-gms
92+
image: linkedin/datahub-gms:v0.8.42
93+
ports:
94+
- 8080:8080
95+
volumes:
96+
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
97+
- ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources
98+
kafka-setup:
99+
container_name: kafka-setup
100+
depends_on:
101+
- broker
102+
- schema-registry
103+
environment:
104+
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
105+
- KAFKA_BOOTSTRAP_SERVER=broker:29092
106+
hostname: kafka-setup
107+
image: linkedin/datahub-kafka-setup:v0.8.42
108+
schema-registry:
109+
container_name: schema-registry
110+
depends_on:
111+
- zookeeper
112+
- broker
113+
environment:
114+
- SCHEMA_REGISTRY_HOST_NAME=schemaregistry
115+
- SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
116+
hostname: schema-registry
117+
image: confluentinc/cp-schema-registry:5.4.0
118+
ports:
119+
- 8081:8081
120+
zookeeper:
121+
container_name: zookeeper
122+
environment:
123+
- ZOOKEEPER_CLIENT_PORT=2181
124+
- ZOOKEEPER_TICK_TIME=2000
125+
hostname: zookeeper
126+
image: confluentinc/cp-zookeeper:5.4.0
127+
ports:
128+
- 2181:2181
129+
volumes:
130+
- zkdata:/var/opt/zookeeper
131+
datahub-frontend-react-setup:
132+
depends_on:
133+
- datahub-frontend-react
134+
container_name: datahub-frontend-react-setup
135+
hostname: datahub-frontend-react-setup
136+
image: alpine:latest
137+
command: sh -c "echo \"datahub:{{{ $datahub_master_password }}}\" > /datahub-frontend/conf/user.props && cd datahub-frontend/conf/ && ls && cat user.props && sleep 180"
138+
restart: "no"
139+
volumes:
140+
- frontend:/datahub-frontend/conf/
141+
version: '2.3'
142+
volumes:
143+
zkdata: null
144+
frontend:

admin/instructions.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ When a new version of DataHub comes out, the admins of this project need to take
66

77
# Steps
88
1. In the /admin/datahub-versions folder, create a new folder named the same as DataHub's tag for their new version, e.g. `v0.8.42`
9-
2. From the DataHub git repo, get `docker-compose-without-neo4j.quickstart.yml` from /docker/quickstart and paste it into the folder you created in step 1. This docker-compose file is what we use as a base for creating our own docker-compose file that's used for the Elastic Beanstalk application.
9+
2. In the DataHub git repo, open the git Tag of the new version. Get `docker-compose-without-neo4j.quickstart.yml` from /docker/quickstart and paste it into the folder you created in step 1. This docker-compose file is what we use as a base for creating our own docker-compose file that's used for the Elastic Beanstalk application.
1010
3. Read the new version's release notes. Fingers crossed that no changes need to be made to mysql or elasticsearch! If that's the case, the only thing that production deployments of DataHub will need to update is the docker container. If there *are* changes needed to mysql or elasticsearch, we should note that clearly in our own release notes of this repo.
11-
4. Check https://github.com/datahub-project/datahub/blob/master/docker/quickstart/docker-compose-without-neo4j.quickstart.yml to find the MySQL version that DataHub is using. It'll look like `image: mysql:5.7`. In our `/mysql/instructions.md` make sure that that version is listed.
12-
5. Check https://github.com/datahub-project/datahub/tree/master/metadata-service/restli-servlet-impl/src/main/resources/index/usage-event to see if any recent updates have been made to either `policy.json` or `index_template.json`. If so, you may need to update /elasticsearch/ap1.md or api2.md
13-
6. Check https://github.com/datahub-project/datahub/tree/efc5602493e66c83fa0ffe8cf9f9998fe9ec72bd/docker/mysql-setup `init.sql` to see if any recent updates have been made. If so, you may need to update /mysql/mysql.sql
11+
4. Check /docker/quickstart/docker-compose-without-neo4j.quickstart.yml to find the MySQL version that DataHub is using. It'll look like `image: mysql:5.7`. In our `/mysql/instructions.md` make sure that that version is listed.
12+
5. Check /metadata-service/restli-servlet-impl/src/main/resources/index/usage-event to see if any recent updates have been made to either `policy.json` or `index_template.json`. If so, you may need to update /elasticsearch/ap1.md or api2.md
13+
6. Check /docker/mysql-setup `init.sql` to see if any recent updates have been made. If so, you may need to update /mysql/mysql.sql
1414
7. Now we need to build the new `docker-compose.yml` file that's used to create the Elastic Beanstalk application.
1515
1. Diff the changes between the previous version and new version of `docker-compose-without-neo4j.quickstart.yml`. In most cases the changes are very minor, and thus we can simply make a copy of `our-docker-compose.yml` file from the previous version, modify it to include the new changes, and voila we're done. If major changes have been made by DataHub, you'll need to consider whether to modify the previous `our-docker-compose.yml` file so that it contains these new changes, or to start with the new `docker-compose-without-neo4j.quickstart.yml` file supplied by DataHub and modify it to include our changes. Typically, modifying `our-docker-compose.yml` will be easier and take fewer steps.
1616
2. Once you're done (and you tested that it works, right?) put the resulting compose file in two locations: first, here in the appropriate /datahub-versions/ folder and name it `our-docker-compose.yml`. Second, in the /elasticbeanstalk folder and name it `docker-compose.yml`; that folder is where users of this repo get it from.

elasticbeanstalk/docker-compose.yml

+5-6
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ services:
2323
depends_on:
2424
- datahub-gms
2525
environment:
26-
- GMS_HOST=datahub-gms
27-
- GMS_PORT=8080
2826
- DATAHUB_GMS_HOST=datahub-gms
2927
- DATAHUB_GMS_PORT=8080
3028
- KAFKA_BOOTSTRAP_SERVER=broker:29092
@@ -58,7 +56,7 @@ services:
5856
- ELASTIC_CLIENT_USERNAME={{{ $elastic_username }}}
5957
- ELASTIC_CLIENT_PASSWORD={{{ $elastic_password }}}
6058
hostname: datahub-frontend-react
61-
image: linkedin/datahub-frontend-react:v0.8.41
59+
image: linkedin/datahub-frontend-react:v0.8.42
6260
ports:
6361
- 80:80
6462
volumes:
@@ -86,16 +84,17 @@ services:
8684
- MCE_CONSUMER_ENABLED=true
8785
- PE_CONSUMER_ENABLED=true
8886
- UI_INGESTION_ENABLED=true
89-
- UI_INGESTION_DEFAULT_CLI_VERSION=0.8.40
87+
- UI_INGESTION_DEFAULT_CLI_VERSION=0.8.41
9088
- ELASTICSEARCH_USE_SSL=true
9189
- ELASTICSEARCH_USERNAME={{{ $elastic_username }}}
9290
- ELASTICSEARCH_PASSWORD={{{ $elastic_password }}}
9391
hostname: datahub-gms
94-
image: linkedin/datahub-gms:v0.8.41
92+
image: linkedin/datahub-gms:v0.8.42
9593
ports:
9694
- 8080:8080
9795
volumes:
9896
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
97+
- ${HOME}/.datahub/plugins/auth/resources/:/etc/datahub/plugins/auth/resources
9998
kafka-setup:
10099
container_name: kafka-setup
101100
depends_on:
@@ -105,7 +104,7 @@ services:
105104
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
106105
- KAFKA_BOOTSTRAP_SERVER=broker:29092
107106
hostname: kafka-setup
108-
image: linkedin/datahub-kafka-setup:v0.8.41
107+
image: linkedin/datahub-kafka-setup:v0.8.42
109108
schema-registry:
110109
container_name: schema-registry
111110
depends_on:

0 commit comments

Comments
 (0)