# Settings for safe local mode development
spark {
# local[...], yarn or mesos://...
master = "local[4]"
# client or cluster deployment
submit.deployMode = "client"
# spark web UI port
webUrlPort = 8080
jobserver {
port = 8090
bind-address = "0.0.0.0"
# If true, a separate JVM is forked for each Spark context
context-per-jvm = false
# Number of job results to keep per JobResultActor/context
job-result-cache-size = 5000
kill-context-on-supervisor-down = false
# Note: JobFileDAO is deprecated as of v0.7.0 because of issues in production and
# will be removed in a future release. The default is now the SQL DAO backed by an H2 file.
jobdao = spark.jobserver.io.JobSqlDAO
# Automatically load a set of jars at startup time. Key is the appName, value is the path/URL.
# job-bin-paths {
# test = /path/to/my/test.jar
# }
# Cache binary on upload.
# If set to false, the binary will be cached on job start only.
cache-on-upload = true
filedao {
rootdir = /tmp/spark-jobserver/filedao/data
}
datadao {
# Storage directory for files uploaded to the server via the POST /data API
# (see the commented example after this block)
rootdir = /tmp/spark-jobserver/upload
}
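# Example only (not part of the stock config): the directory above backs the data
# upload API. Assuming a default local deployment on port 8090, a file could be
# uploaded roughly like this; the exact route may differ between job server versions:
#   curl --data-binary @input.csv "localhost:8090/data/input.csv"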
cassandra {
consistency = "ONE"
hosts = ["localhost:9042"]
user = ""
password = ""
chunk-size-in-kb = 1024
}
# To load up job jars on startup, place them here,
# with the app name as the key and the path to the jar as the value
# job-bin-paths {
# test = ../job-server-tests/target/scala-2.10/job-server-tests_2.10-0.5.3-SNAPSHOT.jar
# }
sqldao {
# Slick database driver, full classpath
slick-driver = slick.driver.H2Driver
# JDBC driver, full classpath
jdbc-driver = org.h2.Driver
# Directory where default H2 driver stores its data. Only needed for H2.
rootdir = /tmp/spark-jobserver/sqldao/data
# Full JDBC URL / init string, along with username and password. Sorry, needs to match above.
# Substitutions may be used when launching the job server, but leave them out of this default config or the tests won't pass
jdbc {
url = "jdbc:h2:file:/tmp/spark-jobserver/sqldao/data/h2-db"
user = ""
password = ""
}
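# Example only (hypothetical values, not part of the stock config): to back the
# SQL DAO with a PostgreSQL database instead of H2, the keys above would be
# overridden roughly like this (assumes the PostgreSQL JDBC driver is on the
# classpath; Slick profile/driver names vary by Slick version):
#   slick-driver = slick.driver.PostgresDriver
#   jdbc-driver = org.postgresql.Driver
#   jdbc {
#     url = "jdbc:postgresql://localhost:5432/jobserver"
#     user = "jobserver"
#     password = "secret"
#   }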
# DB connection pool settings
dbcp {
enabled = false
maxactive = 20
maxidle = 10
initialsize = 10
}
}
# Start an embedded H2 server (useful in cluster deployments)
startH2Server = false
# The ask pattern timeout for the API
short-timeout = 3 s
# Timeout for the job server to wait while creating contexts
context-creation-timeout = 60 s
# Timeout for the job server to wait while creating named objects
named-object-creation-timeout = 60 s
# Number of jobs that can be run simultaneously per context.
# If not set, defaults to the number of cores on the machine where the job server is running.
max-jobs-per-context = 8
# In YARN deployments, timeout for the job server to wait while creating contexts
yarn-context-creation-timeout = 40 s
}
# Predefined Spark contexts
# Below is an example, but do not uncomment it. Everything defined here is carried over to
# deploy-time configs, so they would be created in all environments. :(
# (A commented curl example for creating contexts at runtime follows this block.)
contexts {
# abc-demo {
# num-cpu-cores = 4 # Number of cores to allocate. Required.
# memory-per-node = 1024m # Executor memory per node, -Xmx style, e.g. 512m, 1G, etc.
# }
# define additional contexts here
}
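# Example only (hypothetical context name and sizes): instead of predefining contexts
# here, they can be created at runtime through the REST API, e.g.
#   curl -d "" "localhost:8090/contexts/abc-demo?num-cpu-cores=4&memory-per-node=1024m"
# Contexts created this way are managed entirely through the API and are not written
# back to this file.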
# Default settings for ad hoc as well as manually created contexts
# You can add any Spark config params here, for example, spark.mesos.coarse = true
context-settings {
num-cpu-cores = 4 # Number of cores to allocate. Required.
memory-per-node = 512m # Executor memory per node, -Xmx style, e.g. 512m, 1G, etc.
# A zero-arg class implementing spark.jobserver.context.SparkContextFactory
# Determines the type of jobs that can run in a SparkContext
context-factory = spark.jobserver.context.DefaultSparkContextFactory
streaming {
# Default batch interval for Spark Streaming contexts in milliseconds
batch_interval = 1000
# if true, stops gracefully by waiting for the processing of all received data to be completed
stopGracefully = true
# if true, stops the SparkContext with the StreamingContext. The underlying SparkContext will be
# stopped regardless of whether the StreamingContext has been started.
stopSparkContext = true
}
# URIs of jars to be loaded into the classpath for this context. May be given as a list of strings or as a single comma-separated string.
# dependent-jar-uris = ["file:///some/path/present/in/each/mesos/slave/somepackage.jar"]
# Timeout for SupervisorActor to wait for forked (separate JVM) contexts to initialize
context-init-timeout = 60s
passthrough {
spark.driver.allowMultipleContexts = true # Ignore the multiple-context exception related to SPARK-2243
}
# This adds configuration to the underlying Hadoop configuration in the Spark context
# hadoop {
#   mapreduce.framework.name = "FooFramework"
# }
# Settings for configuring the PythonContextFactory instance
# (a commented example of selecting a Python context factory follows this block)
python {
# Locations of Python libraries to be included on the PYTHONPATH when running Python jobs
paths = []
# The shell command used to launch the subprocess for Python jobs
executable = "python"
}
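# Example only (class name taken from the job-server-python module; verify it against
# your job server version): running Python jobs requires pointing a context at a
# Python-capable factory instead of the default, e.g. in a predefined context or via
# the context-factory query parameter:
#   context-factory = spark.jobserver.python.PythonSparkContextFactory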
# All the sections above take precedence because those properties are added directly to
# SparkConf.
# The following properties are passed directly to SparkLauncher (spark-submit). All
# launcher-specific properties should be added here; they can be overridden when creating
# a context, e.g.
# curl -X POST "localhost:8090/contexts/test-context?launcher.spark.driver.memory=512m"
launcher {
spark.driver.memory = 1g
}
}
}
# Flyway locations to scan recursively for migrations
flyway.locations = "db/h2/migration"
flyway.initOnMigrate = false
akka {
# Use SLF4J/logback for deployed environment logging
loggers = ["akka.event.slf4j.Slf4jLogger"]
actor {
provider = "akka.cluster.ClusterActorRefProvider"
warn-about-java-serializer-usage = off
serializers {
customStartJobSerializer = "spark.jobserver.util.StartJobSerializer"
}
serialization-bindings {
"spark.jobserver.JobManagerActor$StartJob" = customStartJobSerializer
}
}
remote {
log-remote-lifecycle-events = off
netty.tcp {
hostname = "127.0.0.1"
port = 0
send-buffer-size = 20 MiB
receive-buffer-size = 20 MiB
# This controls the maximum message size, including job results, that can be sent
maximum-frame-size = 10 MiB
}
}
cluster {
auto-down-unreachable-after = 20s
metrics.enabled = off
failure-detector.acceptable-heartbeat-pause = 3s
failure-detector.threshold = 12.0
}
}
# check the reference.conf in spray-can/src/main/resources for all defined settings
spray.can.server {
## Uncomment the next lines to make this an HTTPS example
# ssl-encryption = on
# keystore = "/some/path/server-keystore.jks"
# keystorePW = "changeit"
encryptionType = "SSL"
keystoreType = "JKS"
provider = "SunX509"
#
## Client Authentication (optional): activated upon providing a truststore file
# truststore = "/some/path/server-truststore.jks"
# truststorePW = "changeit"
truststore-type = "JKS"
truststore-provider = "SunX509"
#
# See http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#SSLContext for more examples;
# typical values for encryptionType are either SSL or TLS.
# key manager factory provider
# ssl engine provider protocols
enabledProtocols = ["SSLv3", "TLSv1"]
idle-timeout = 60 s
request-timeout = 40 s
pipelining-limit = 2 # for maximum performance (prevents StopReading / ResumeReading messages to the IOBridge)
# Needed for HTTP/1.0 requests with missing Host headers
default-host-header = "spray.io:8765"
# Increase this in order to upload bigger job jars (see the commented upload example after this block)
parsing.max-content-length = 30m
}
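# Example only: parsing.max-content-length above bounds the size of uploaded job jars.
# A typical upload against the classic /jars route (newer releases also offer /binaries)
# looks roughly like this:
#   curl --data-binary @job-server-tests.jar "localhost:8090/jars/test"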
shiro {
# Activate authentication (and authorization)
authentication = off
# Absolute path to the Shiro config file, including the file name
# (a commented example of a minimal shiro.ini follows this block)
config.path = "/some/path/shiro.ini"
# Note that ssl-encryption should also be on when authentication is on, so that
# passwords cannot be sniffed.
# Timeout for the job server to wait for authentication requests
authentication-timeout = 10 s
# When authentication is on, this flag controls whether the authenticated user
# is always used as the proxy user when starting contexts
# (note that this overrides the spark.proxy.user parameter that may be provided to the POST /contexts/new-context command)
use-as-proxy-user = off
}
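# Example only (file contents, not HOCON; user names, passwords and roles are placeholders):
# a minimal shiro.ini for the config.path above could look like this:
#   [users]
#   alice = alicePassword, admin
#   bob = bobPassword
# Consult the Apache Shiro documentation for realms, password hashing and LDAP setups.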