-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
Copy pathazure_blob.cue
585 lines (523 loc) · 19.6 KB
/
azure_blob.cue
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
package metadata
base: components: sinks: azure_blob: configuration: {
// Optional sink-level switch for end-to-end acknowledgements.
acknowledgements: {
	description: """
		Controls how acknowledgements are handled for this sink.
		See [End-to-end Acknowledgements][e2e_acks] for more information on how event acknowledgement is handled.
		[e2e_acks]: https://vector.dev/docs/about/under-the-hood/architecture/end-to-end-acknowledgements/
		"""
	required: false
	type: object: {
		options: {
			// Boolean flag; no default value is declared for it here.
			enabled: {
				description: """
					Whether or not end-to-end acknowledgements are enabled.
					When enabled for a sink, any source connected to that sink, where the source supports
					end-to-end acknowledgements as well, waits for events to be acknowledged by the sink
					before acknowledging them at the source.
					Enabling or disabling acknowledgements at the sink level takes precedence over any global
					[`acknowledgements`][global_acks] configuration.
					[global_acks]: https://vector.dev/docs/reference/configuration/global-options/#acknowledgements
					"""
				required: false
				type: bool: {}
			}
		}
	}
}
// Optional limits that decide when a batch of events is flushed.
batch: {
	description: "Event batching behavior."
	required:    false
	type: object: {
		options: {
			// Byte cap; the declared default is 10,000,000 bytes.
			max_bytes: {
				description: """
					The maximum size of a batch that is processed by a sink.
					This is based on the uncompressed size of the batched events, before they are
					serialized/compressed.
					"""
				required: false
				type: uint: default: 10000000
				type: uint: unit:    "bytes"
			}

			// Event-count cap; no default is declared here.
			max_events: {
				description: "The maximum size of a batch before it is flushed."
				required:    false
				type: uint: {
					unit: "events"
				}
			}

			// Age cap; the declared default is 300 seconds.
			timeout_secs: {
				description: "The maximum age of a batch before it is flushed."
				required:    false
				type: float: default: 300.0
				type: float: unit:    "seconds"
			}
		}
	}
}
// Optional boolean: append a UUID v4 token to the generated blob key.
// The description below is rendered as user-facing documentation, so its
// wording is part of the published output.
blob_append_uuid: {
	description: """
		Whether or not to append a UUID v4 token to the end of the blob key.
		The UUID is appended to the timestamp portion of the object key, such that if the blob key
		generated is `date=2022-07-18/1658176486`, setting this field to `true` results
		in a blob key that looks like
		`date=2022-07-18/1658176486-30f6652c-71da-4f9f-800d-a1189c47c547`.
		This ensures there are no name collisions, and can be useful in high-volume workloads where
		blob keys must be unique.
		"""
	required: false
	type: bool: {}
}
// Optional template string prepended to every blob key; defaults to "blob/%F/".
blob_prefix: {
	description: """
		A prefix to apply to all blob keys.
		Prefixes are useful for partitioning objects, such as by creating a blob key that
		stores blobs under a particular directory. If using a prefix for this purpose, it must end
		in `/` to act as a directory path. A trailing `/` is **not** automatically added.
		"""
	required: false
	type: string: default: "blob/%F/"
	type: string: examples: [
		"date/%F/hour/%H/",
		"year=%Y/month=%m/day=%d/",
		"kubernetes/{{ metadata.cluster }}/{{ metadata.application_name }}/",
	]
	type: string: syntax: "template"
}
// Optional strftime pattern for the timestamp portion of the blob key.
// The description must refer to this option by its own name
// (`blob_time_format`) so readers can find the setting it describes.
// The sample epoch value 1658176486 corresponds to 20:34:46 UTC on 2022-07-18.
blob_time_format: {
	description: """
		The timestamp format for the time component of the blob key.
		By default, blob keys are appended with a timestamp that reflects when the blob is sent to
		Azure Blob Storage, such that the resulting blob key is functionally equivalent to joining
		the blob prefix with the formatted timestamp, such as `date=2022-07-18/1658176486`.
		This would represent a `blob_prefix` set to `date=%F/` and the timestamp of Mon Jul 18 2022
		20:34:46 GMT+0000, with the `blob_time_format` being set to `%s`, which renders
		timestamps in seconds since the Unix epoch.
		Supports the common [`strftime`][chrono_strftime_specifiers] specifiers found in most
		languages.
		When set to an empty string, no timestamp is appended to the blob prefix.
		[chrono_strftime_specifiers]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html#specifiers
		"""
	required: false
	type: string: syntax: "strftime"
}
// Optional compression algorithm selector; the declared default is "gzip".
compression: {
	description: """
		Compression configuration.
		All compression algorithms use the default compression level unless otherwise specified.
		"""
	required: false
	type: string: default: "gzip"

	// Accepted values, each mapped to its documentation text.
	type: string: enum: {
		gzip: """
			[Gzip][gzip] compression.
			[gzip]: https://www.gzip.org/
			"""
		none: "No compression."
		zlib: """
			[Zlib][zlib] compression.
			[zlib]: https://zlib.net/
			"""
		zstd: """
			[Zstandard][zstd] compression.
			[zstd]: https://facebook.github.io/zstd/
			"""
	}
}
// Optional connection string; the description states that either this field
// or `storage_account` must be supplied.
connection_string: {
	description: """
		The Azure Blob Storage Account connection string.
		Authentication with access key is the only supported authentication method.
		Either `storage_account`, or this field, must be specified.
		"""
	required: false
	type: string: {
		examples: [
			"DefaultEndpointsProtocol=https;AccountName=mylogstorage;AccountKey=storageaccountkeybase64encoded;EndpointSuffix=core.windows.net",
		]
	}
}
// Required: name of the target container.
container_name: {
	description: "The Azure Blob Storage Account container name."
	required:    true
	type: string: {
		examples: ["my-logs"]
	}
}
// Required: selects the codec used to serialize events and carries the
// codec-specific option groups (`avro`, `csv`) plus field filtering and
// timestamp formatting. Descriptions are rendered as user-facing docs, so any
// option they mention must be spelled exactly as it is declared in this block.
encoding: {
	description: "Configures how events are encoded into raw bytes."
	required:    true
	type: object: options: {
		// Only relevant when `codec` is "avro".
		avro: {
			description:   "Apache Avro-specific encoder options."
			relevant_when: "codec = \"avro\""
			required:      true
			type: object: options: schema: {
				description: "The Avro schema."
				required:    true
				type: string: examples: ["{ \"type\": \"record\", \"name\": \"log\", \"fields\": [{ \"name\": \"message\", \"type\": \"string\" }] }"]
			}
		}

		// Required codec selector; each enum key maps to its documentation text.
		codec: {
			description: "The codec to use for encoding events."
			required:    true
			type: string: enum: {
				avro: """
					Encodes an event as an [Apache Avro][apache_avro] message.
					[apache_avro]: https://avro.apache.org/
					"""
				csv: """
					Encodes an event as a CSV message.
					This codec must be configured with fields to encode.
					"""
				gelf: """
					Encodes an event as a [GELF][gelf] message.
					[gelf]: https://docs.graylog.org/docs/gelf
					"""
				json: """
					Encodes an event as [JSON][json].
					[json]: https://www.json.org/
					"""
				logfmt: """
					Encodes an event as a [logfmt][logfmt] message.
					[logfmt]: https://brandur.org/logfmt
					"""
				native: """
					Encodes an event in the [native Protocol Buffers format][vector_native_protobuf].
					This codec is **[experimental][experimental]**.
					[vector_native_protobuf]: https://github.com/vectordotdev/vector/blob/master/lib/vector-core/proto/event.proto
					[experimental]: https://vector.dev/highlights/2022-03-31-native-event-codecs
					"""
				native_json: """
					Encodes an event in the [native JSON format][vector_native_json].
					This codec is **[experimental][experimental]**.
					[vector_native_json]: https://github.com/vectordotdev/vector/blob/master/lib/codecs/tests/data/native_encoding/schema.cue
					[experimental]: https://vector.dev/highlights/2022-03-31-native-event-codecs
					"""
				raw_message: """
					No encoding.
					This encoding uses the `message` field of a log event.
					Be careful if you are modifying your log events (for example, by using a `remap`
					transform) and removing the message field while doing additional parsing on it, as this
					could lead to the encoding emitting empty strings for the given event.
					"""
				text: """
					Plain text encoding.
					This encoding uses the `message` field of a log event. For metrics, it uses an
					encoding that resembles the Prometheus export format.
					Be careful if you are modifying your log events (for example, by using a `remap`
					transform) and removing the message field while doing additional parsing on it, as this
					could lead to the encoding emitting empty strings for the given event.
					"""
			}
		}

		// Only relevant when `codec` is "csv". Single-character options are
		// declared as unsigned integers (byte values).
		csv: {
			description:   "The CSV Serializer Options."
			relevant_when: "codec = \"csv\""
			required:      true
			type: object: options: {
				capacity: {
					description: """
						Set the capacity (in bytes) of the internal buffer used in the CSV writer.
						This defaults to a reasonable setting.
						"""
					required: false
					type: uint: default: 8192
				}
				delimiter: {
					description: "The field delimiter to use when writing CSV."
					required:    false
					// 44 is the ASCII code for `,`.
					type: uint: default: 44
				}
				double_quote: {
					description: """
						Enable double quote escapes.
						This is enabled by default, but it may be disabled. When disabled, quotes in
						field data are escaped instead of doubled.
						"""
					required: false
					type: bool: default: true
				}
				escape: {
					description: """
						The escape character to use when writing CSV.
						In some variants of CSV, quotes are escaped using a special escape character
						like \\ (instead of escaping quotes by doubling them).
						To use this, `double_quote` needs to be disabled as well otherwise it is ignored.
						"""
					required: false
					// 34 is the ASCII code for `"`, the same value as the `quote` default.
					// NOTE(review): confirm this default is intended rather than `\` (92).
					type: uint: default: 34
				}
				fields: {
					description: """
						Configures the fields that will be encoded, as well as the order in which they
						appear in the output.
						If a field is not present in the event, the output will be an empty string.
						Values of type `Array`, `Object`, and `Regex` are not supported and the
						output will be an empty string.
						"""
					required: true
					type: array: items: type: string: {}
				}
				quote: {
					description: "The quote character to use when writing CSV."
					required:    false
					// 34 is the ASCII code for `"`.
					type: uint: default: 34
				}
				quote_style: {
					description: "The quoting style to use when writing CSV data."
					required:    false
					type: string: {
						default: "necessary"
						enum: {
							always: "Always puts quotes around every field."
							necessary: """
								Puts quotes around fields only when necessary.
								They are necessary when fields contain a quote, delimiter, or record terminator.
								Quotes are also necessary when writing an empty record
								(which is indistinguishable from a record with one empty field).
								"""
							never: "Never writes quotes, even if it produces invalid CSV data."
							non_numeric: """
								Puts quotes around all fields that are non-numeric.
								Namely, when writing a field that does not parse as a valid float or integer,
								then quotes are used even if they aren't strictly necessary.
								"""
						}
					}
				}
			}
		}

		except_fields: {
			description: "List of fields that are excluded from the encoded event."
			required:    false
			type: array: items: type: string: {}
		}

		// Only relevant for the "json" and "text" codecs; defaults to "single".
		metric_tag_values: {
			description: """
				Controls how metric tag values are encoded.
				When set to `single`, only the last non-bare value of tags is displayed with the
				metric. When set to `full`, all metric tags are exposed as separate assignments.
				"""
			relevant_when: "codec = \"json\" or codec = \"text\""
			required:      false
			type: string: {
				default: "single"
				enum: {
					full: "All tags are exposed as arrays of either string or null values."
					single: """
						Tag values are exposed as single strings, the same as they were before this config
						option. Tags with multiple values show the last assigned value, and null values
						are ignored.
						"""
				}
			}
		}

		only_fields: {
			description: "List of fields that are included in the encoded event."
			required:    false
			type: array: items: type: string: {}
		}

		timestamp_format: {
			description: "Format used for timestamp fields."
			required:    false
			type: string: enum: {
				rfc3339: "Represent the timestamp as a RFC 3339 timestamp."
				unix:    "Represent the timestamp as a Unix timestamp."
			}
		}
	}
}
// Optional override for the blob storage endpoint URL.
endpoint: {
	description: """
		The Azure Blob Storage Endpoint URL.
		This is used to override the default blob storage endpoint URL in cases where you are using
		credentials read from the environment/managed identities or access tokens without using an
		explicit connection_string (which already explicitly supports overriding the blob endpoint
		URL).
		This may only be used with `storage_account` and is ignored when used with
		`connection_string`.
		"""
	required: false
	type: string: {
		examples: [
			"https://test.blob.core.usgovcloudapi.net/",
			"https://test.blob.core.windows.net/",
		]
	}
}
// Optional framing settings: how encoded events are separated in the output.
framing: {
	description: "Framing configuration."
	required:    false
	type: object: {
		options: {
			// Only relevant when `method` is "character_delimited".
			character_delimited: {
				description:   "Options for the character delimited encoder."
				relevant_when: "method = \"character_delimited\""
				required:      true
				type: object: {
					options: {
						delimiter: {
							description: "The ASCII (7-bit) character that delimits byte sequences."
							required:    true
							type: uint: {}
						}
					}
				}
			}

			// Required selector; each enum key maps to its documentation text.
			method: {
				description: "The framing method."
				required:    true
				type: string: {
					enum: {
						bytes:               "Event data is not delimited at all."
						character_delimited: "Event data is delimited by a single ASCII (7-bit) character."
						length_delimited: """
							Event data is prefixed with its length in bytes.
							The prefix is a 32-bit unsigned integer, little endian.
							"""
						newline_delimited: "Event data is delimited by a newline (LF) character."
					}
				}
			}
		}
	}
}
// Optional tuning for outbound requests: concurrency, rate limiting,
// retries and timeouts.
request: {
	description: """
		Middleware settings for outbound requests.
		Various settings can be configured, such as concurrency and rate limits, timeouts, etc.
		"""
	required: false
	type: object: options: {
		// Parameters for the adaptive concurrency algorithm; every option has a default.
		adaptive_concurrency: {
			description: """
				Configuration of adaptive concurrency parameters.
				These parameters typically do not require changes from the default, and incorrect values can lead to meta-stable or
				unstable performance and sink behavior. Proceed with caution.
				"""
			required: false
			type: object: options: {
				decrease_ratio: {
					description: """
						The fraction of the current value to set the new concurrency limit when decreasing the limit.
						Valid values are greater than `0` and less than `1`. Smaller values cause the algorithm to scale back rapidly
						when latency increases.
						Note that the new limit is rounded down after applying this ratio.
						"""
					required: false
					type: float: {
						default: 0.9
					}
				}
				ewma_alpha: {
					description: """
						The weighting of new measurements compared to older measurements.
						Valid values are greater than `0` and less than `1`.
						ARC uses an exponentially weighted moving average (EWMA) of past RTT measurements as a reference to compare with
						the current RTT. Smaller values cause this reference to adjust more slowly, which may be useful if a service has
						unusually high response variability.
						"""
					required: false
					type: float: {
						default: 0.4
					}
				}
				initial_concurrency: {
					description: """
						The initial concurrency limit to use. If not specified, the initial limit will be 1 (no concurrency).
						It is recommended to set this value to your service's average limit if you're seeing that it takes a
						long time to ramp up adaptive concurrency after a restart. You can find this value by looking at the
						`adaptive_concurrency_limit` metric.
						"""
					required: false
					type: uint: {
						default: 1
					}
				}
				rtt_deviation_scale: {
					description: """
						Scale of RTT deviations which are not considered anomalous.
						Valid values are greater than or equal to `0`, and we expect reasonable values to range from `1.0` to `3.0`.
						When calculating the past RTT average, we also compute a secondary “deviation” value that indicates how variable
						those values are. We use that deviation when comparing the past RTT average to the current measurements, so we
						can ignore increases in RTT that are within an expected range. This factor is used to scale up the deviation to
						an appropriate range. Larger values cause the algorithm to ignore larger increases in the RTT.
						"""
					required: false
					type: float: {
						default: 2.5
					}
				}
			}
		}

		// Accepts either one of the enum strings or an unsigned integer.
		concurrency: {
			description: """
				Configuration for outbound request concurrency.
				This can be set either to one of the below enum values or to a positive integer, which denotes
				a fixed concurrency limit.
				"""
			required: false
			type: string: {
				default: "adaptive"
				enum: {
					adaptive: """
						Concurrency will be managed by Vector's [Adaptive Request Concurrency][arc] feature.
						[arc]: https://vector.dev/docs/about/under-the-hood/networking/arc/
						"""
					none: """
						A fixed concurrency of 1.
						Only one request can be outstanding at any given time.
						"""
				}
			}
			type: uint: {}
		}

		rate_limit_duration_secs: {
			description: "The time window used for the `rate_limit_num` option."
			required:    false
			type: uint: default: 1
			type: uint: unit:    "seconds"
		}

		rate_limit_num: {
			description: "The maximum number of requests allowed within the `rate_limit_duration_secs` time window."
			required:    false
			// 9223372036854775807 is 2^63 - 1.
			type: uint: default: 9223372036854775807
			type: uint: unit:    "requests"
		}

		retry_attempts: {
			description: """
				The maximum number of retries to make for failed requests.
				The default, for all intents and purposes, represents an infinite number of retries.
				"""
			required: false
			// 9223372036854775807 is 2^63 - 1.
			type: uint: default: 9223372036854775807
			type: uint: unit:    "retries"
		}

		retry_initial_backoff_secs: {
			description: """
				The amount of time to wait before attempting the first retry for a failed request.
				After the first retry has failed, the fibonacci sequence is used to select future backoffs.
				"""
			required: false
			type: uint: default: 1
			type: uint: unit:    "seconds"
		}

		retry_max_duration_secs: {
			description: "The maximum amount of time to wait between retries."
			required:    false
			type: uint: default: 3600
			type: uint: unit:    "seconds"
		}

		timeout_secs: {
			description: """
				The time a request can take before being aborted.
				Datadog highly recommends that you do not lower this value below the service's internal timeout, as this could
				create orphaned requests, pile on retries, and result in duplicate data downstream.
				"""
			required: false
			type: uint: default: 60
			type: uint: unit:    "seconds"
		}
	}
}
// Optional storage account name; the description states that either this
// field or `connection_string` must be supplied.
storage_account: {
	description: """
		The Azure Blob Storage Account name.
		Attempts to load credentials for the account in the following ways, in order:
		- read from environment variables ([more information][env_cred_docs])
		- looks for a [Managed Identity][managed_ident_docs]
		- uses the `az` CLI tool to get an access token ([more information][az_cli_docs])
		Either `connection_string`, or this field, must be specified.
		[env_cred_docs]: https://docs.rs/azure_identity/latest/azure_identity/struct.EnvironmentCredential.html
		[managed_ident_docs]: https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview
		[az_cli_docs]: https://docs.microsoft.com/en-us/cli/azure/account?view=azure-cli-latest#az-account-get-access-token
		"""
	required: false
	type: string: {
		examples: ["mylogstorage"]
	}
}
}