Skip to content

Commit d648c86

Browse files
authored
feat(codecs): Add full codec support to AWS S3 source/sink (vectordotdev#17098)
1 parent d286d16 commit d648c86

File tree

5 files changed

+308
-101
lines changed

5 files changed

+308
-101
lines changed

src/codecs/decoding/decoder.rs

+14-8
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@ use crate::{
1515
/// messages.
1616
#[derive(Clone)]
1717
pub struct Decoder {
18-
framer: Framer,
19-
deserializer: Deserializer,
20-
log_namespace: LogNamespace,
18+
/// The framer being used.
19+
pub framer: Framer,
20+
/// The deserializer being used.
21+
pub deserializer: Deserializer,
22+
/// The `log_namespace` being used.
23+
pub log_namespace: LogNamespace,
2124
}
2225

2326
impl Default for Decoder {
@@ -61,16 +64,19 @@ impl Decoder {
6164
Error::FramingError(error)
6265
})?;
6366

64-
let frame = match frame {
65-
Some(frame) => frame,
66-
_ => return Ok(None),
67-
};
67+
frame
68+
.map(|frame| self.deserializer_parse(frame))
69+
.transpose()
70+
}
6871

72+
/// Parses a frame using the included deserializer, returning the events and the frame's byte size; on failure, emits a `DecoderDeserializeError` and returns `Error::ParsingError`.
73+
pub fn deserializer_parse(&self, frame: Bytes) -> Result<(SmallVec<[Event; 1]>, usize), Error> {
6974
let byte_size = frame.len();
75+
7076
// Parse structured events from the byte frame.
7177
self.deserializer
7278
.parse(frame, self.log_namespace)
73-
.map(|events| Some((events, byte_size)))
79+
.map(|events| (events, byte_size))
7480
.map_err(|error| {
7581
emit!(DecoderDeserializeError { error: &error });
7682
Error::ParsingError(error)

src/sinks/aws_s3/config.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,7 @@ use super::sink::S3RequestOptions;
1313
use crate::{
1414
aws::{AwsAuthentication, RegionOrEndpoint},
1515
codecs::{Encoder, EncodingConfigWithFraming, SinkType},
16-
config::{
17-
AcknowledgementsConfig, DataType, GenerateConfig, Input, ProxyConfig, SinkConfig,
18-
SinkContext,
19-
},
16+
config::{AcknowledgementsConfig, GenerateConfig, Input, ProxyConfig, SinkConfig, SinkContext},
2017
sinks::{
2118
s3_common::{
2219
self,
@@ -177,7 +174,7 @@ impl SinkConfig for S3SinkConfig {
177174
}
178175

179176
fn input(&self) -> Input {
180-
Input::new(self.encoding.config().1.input_type() & DataType::Log)
177+
Input::new(self.encoding.config().1.input_type())
181178
}
182179

183180
fn acknowledgements(&self) -> &AcknowledgementsConfig {

src/sources/aws_s3/mod.rs

+30-4
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,27 @@ use std::{convert::TryInto, io::ErrorKind};
22

33
use async_compression::tokio::bufread;
44
use aws_sdk_s3::types::ByteStream;
5+
use codecs::decoding::{DeserializerConfig, FramingConfig, NewlineDelimitedDecoderOptions};
56
use codecs::BytesDeserializerConfig;
67
use futures::{stream, stream::StreamExt, TryStreamExt};
78
use lookup::owned_value_path;
89
use snafu::Snafu;
910
use tokio_util::io::StreamReader;
1011
use value::{kind::Collection, Kind};
1112
use vector_config::configurable_component;
12-
use vector_core::config::{DataType, LegacyKey, LogNamespace};
13+
use vector_core::config::{LegacyKey, LogNamespace};
1314

1415
use super::util::MultilineConfig;
16+
use crate::codecs::DecodingConfig;
17+
use crate::config::DataType;
1518
use crate::{
1619
aws::{auth::AwsAuthentication, create_client, RegionOrEndpoint},
1720
common::{s3::S3ClientBuilder, sqs::SqsClientBuilder},
1821
config::{
1922
ProxyConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput,
2023
},
2124
line_agg,
22-
serde::bool_or_struct,
25+
serde::{bool_or_struct, default_decoding},
2326
tls::TlsConfig,
2427
};
2528

@@ -71,7 +74,8 @@ enum Strategy {
7174
//
7275
// Maybe showing defaults at all, when there are required properties, doesn't actually make sense? :thinkies:
7376
#[configurable_component(source("aws_s3", "Collect logs from AWS S3."))]
74-
#[derive(Clone, Debug, Default)]
77+
#[derive(Clone, Debug, Derivative)]
78+
#[derivative(Default)]
7579
#[serde(default, deny_unknown_fields)]
7680
pub struct AwsS3Config {
7781
#[serde(flatten)]
@@ -115,6 +119,23 @@ pub struct AwsS3Config {
115119
#[configurable(metadata(docs::hidden))]
116120
#[serde(default)]
117121
log_namespace: Option<bool>,
122+
123+
#[configurable(derived)]
124+
#[serde(default = "default_framing")]
125+
#[derivative(Default(value = "default_framing()"))]
126+
pub framing: FramingConfig,
127+
128+
#[configurable(derived)]
129+
#[serde(default = "default_decoding")]
130+
#[derivative(Default(value = "default_decoding()"))]
131+
pub decoding: DeserializerConfig,
132+
}
133+
134+
const fn default_framing() -> FramingConfig {
135+
// This is used for backwards compatibility. It used to be the only (hardcoded) option.
136+
FramingConfig::NewlineDelimited {
137+
newline_delimited: NewlineDelimitedDecoderOptions { max_length: None },
138+
}
118139
}
119140

120141
impl_generate_config_from_default!(AwsS3Config);
@@ -133,7 +154,7 @@ impl SourceConfig for AwsS3Config {
133154

134155
match self.strategy {
135156
Strategy::Sqs => Ok(Box::pin(
136-
self.create_sqs_ingestor(multiline_config, &cx.proxy)
157+
self.create_sqs_ingestor(multiline_config, &cx.proxy, log_namespace)
137158
.await?
138159
.run(cx, self.acknowledgements, log_namespace),
139160
)),
@@ -200,6 +221,7 @@ impl AwsS3Config {
200221
&self,
201222
multiline: Option<line_agg::Config>,
202223
proxy: &ProxyConfig,
224+
log_namespace: LogNamespace,
203225
) -> crate::Result<sqs::Ingestor> {
204226
let region = self
205227
.region
@@ -221,6 +243,9 @@ impl AwsS3Config {
221243
)
222244
.await?;
223245

246+
let decoder =
247+
DecodingConfig::new(self.framing.clone(), self.decoding.clone(), log_namespace).build();
248+
224249
match self.sqs {
225250
Some(ref sqs) => {
226251
let sqs_client = create_client::<SqsClientBuilder>(
@@ -240,6 +265,7 @@ impl AwsS3Config {
240265
sqs.clone(),
241266
self.compression,
242267
multiline,
268+
decoder,
243269
)
244270
.await?;
245271

0 commit comments

Comments
 (0)