Skip to content

Commit 40f525c

Browse files
authored
fix(aws_s3 source): Use the decoder to calculate type defs (#18274)
* fix output type
* fix output type too
* update tests
* fix tests
* add json test
* cleanup
* ignore temporary compose files
* fix .gitignore style
1 parent 294c1dd commit 40f525c

File tree

2 files changed

+57
-6
lines changed

2 files changed

+57
-6
lines changed

scripts/integration/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*/compose-temp*.yaml

src/sources/aws_s3/mod.rs

+56 -6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::{convert::TryInto, io::ErrorKind};
33
use async_compression::tokio::bufread;
44
use aws_sdk_s3::types::ByteStream;
55
use codecs::decoding::{DeserializerConfig, FramingConfig, NewlineDelimitedDecoderOptions};
6-
use codecs::{BytesDeserializerConfig, NewlineDelimitedDecoderConfig};
6+
use codecs::NewlineDelimitedDecoderConfig;
77
use futures::{stream, stream::StreamExt, TryStreamExt};
88
use lookup::owned_value_path;
99
use snafu::Snafu;
@@ -14,7 +14,6 @@ use vrl::value::{kind::Collection, Kind};
1414

1515
use super::util::MultilineConfig;
1616
use crate::codecs::DecodingConfig;
17-
use crate::config::DataType;
1817
use crate::{
1918
aws::{auth::AwsAuthentication, create_client, create_client_and_region, RegionOrEndpoint},
2019
common::{s3::S3ClientBuilder, sqs::SqsClientBuilder},
@@ -163,7 +162,8 @@ impl SourceConfig for AwsS3Config {
163162

164163
fn outputs(&self, global_log_namespace: LogNamespace) -> Vec<SourceOutput> {
165164
let log_namespace = global_log_namespace.merge(self.log_namespace);
166-
let mut schema_definition = BytesDeserializerConfig
165+
let mut schema_definition = self
166+
.decoding
167167
.schema_definition(log_namespace)
168168
.with_source_metadata(
169169
Self::NAME,
@@ -199,7 +199,7 @@ impl SourceConfig for AwsS3Config {
199199
Self::NAME,
200200
None,
201201
&owned_value_path!("metadata"),
202-
Kind::object(Collection::empty().with_unknown(Kind::bytes())),
202+
Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(),
203203
None,
204204
);
205205

@@ -208,7 +208,10 @@ impl SourceConfig for AwsS3Config {
208208
schema_definition = schema_definition.unknown_fields(Kind::bytes());
209209
}
210210

211-
vec![SourceOutput::new_logs(DataType::Log, schema_definition)]
211+
vec![SourceOutput::new_logs(
212+
self.decoding.output_type(),
213+
schema_definition,
214+
)]
212215
}
213216

214217
fn can_acknowledge(&self) -> bool {
@@ -440,6 +443,7 @@ mod integration_tests {
440443

441444
use aws_sdk_s3::{types::ByteStream, Client as S3Client};
442445
use aws_sdk_sqs::{model::QueueAttributeName, Client as SqsClient};
446+
use codecs::{decoding::DeserializerConfig, JsonDeserializerConfig};
443447
use lookup::path;
444448
use similar_asserts::assert_eq;
445449
use vrl::value::Value;
@@ -483,6 +487,35 @@ mod integration_tests {
483487
logs,
484488
Delivered,
485489
false,
490+
DeserializerConfig::Bytes,
491+
)
492+
.await;
493+
}
494+
495+
#[tokio::test]
496+
async fn s3_process_json_message() {
497+
trace_init();
498+
499+
let logs: Vec<String> = random_lines(100).take(10).collect();
500+
501+
let json_logs: Vec<String> = logs
502+
.iter()
503+
.map(|msg| {
504+
// convert to JSON object
505+
format!(r#"{{"message": "{}"}}"#, msg)
506+
})
507+
.collect();
508+
509+
test_event(
510+
None,
511+
None,
512+
None,
513+
None,
514+
json_logs.join("\n").into_bytes(),
515+
logs,
516+
Delivered,
517+
false,
518+
DeserializerConfig::Json(JsonDeserializerConfig::default()),
486519
)
487520
.await;
488521
}
@@ -502,6 +535,7 @@ mod integration_tests {
502535
logs,
503536
Delivered,
504537
true,
538+
DeserializerConfig::Bytes,
505539
)
506540
.await;
507541
}
@@ -522,6 +556,7 @@ mod integration_tests {
522556
logs,
523557
Delivered,
524558
false,
559+
DeserializerConfig::Bytes,
525560
)
526561
.await;
527562
}
@@ -542,6 +577,7 @@ mod integration_tests {
542577
logs,
543578
Delivered,
544579
false,
580+
DeserializerConfig::Bytes,
545581
)
546582
.await;
547583
}
@@ -570,6 +606,7 @@ mod integration_tests {
570606
logs,
571607
Delivered,
572608
false,
609+
DeserializerConfig::Bytes,
573610
)
574611
.await;
575612
}
@@ -599,6 +636,7 @@ mod integration_tests {
599636
logs,
600637
Delivered,
601638
false,
639+
DeserializerConfig::Bytes,
602640
)
603641
.await;
604642
}
@@ -628,6 +666,7 @@ mod integration_tests {
628666
logs,
629667
Delivered,
630668
false,
669+
DeserializerConfig::Bytes,
631670
)
632671
.await;
633672
}
@@ -655,6 +694,7 @@ mod integration_tests {
655694
vec!["abc\ndef\ngeh".to_owned()],
656695
Delivered,
657696
false,
697+
DeserializerConfig::Bytes,
658698
)
659699
.await;
660700
}
@@ -677,6 +717,7 @@ mod integration_tests {
677717
logs,
678718
Errored,
679719
false,
720+
DeserializerConfig::Bytes,
680721
)
681722
.await;
682723
}
@@ -696,6 +737,7 @@ mod integration_tests {
696737
logs,
697738
Rejected,
698739
false,
740+
DeserializerConfig::Bytes,
699741
)
700742
.await;
701743
}
@@ -708,6 +750,7 @@ mod integration_tests {
708750
queue_url: &str,
709751
multiline: Option<MultilineConfig>,
710752
log_namespace: bool,
753+
decoding: DeserializerConfig,
711754
) -> AwsS3Config {
712755
AwsS3Config {
713756
region: RegionOrEndpoint::with_both("us-east-1", s3_address()),
@@ -723,6 +766,7 @@ mod integration_tests {
723766
}),
724767
acknowledgements: true.into(),
725768
log_namespace: Some(log_namespace),
769+
decoding,
726770
..Default::default()
727771
}
728772
}
@@ -738,6 +782,7 @@ mod integration_tests {
738782
expected_lines: Vec<String>,
739783
status: EventStatus,
740784
log_namespace: bool,
785+
decoding: DeserializerConfig,
741786
) {
742787
assert_source_compliance(&SOURCE_TAGS, async move {
743788
let key = key.unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
@@ -750,7 +795,7 @@ mod integration_tests {
750795

751796
tokio::time::sleep(Duration::from_secs(1)).await;
752797

753-
let config = config(&queue, multiline, log_namespace);
798+
let config = config(&queue, multiline, log_namespace, decoding);
754799

755800
s3.put_object()
756801
.bucket(bucket.clone())
@@ -831,6 +876,11 @@ mod integration_tests {
831876

832877
assert_eq!(expected_lines.len(), events.len());
833878
for (i, event) in events.iter().enumerate() {
879+
880+
if let Some(schema_definition) = config.outputs(namespace).pop().unwrap().schema_definition {
881+
schema_definition.is_valid_for_event(event).unwrap();
882+
}
883+
834884
let message = expected_lines[i].as_str();
835885

836886
let log = event.as_log();

0 commit comments

Comments
 (0)