diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java index 563359aa0978..e584944bda15 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java @@ -18,9 +18,10 @@ */ package org.apache.pinot.segment.local.recordtransformer; -import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nullable; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.data.Schema; @@ -39,30 +40,36 @@ public class CompositeTransformer implements RecordTransformer { *
NOTE: DO NOT CHANGE THE ORDER OF THE RECORD TRANSFORMERS *
NOTE: should put this after the {@link DataTypeTransformer} so that all values follow the data types in + * {@link FieldSpec}, and before the {@link NullValueTransformer} so that the invalidated value can be filled. + */ +public class TimeValidationTransformer implements RecordTransformer { + private static final Logger LOGGER = LoggerFactory.getLogger(TimeValidationTransformer.class); + + private final String _timeColumnName; + private final DateTimeFormatSpec _timeFormatSpec; + private final boolean _enableTimeValueCheck; + private final boolean _continueOnError; + + public TimeValidationTransformer(TableConfig tableConfig, Schema schema) { + _timeColumnName = tableConfig.getValidationConfig().getTimeColumnName(); + if (_timeColumnName != null) { + DateTimeFieldSpec dateTimeFieldSpec = schema.getSpecForTimeColumn(_timeColumnName); + Preconditions.checkState(dateTimeFieldSpec != null, "Failed to find spec for time column: %s from schema: %s", + _timeColumnName, schema.getSchemaName()); + _timeFormatSpec = dateTimeFieldSpec.getFormatSpec(); + IngestionConfig ingestionConfig = tableConfig.getIngestionConfig(); + if (ingestionConfig != null) { + _enableTimeValueCheck = ingestionConfig.isRowTimeValueCheck(); + _continueOnError = ingestionConfig.isContinueOnError(); + } else { + _enableTimeValueCheck = false; + _continueOnError = false; + } + } else { + _timeFormatSpec = null; + _enableTimeValueCheck = false; + _continueOnError = false; + } + } + + @Override + public boolean isNoOp() { + return !_enableTimeValueCheck; + } + + @Override + public GenericRow transform(GenericRow record) { + if (!_enableTimeValueCheck) { + return record; + } + Object timeValue = record.getValue(_timeColumnName); + if (timeValue == null) { + return record; + } + long timeValueMs; + try { + timeValueMs = _timeFormatSpec.fromFormatToMillis(timeValue.toString()); + } catch (Exception e) { + String errorMessage = + String.format("Caught exception while parsing time value: %s with format: %s", timeValue, _timeFormatSpec); + if (_continueOnError) { + LOGGER.debug(errorMessage); + record.putValue(_timeColumnName, null); + return record; + } else { + throw new IllegalStateException(errorMessage); + } + } + if (!TimeUtils.timeValueInValidRange(timeValueMs)) { + String errorMessage = + String.format("Time value: %s is not in valid range: %s", new DateTime(timeValueMs, DateTimeZone.UTC), + TimeUtils.VALID_TIME_INTERVAL); + if (_continueOnError) { + LOGGER.debug(errorMessage); + record.putValue(_timeColumnName, null); + return record; + } else { + throw new IllegalStateException(errorMessage); + } + } + return record; + } +} diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java index bcf0a40b7ec2..baded1b97a67 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java @@ -187,15 +187,14 @@ public void testDataTypeTransformerIncorrectDataTypes() { } @Test - public void testDataTypeTransformerInvalidTimestamp() { - // Invalid Timestamp and Validation disabled + public void testTimeValidationTransformer() { + // Invalid timestamp, validation disabled String timeCol = "timeCol"; + TableConfig tableConfig = + new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setTimeColumnName(timeCol).build(); Schema schema = new Schema.SchemaBuilder().addDateTime(timeCol, DataType.TIMESTAMP, "1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS").build(); - TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTimeColumnName(timeCol).setTableName("testTable").build(); - - RecordTransformer transformer = new DataTypeTransformer(tableConfig, schema); + RecordTransformer transformer = new TimeValidationTransformer(tableConfig, schema); GenericRow record = getRecord(); record.putValue(timeCol, 1L); for (int i = 0; i < NUM_ROUNDS; i++) { @@ -204,15 +203,11 @@ record = transformer.transform(record); assertEquals(record.getValue(timeCol), 1L); } - // Invalid Timestamp and Validation enabled + // Invalid timestamp, validation enabled IngestionConfig ingestionConfig = new IngestionConfig(); ingestionConfig.setRowTimeValueCheck(true); - tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTimeColumnName(timeCol) - .setIngestionConfig(ingestionConfig) - .setTableName("testTable").build(); - - RecordTransformer transformerWithValidation = new DataTypeTransformer(tableConfig, schema); + tableConfig.setIngestionConfig(ingestionConfig); + RecordTransformer transformerWithValidation = new TimeValidationTransformer(tableConfig, schema); GenericRow record1 = getRecord(); record1.putValue(timeCol, 1L); for (int i = 0; i < NUM_ROUNDS; i++) { @@ -220,15 +215,8 @@ record = transformer.transform(record); } // Invalid timestamp, validation enabled and ignoreErrors enabled - ingestionConfig = new IngestionConfig(); - ingestionConfig.setRowTimeValueCheck(true); ingestionConfig.setContinueOnError(true); - tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTimeColumnName(timeCol) - .setIngestionConfig(ingestionConfig) - .setTableName("testTable").build(); - - transformer = new DataTypeTransformer(tableConfig, schema); + transformer = new TimeValidationTransformer(tableConfig, schema); GenericRow record2 = getRecord(); record2.putValue(timeCol, 1L); for (int i = 0; i < NUM_ROUNDS; i++) { @@ -237,8 +225,9 @@ record = transformer.transform(record); assertNull(record2.getValue(timeCol)); } - // Valid timestamp - transformer = new DataTypeTransformer(TABLE_CONFIG, schema); + // Valid timestamp, validation enabled + ingestionConfig.setContinueOnError(false); + transformer = new TimeValidationTransformer(tableConfig, schema); GenericRow record3 = getRecord(); Long currentTimeMillis = System.currentTimeMillis(); record3.putValue(timeCol, currentTimeMillis); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java index da5fbc9ddac4..4e4bdea56e6a 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java @@ -116,7 +116,7 @@ public enum TimeColumnType { private boolean _onHeap = false; private boolean _nullHandlingEnabled = false; private boolean _continueOnError = false; - private boolean _rowTimeValueCheck = true; + private boolean _rowTimeValueCheck = false; private boolean _segmentTimeValueCheck = true; private boolean _failOnEmptySegment = false; private boolean _optimizeDictionaryForMetrics = false; diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java index 222d85d27a8b..4b26f33cd7e8 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/IngestionConfig.java @@ -52,7 +52,7 @@ public class IngestionConfig extends BaseJsonConfig { private boolean _continueOnError; @JsonPropertyDescription("Configs related to validate time value for each record during ingestion") - private boolean _rowTimeValueCheck = true; + private boolean _rowTimeValueCheck; @JsonPropertyDescription("Configs related to check time value for segment") private boolean _segmentTimeValueCheck = true;