Skip to content

Commit

Permalink
Use ISODateTimeFormat as default for SIMPLE_DATE_FORMAT (#9378)
Browse files Browse the repository at this point in the history
* Use ISODateTimeFormat as default for SIMPLE_DATE_FORMAT

* Fix linting

* Fix tests

* Add default MILLISECONDS for EPOCH in pipe format

* Reduce log level

Co-authored-by: Kartik Khare <[email protected]>
  • Loading branch information
KKcorps and Kartik Khare authored Sep 16, 2022
1 parent bfa2a5a commit 9b3ac2a
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.pinot.spi.data.DateTimeFormatSpec;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.ISODateTimeFormat;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand Down Expand Up @@ -86,6 +87,19 @@ public Object[][] provideTestFromFormatToMillisData() {
"TIMESTAMP", "2017-07-01 00:00:00", Timestamp.valueOf("2017-07-01 00:00:00").getTime()
});
entries.add(new Object[]{"TIMESTAMP", "1498892400000", 1498892400000L});
entries.add(new Object[]{
"SIMPLE_DATE_FORMAT", "2017-07-01",
DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC().parseMillis("20170701")
});
entries.add(new Object[]{
"SIMPLE_DATE_FORMAT", "2017-07-01T12:45:50",
DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss").withZoneUTC().parseMillis("2017-07-01T12:45:50")
});
entries.add(new Object[]{
"SIMPLE_DATE_FORMAT", "2017",
DateTimeFormat.forPattern("yyyy").withZoneUTC().parseMillis("2017")
});

entries.add(new Object[]{
"SIMPLE_DATE_FORMAT|yyyyMMdd", "20170701",
DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC().parseMillis("20170701")
Expand Down Expand Up @@ -200,6 +214,11 @@ public Object[][] provideTestFromMillisToFormatData() {
"SIMPLE_DATE_FORMAT|M/d/yyyy h a", 1502066750000L,
DateTimeFormat.forPattern("M/d/yyyy h a").withZoneUTC().withLocale(Locale.ENGLISH).print(1502066750000L)
});

entries.add(new Object[]{
"SIMPLE_DATE_FORMAT", 1502066750000L,
ISODateTimeFormat.dateTimeNoMillis().withZoneUTC().withLocale(Locale.ENGLISH).print(1502066750000L)
});
return entries.toArray(new Object[entries.size()][]);
}

Expand Down Expand Up @@ -297,13 +316,21 @@ public Object[][] provideTestGetFromFormatData() {
new Object[]{"TIMESTAMP", 1, TimeUnit.MILLISECONDS, TimeFormat.TIMESTAMP, null,
DateTimeZone.UTC});

entries.add(
new Object[]{"EPOCH", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC});

entries.add(
new Object[]{"EPOCH|HOURS|1", 1, TimeUnit.HOURS, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC});

entries.add(new Object[]{
"EPOCH|MINUTES|5", 5, TimeUnit.MINUTES, DateTimeFieldSpec.TimeFormat.EPOCH, null, DateTimeZone.UTC
});

entries.add(new Object[]{
"SIMPLE_DATE_FORMAT", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
null, DateTimeZone.UTC
});

entries.add(new Object[]{
"SIMPLE_DATE_FORMAT|yyyyMMdd", 1, TimeUnit.MILLISECONDS, DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT,
"yyyyMMdd", DateTimeZone.UTC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@ public Object[][] provideTestFormatData() {
List<Object[]> entries = new ArrayList<>();
entries.add(new Object[]{name, dataType, "1:hours", granularity, true, null});
entries.add(new Object[]{name, dataType, "one_hours", granularity, true, null});
entries.add(new Object[]{name, dataType, "1:HOURS:SIMPLE_DATE_FORMAT", granularity, true, null});
entries.add(new Object[]{name, dataType, "1:hour:EPOCH", granularity, true, null});
entries.add(new Object[]{name, dataType, "1:HOUR:EPOCH:yyyyMMdd", granularity, true, null});
entries.add(new Object[]{name, dataType, "0:HOURS:EPOCH", granularity, true, null});
Expand All @@ -272,6 +271,12 @@ public Object[][] provideTestFormatData() {
name, dataType, "1:HOURS:EPOCH", granularity, false,
new DateTimeFieldSpec(name, dataType, "1:HOURS:EPOCH", granularity)
});

entries.add(new Object[]{
name, dataType, "1:DAYS:SIMPLE_DATE_FORMAT", granularity, false,
new DateTimeFieldSpec(name, dataType, "1:DAYS:SIMPLE_DATE_FORMAT", granularity)
});

entries.add(new Object[]{
name, dataType, "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", granularity, false,
new DateTimeFieldSpec(name, dataType, "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", granularity)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ public void testDateTimeFieldSpec()
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ "\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\","
+ "\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT\",\"granularity\":\"1:DAYS\"}]}");
checkValidationFails(schema);
SchemaUtils.validate(schema);

schema = Schema.fromString(
"{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ {\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,24 @@
*/
package org.apache.pinot.spi.data;

import com.google.common.base.Preconditions;
import java.util.Locale;
import java.util.Objects;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.lang3.StringUtils;
import org.apache.pinot.spi.data.DateTimeFieldSpec.TimeFormat;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class DateTimeFormatPatternSpec {
public static final Logger LOGGER = LoggerFactory.getLogger(DateTimeFormatPatternSpec.class);

public static final DateTimeZone DEFAULT_DATE_TIME_ZONE = DateTimeZone.UTC;
public static final Locale DEFAULT_LOCALE = Locale.ENGLISH;

Expand All @@ -56,22 +59,33 @@ public DateTimeFormatPatternSpec(TimeFormat timeFormat) {
public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPatternWithTz) {
_timeFormat = timeFormat;
if (timeFormat == TimeFormat.SIMPLE_DATE_FORMAT) {
Preconditions.checkArgument(StringUtils.isNotEmpty(sdfPatternWithTz), "Must provide SIMPLE_DATE_FORMAT pattern");
Matcher m = SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz);
if (m.find()) {
_sdfPattern = m.group(SDF_PATTERN_GROUP).trim();
String timeZone = m.group(TIME_ZONE_GROUP).trim();
try {
_dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone));
} catch (Exception e) {
throw new IllegalArgumentException("Invalid time zone: " + timeZone);
if (sdfPatternWithTz != null) {
Matcher m = SDF_PATTERN_WITH_TIMEZONE.matcher(sdfPatternWithTz);
if (m.find()) {
_sdfPattern = m.group(SDF_PATTERN_GROUP).trim();
String timeZone = m.group(TIME_ZONE_GROUP).trim();
try {
_dateTimeZone = DateTimeZone.forTimeZone(TimeZone.getTimeZone(timeZone));
} catch (Exception e) {
throw new IllegalArgumentException("Invalid time zone: " + timeZone);
}
} else {
_sdfPattern = sdfPatternWithTz;
_dateTimeZone = DEFAULT_DATE_TIME_ZONE;
}
} else {
_sdfPattern = sdfPatternWithTz;
_sdfPattern = null;
_dateTimeZone = DEFAULT_DATE_TIME_ZONE;
}
try {
_dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
if (_sdfPattern == null) {
LOGGER.debug("SIMPLE_DATE_FORMAT pattern was found to be null. Using ISODateTimeFormat as default");
_dateTimeFormatter =
ISODateTimeFormat.dateOptionalTimeParser().withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
} else {
_dateTimeFormatter =
DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
}
} catch (Exception e) {
throw new IllegalArgumentException("Invalid SIMPLE_DATE_FORMAT pattern: " + _sdfPattern);
}
Expand All @@ -85,7 +99,6 @@ public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPatt
public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPattern, @Nullable String timeZone) {
_timeFormat = timeFormat;
if (_timeFormat == TimeFormat.SIMPLE_DATE_FORMAT) {
Preconditions.checkArgument(StringUtils.isNotEmpty(sdfPattern), "Must provide SIMPLE_DATE_FORMAT pattern");
_sdfPattern = sdfPattern;
if (timeZone != null) {
try {
Expand All @@ -97,7 +110,14 @@ public DateTimeFormatPatternSpec(TimeFormat timeFormat, @Nullable String sdfPatt
_dateTimeZone = DEFAULT_DATE_TIME_ZONE;
}
try {
_dateTimeFormatter = DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
if (_sdfPattern == null) {
LOGGER.debug("SIMPLE_DATE_FORMAT pattern was found to be null. Using ISODateTimeFormat as default");
_dateTimeFormatter =
ISODateTimeFormat.dateOptionalTimeParser().withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
} else {
_dateTimeFormatter =
DateTimeFormat.forPattern(_sdfPattern).withZone(_dateTimeZone).withLocale(DEFAULT_LOCALE);
}
} catch (Exception e) {
throw new IllegalArgumentException("Invalid SIMPLE_DATE_FORMAT pattern: " + _sdfPattern);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.pinot.spi.utils.TimestampUtils;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;


/**
Expand Down Expand Up @@ -113,10 +114,8 @@ public DateTimeFormatSpec(String format) {
case SIMPLE_DATE_FORMAT:
_size = 1;
_unitSpec = DateTimeFormatUnitSpec.MILLISECONDS;
Preconditions.checkArgument(tokens.length == COLON_FORMAT_MAX_TOKENS,
"Invalid SIMPLE_DATE_FORMAT format: %s, must be of format "
+ "'<size>:<timeUnit>:SIMPLE_DATE_FORMAT:<patternWithTz>'", format);
String patternStr = tokens[COLON_FORMAT_PATTERN_POSITION];
String patternStr =
tokens.length > COLON_FORMAT_PATTERN_POSITION ? tokens[COLON_FORMAT_PATTERN_POSITION] : null;
try {
_patternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, patternStr);
} catch (Exception e) {
Expand Down Expand Up @@ -156,10 +155,9 @@ public DateTimeFormatSpec(String format) {
} else {
_size = 1;
}
Preconditions.checkArgument(tokens.length > PIPE_FORMAT_TIME_UNIT_POSITION,
"Invalid EPOCH format: %s, must be of format 'EPOCH|<timeUnit>(|<size>)'", format);
try {
_unitSpec = new DateTimeFormatUnitSpec(tokens[PIPE_FORMAT_TIME_UNIT_POSITION]);
_unitSpec = tokens.length > PIPE_FORMAT_TIME_UNIT_POSITION ? new DateTimeFormatUnitSpec(
tokens[PIPE_FORMAT_TIME_UNIT_POSITION]) : DateTimeFormatUnitSpec.MILLISECONDS;
} catch (Exception e) {
throw new IllegalArgumentException(
String.format("Invalid time unit: %s in format: %s", tokens[PIPE_FORMAT_TIME_UNIT_POSITION], format));
Expand All @@ -174,9 +172,6 @@ public DateTimeFormatSpec(String format) {
case SIMPLE_DATE_FORMAT:
_size = 1;
_unitSpec = DateTimeFormatUnitSpec.MILLISECONDS;
Preconditions.checkArgument(tokens.length > PIPE_FORMAT_PATTERN_POSITION,
"Invalid SIMPLE_DATE_FORMAT format: %s, must be of format 'SIMPLE_DATE_FORMAT|<pattern>(|<timeZone>)'",
format);
if (tokens.length > PIPE_FORMAT_TIME_ZONE_POSITION) {
try {
_patternSpec =
Expand All @@ -189,8 +184,9 @@ public DateTimeFormatSpec(String format) {
}
} else {
try {
_patternSpec =
new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, tokens[PIPE_FORMAT_PATTERN_POSITION]);
String pattern =
tokens.length > PIPE_FORMAT_PATTERN_POSITION ? tokens[PIPE_FORMAT_PATTERN_POSITION] : null;
_patternSpec = new DateTimeFormatPatternSpec(TimeFormat.SIMPLE_DATE_FORMAT, pattern);
} catch (Exception e) {
throw new IllegalArgumentException(String.format("Invalid SIMPLE_DATE_FORMAT pattern: %s in format: %s",
tokens[PIPE_FORMAT_PATTERN_POSITION], format));
Expand Down Expand Up @@ -277,7 +273,12 @@ public String fromMillisToFormat(long timeMs) {
case TIMESTAMP:
return new Timestamp(timeMs).toString();
case SIMPLE_DATE_FORMAT:
return _patternSpec.getDateTimeFormatter().print(timeMs);
if (_patternSpec.getSdfPattern() != null) {
return _patternSpec.getDateTimeFormatter().print(timeMs);
} else {
return ISODateTimeFormat.dateTimeNoMillis().withZone(DateTimeFormatPatternSpec.DEFAULT_DATE_TIME_ZONE)
.withLocale(DateTimeFormatPatternSpec.DEFAULT_LOCALE).print(timeMs);
}
default:
throw new IllegalStateException("Unsupported time format: " + _patternSpec.getTimeFormat());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ public void testDateTimeFormatSpec() {
assertEquals(new DateTimeFormatSpec("SIMPLE_DATE_FORMAT|yyyy-MM-dd|CST"), dateTimeFormatSpec);

assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("1:DAY"));
assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH"));

assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("one:DAYS:EPOCH"));
assertThrows(IllegalArgumentException.class, () -> new DateTimeFormatSpec("EPOCH|DAYS|one"));
Expand Down

0 comments on commit 9b3ac2a

Please sign in to comment.