diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java b/api/src/main/java/org/apache/iceberg/Accessors.java index 08233624f244..19721b7a94a1 100644 --- a/api/src/main/java/org/apache/iceberg/Accessors.java +++ b/api/src/main/java/org/apache/iceberg/Accessors.java @@ -232,6 +232,10 @@ public Map> struct( return accessors; } + @Override public Map> variant() { + return null; + } + @Override public Map> field( Types.NestedField field, Map> fieldResult) { diff --git a/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java b/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java index 4692a8c400f8..814bb72f201c 100644 --- a/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java +++ b/api/src/main/java/org/apache/iceberg/types/GetProjectedIds.java @@ -74,4 +74,9 @@ public Set map(Types.MapType map, Set keyResult, Set } return fieldIds; } + + @Override + public Set variant() { + return null; + } } diff --git a/api/src/main/java/org/apache/iceberg/types/IndexById.java b/api/src/main/java/org/apache/iceberg/types/IndexById.java index 40280c5ed9dd..7c36a3ec241a 100644 --- a/api/src/main/java/org/apache/iceberg/types/IndexById.java +++ b/api/src/main/java/org/apache/iceberg/types/IndexById.java @@ -64,4 +64,9 @@ public Map map( } return null; } + + @Override + public Map variant() { + return null; + } } diff --git a/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java b/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java index 42f0da38167d..a9330cdd730f 100644 --- a/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java +++ b/api/src/main/java/org/apache/iceberg/types/PrimitiveHolder.java @@ -33,6 +33,6 @@ class PrimitiveHolder implements Serializable { } Object readResolve() throws ObjectStreamException { - return Types.fromPrimitiveString(typeAsString); + return Types.fromTypeString(typeAsString); } } diff --git a/api/src/main/java/org/apache/iceberg/types/PruneColumns.java b/api/src/main/java/org/apache/iceberg/types/PruneColumns.java index daf2e6bbc0ca..d9230ce6e02b 100644 --- a/api/src/main/java/org/apache/iceberg/types/PruneColumns.java +++ b/api/src/main/java/org/apache/iceberg/types/PruneColumns.java @@ -159,6 +159,11 @@ public Type map(Types.MapType map, Type ignored, Type valueResult) { return null; } + @Override + public Type variant() { + return null; + } + @Override public Type primitive(Type.PrimitiveType primitive) { return null; diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 996556f171b9..d4cf67db7547 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -614,7 +614,7 @@ public T map(Types.MapType map, T keyResult, T valueResult) { } public T variant() { - return null; + throw new UnsupportedOperationException("Unsupported type: variant"); } public T primitive(Type.PrimitiveType primitive) { diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index 68d6680db8e3..073e8823fc19 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -39,8 +39,8 @@ public class Types { private Types() {} - private static final ImmutableMap TYPES = - ImmutableMap.builder() + private static final ImmutableMap TYPES = + ImmutableMap.builder() .put(BooleanType.get().toString(), BooleanType.get()) .put(IntegerType.get().toString(), IntegerType.get()) .put(LongType.get().toString(), LongType.get()) @@ -55,21 +55,14 @@ private Types() {} .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) + .put(VariantType.get().toString(), VariantType.get()) .buildOrThrow(); private static final Pattern FIXED = Pattern.compile("fixed\\[\\s*(\\d+)\\s*\\]"); private static final Pattern DECIMAL = Pattern.compile("decimal\\(\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\)"); - public static Type typeFromTypeString(String typeString) { - if (VariantType.get().toString().equalsIgnoreCase(typeString)) { - return Types.VariantType.get(); - } - - return Types.fromPrimitiveString(typeString); - } - - public static PrimitiveType fromPrimitiveString(String typeString) { + public static Type fromTypeString(String typeString) { String lowerTypeString = typeString.toLowerCase(Locale.ROOT); if (TYPES.containsKey(lowerTypeString)) { return TYPES.get(lowerTypeString); @@ -85,7 +78,7 @@ public static PrimitiveType fromPrimitiveString(String typeString) { return DecimalType.of(Integer.parseInt(decimal.group(1)), Integer.parseInt(decimal.group(2))); } - throw new IllegalArgumentException("Cannot parse type string to primitive: " + typeString); + throw new IllegalArgumentException("Cannot parse type string to primitive or variant: " + typeString); } public static class BooleanType extends PrimitiveType { diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java index 7d90e7cf6e24..19d477a53856 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java @@ -23,44 +23,40 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.util.Map; import java.util.Set; -import java.util.stream.Stream; import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.types.Types.IntegerType; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; public class TestTypeUtil { - private static Stream testTypes() { - return Stream.of(Arguments.of(Types.IntegerType.get()), Arguments.of(Types.VariantType.get())); - } - - @ParameterizedTest - @MethodSource("testTypes") - public void testReassignIdsDuplicateColumns(Type testType) { + @Test + public void testReassignIdsDuplicateColumns() { Schema schema = - new Schema(required(0, "a", testType), required(1, "A", Types.IntegerType.get())); + new Schema( + required(0, "a", Types.IntegerType.get()), required(1, "A", Types.IntegerType.get())); Schema sourceSchema = - new Schema(required(1, "a", testType), required(2, "A", Types.IntegerType.get())); + new Schema( + required(1, "a", Types.IntegerType.get()), required(2, "A", Types.IntegerType.get())); final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema); assertThat(actualSchema.asStruct()).isEqualTo(sourceSchema.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testReassignIdsWithIdentifier(Type testType) { + @Test + public void testReassignIdsWithIdentifier() { Schema schema = new Schema( Lists.newArrayList( - required(0, "a", Types.IntegerType.get()), required(1, "A", testType)), + required(0, "a", Types.IntegerType.get()), + required(1, "A", Types.IntegerType.get())), Sets.newHashSet(0)); Schema sourceSchema = new Schema( Lists.newArrayList( - required(1, "a", Types.IntegerType.get()), required(2, "A", testType)), + required(1, "a", Types.IntegerType.get()), + required(2, "A", Types.IntegerType.get())), Sets.newHashSet(1)); final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema); assertThat(actualSchema.asStruct()).isEqualTo(sourceSchema.asStruct()); @@ -69,18 +65,19 @@ public void testReassignIdsWithIdentifier(Type testType) { .isEqualTo(sourceSchema.identifierFieldIds()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testAssignIncreasingFreshIdWithIdentifier(Type testType) { + @Test + public void testAssignIncreasingFreshIdWithIdentifier() { Schema schema = new Schema( Lists.newArrayList( - required(10, "a", Types.IntegerType.get()), required(11, "A", testType)), + required(10, "a", Types.IntegerType.get()), + required(11, "A", Types.IntegerType.get())), Sets.newHashSet(10)); Schema expectedSchema = new Schema( Lists.newArrayList( - required(1, "a", Types.IntegerType.get()), required(2, "A", testType)), + required(1, "a", Types.IntegerType.get()), + required(2, "A", Types.IntegerType.get())), Sets.newHashSet(1)); final Schema actualSchema = TypeUtil.assignIncreasingFreshIds(schema); assertThat(actualSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); @@ -89,18 +86,19 @@ public void testAssignIncreasingFreshIdWithIdentifier(Type testType) { .isEqualTo(expectedSchema.identifierFieldIds()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testAssignIncreasingFreshIdNewIdentifier(Type testType) { + @Test + public void testAssignIncreasingFreshIdNewIdentifier() { Schema schema = new Schema( Lists.newArrayList( - required(10, "a", Types.IntegerType.get()), required(11, "A", testType)), + required(10, "a", Types.IntegerType.get()), + required(11, "A", Types.IntegerType.get())), Sets.newHashSet(10)); Schema sourceSchema = new Schema( Lists.newArrayList( - required(1, "a", Types.IntegerType.get()), required(2, "A", testType))); + required(1, "a", Types.IntegerType.get()), + required(2, "A", Types.IntegerType.get()))); final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema); assertThat(actualSchema.asStruct()).isEqualTo(sourceSchema.asStruct()); assertThat(actualSchema.identifierFieldIds()) @@ -108,14 +106,13 @@ public void testAssignIncreasingFreshIdNewIdentifier(Type testType) { .isEqualTo(Sets.newHashSet(sourceSchema.findField("a").fieldId())); } - @ParameterizedTest - @MethodSource("testTypes") - public void testProject(Type testType) { + @Test + public void testProject() { Schema schema = new Schema( Lists.newArrayList( required(10, "a", Types.IntegerType.get()), - required(11, "A", testType), + required(11, "A", Types.IntegerType.get()), required( 12, "someStruct", @@ -129,7 +126,7 @@ public void testProject(Type testType) { required(16, "c", Types.IntegerType.get()), required(17, "C", Types.IntegerType.get()))))))); - Schema expectedTop = new Schema(Lists.newArrayList(required(11, "A", testType))); + Schema expectedTop = new Schema(Lists.newArrayList(required(11, "A", Types.IntegerType.get()))); Schema actualTop = TypeUtil.project(schema, Sets.newHashSet(11)); assertThat(actualTop.asStruct()).isEqualTo(expectedTop.asStruct()); @@ -149,7 +146,7 @@ public void testProject(Type testType) { Schema expectedDepthTwo = new Schema( Lists.newArrayList( - required(11, "A", testType), + required(11, "A", Types.IntegerType.get()), required( 12, "someStruct", @@ -214,14 +211,13 @@ public void testProjectNaturallyEmpty() { assertThat(actualDepthThreeChildren.asStruct()).isEqualTo(expectedDepthThree.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testProjectEmpty(Type testType) { + @Test + public void testProjectEmpty() { Schema schema = new Schema( Lists.newArrayList( required(10, "a", Types.IntegerType.get()), - required(11, "A", testType), + required(11, "A", Types.IntegerType.get()), required( 12, "someStruct", @@ -253,14 +249,13 @@ public void testProjectEmpty(Type testType) { assertThat(actualDepthTwo.asStruct()).isEqualTo(expectedDepthTwo.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testSelect(Type testType) { + @Test + public void testSelect() { Schema schema = new Schema( Lists.newArrayList( required(10, "a", Types.IntegerType.get()), - required(11, "A", testType), + required(11, "A", Types.IntegerType.get()), required( 12, "someStruct", @@ -274,7 +269,7 @@ public void testSelect(Type testType) { required(16, "c", Types.IntegerType.get()), required(17, "C", Types.IntegerType.get()))))))); - Schema expectedTop = new Schema(Lists.newArrayList(required(11, "A", testType))); + Schema expectedTop = new Schema(Lists.newArrayList(required(11, "A", Types.IntegerType.get()))); Schema actualTop = TypeUtil.select(schema, Sets.newHashSet(11)); assertThat(actualTop.asStruct()).isEqualTo(expectedTop.asStruct()); @@ -302,7 +297,7 @@ public void testSelect(Type testType) { Schema expectedDepthTwo = new Schema( Lists.newArrayList( - required(11, "A", testType), + required(11, "A", Types.IntegerType.get()), required( 12, "someStruct", @@ -316,14 +311,13 @@ public void testSelect(Type testType) { assertThat(actualDepthTwo.asStruct()).isEqualTo(expectedDepthTwo.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testProjectMap(Type testType) { + @Test + public void testProjectMap() { // We can't partially project keys because it changes key equality Schema schema = new Schema( Lists.newArrayList( - required(10, "a", testType), + required(10, "a", Types.IntegerType.get()), required(11, "A", Types.IntegerType.get()), required( 12, @@ -355,14 +349,15 @@ public void testProjectMap(Type testType) { .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("Cannot explicitly project List or Map types"); - Schema expectedTopLevel = new Schema(Lists.newArrayList(required(10, "a", testType))); + Schema expectedTopLevel = + new Schema(Lists.newArrayList(required(10, "a", Types.IntegerType.get()))); Schema actualTopLevel = TypeUtil.project(schema, Sets.newHashSet(10)); assertThat(actualTopLevel.asStruct()).isEqualTo(expectedTopLevel.asStruct()); Schema expectedDepthOne = new Schema( Lists.newArrayList( - required(10, "a", testType), + required(10, "a", Types.IntegerType.get()), required( 12, "map", @@ -381,7 +376,7 @@ public void testProjectMap(Type testType) { Schema expectedDepthTwo = new Schema( Lists.newArrayList( - required(10, "a", testType), + required(10, "a", Types.IntegerType.get()), required( 12, "map", @@ -403,14 +398,13 @@ public void testProjectMap(Type testType) { assertThat(actualDepthTwo.asStruct()).isEqualTo(expectedDepthTwo.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testGetProjectedIds(Type testType) { + @Test + public void testGetProjectedIds() { Schema schema = new Schema( Lists.newArrayList( required(10, "a", Types.IntegerType.get()), - required(11, "A", testType), + required(11, "A", Types.IntegerType.get()), required(35, "emptyStruct", Types.StructType.of()), required( 12, @@ -431,9 +425,8 @@ public void testGetProjectedIds(Type testType) { assertThat(actualIds).isEqualTo(expectedIds); } - @ParameterizedTest - @MethodSource("testTypes") - public void testProjectListNested(Type testType) { + @Test + public void testProjectListNested() { Schema schema = new Schema( Lists.newArrayList( @@ -447,7 +440,7 @@ public void testProjectListNested(Type testType) { Types.MapType.ofRequired( 15, 16, - testType, + IntegerType.get(), Types.StructType.of( required(17, "x", Types.IntegerType.get()), required(18, "y", Types.IntegerType.get())))))))); @@ -474,15 +467,15 @@ public void testProjectListNested(Type testType) { 13, Types.ListType.ofRequired( 14, - Types.MapType.ofRequired(15, 16, testType, Types.StructType.of())))))); + Types.MapType.ofRequired( + 15, 16, IntegerType.get(), Types.StructType.of())))))); Schema actual = TypeUtil.project(schema, Sets.newHashSet(16)); assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testProjectMapNested(Type testType) { + @Test + public void testProjectMapNested() { Schema schema = new Schema( Lists.newArrayList( @@ -496,7 +489,7 @@ public void testProjectMapNested(Type testType) { Types.MapType.ofRequired( 15, 16, - testType, + Types.IntegerType.get(), Types.ListType.ofRequired( 17, Types.StructType.of( @@ -528,34 +521,33 @@ public void testProjectMapNested(Type testType) { Types.MapType.ofRequired( 15, 16, - testType, + Types.IntegerType.get(), Types.ListType.ofRequired(17, Types.StructType.of())))))); Schema actual = TypeUtil.project(schema, Sets.newHashSet(17)); assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testReassignIdsIllegalArgumentException(Type testType) { + @Test + public void testReassignIdsIllegalArgumentException() { Schema schema = - new Schema(required(1, "a", Types.IntegerType.get()), required(2, "b", testType)); + new Schema( + required(1, "a", Types.IntegerType.get()), required(2, "b", Types.IntegerType.get())); Schema sourceSchema = new Schema(required(1, "a", Types.IntegerType.get())); assertThatThrownBy(() -> TypeUtil.reassignIds(schema, sourceSchema)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Field b not found in source schema"); } - @ParameterizedTest - @MethodSource("testTypes") - public void testValidateSchemaViaIndexByName(Type testType) { + @Test + public void testValidateSchemaViaIndexByName() { Types.NestedField nestedType = Types.NestedField.required( 1, "a", Types.StructType.of( required(2, "b", Types.StructType.of(required(3, "c", Types.BooleanType.get()))), - required(4, "b.c", testType))); + required(4, "b.c", Types.BooleanType.get()))); assertThatThrownBy(() -> TypeUtil.indexByName(Types.StructType.of(nestedType))) .isInstanceOf(RuntimeException.class) @@ -585,8 +577,8 @@ public void testSelectNot() { required(3, "lat", Types.DoubleType.get()), required(4, "long", Types.DoubleType.get()))))); - Schema actualNoPrimitive = TypeUtil.selectNot(schema, Sets.newHashSet(1)); - assertThat(actualNoPrimitive.asStruct()).isEqualTo(expectedNoPrimitive.asStruct()); + Schema actualNoPrimitve = TypeUtil.selectNot(schema, Sets.newHashSet(1)); + assertThat(actualNoPrimitve.asStruct()).isEqualTo(expectedNoPrimitive.asStruct()); // Expected legacy behavior is to completely remove structs if their elements are removed Schema expectedNoStructElements = new Schema(required(1, "id", Types.LongType.get())); @@ -598,9 +590,8 @@ public void testSelectNot() { assertThat(actualNoStruct.asStruct()).isEqualTo(schema.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testReassignOrRefreshIds(Type testType) { + @Test + public void testReassignOrRefreshIds() { Schema schema = new Schema( Lists.newArrayList( @@ -611,12 +602,13 @@ public void testReassignOrRefreshIds(Type testType) { .withInitialDefault(23) .withWriteDefault(34) .build(), - required(12, "B", testType)), + required(12, "B", Types.IntegerType.get())), Sets.newHashSet(10)); Schema sourceSchema = new Schema( Lists.newArrayList( - required(1, "a", Types.IntegerType.get()), required(15, "B", testType))); + required(1, "a", Types.IntegerType.get()), + required(15, "B", Types.IntegerType.get()))); Schema actualSchema = TypeUtil.reassignOrRefreshIds(schema, sourceSchema); Schema expectedSchema = @@ -629,27 +621,52 @@ public void testReassignOrRefreshIds(Type testType) { .withInitialDefault(23) .withWriteDefault(34) .build(), - required(15, "B", testType))); + required(15, "B", Types.IntegerType.get()))); assertThat(actualSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); } - @ParameterizedTest - @MethodSource("testTypes") - public void testReassignOrRefreshIdsCaseInsensitive(Type testType) { + @Test + public void testReassignOrRefreshIdsCaseInsensitive() { Schema schema = new Schema( Lists.newArrayList( - required(1, "FIELD1", Types.IntegerType.get()), required(2, "FIELD2", testType))); + required(1, "FIELD1", Types.IntegerType.get()), + required(2, "FIELD2", Types.IntegerType.get()))); Schema sourceSchema = new Schema( Lists.newArrayList( - required(1, "field1", Types.IntegerType.get()), required(2, "field2", testType))); + required(1, "field1", Types.IntegerType.get()), + required(2, "field2", Types.IntegerType.get()))); final Schema actualSchema = TypeUtil.reassignOrRefreshIds(schema, sourceSchema, false); final Schema expectedSchema = new Schema( Lists.newArrayList( - required(1, "FIELD1", Types.IntegerType.get()), required(2, "FIELD2", testType))); + required(1, "FIELD1", Types.IntegerType.get()), + required(2, "FIELD2", Types.IntegerType.get()))); assertThat(actualSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); } + + @Test + public void testVariantType() { + Schema schema = + new Schema(required(0, "v", Types.VariantType.get()), required(1, "A", Types.IntegerType.get())); + Schema sourceSchema = + new Schema(required(1, "v", Types.VariantType.get()), required(2, "A", Types.IntegerType.get())); + final Schema reassignedSchema = TypeUtil.reassignIds(schema, sourceSchema); + assertThat(reassignedSchema.asStruct()).isEqualTo(sourceSchema.asStruct()); + + Schema expectedSchema = new Schema(Lists.newArrayList(required(1, "v", Types.VariantType.get()))); + Schema projectedSchema = TypeUtil.project(sourceSchema, Sets.newHashSet(1)); + assertThat(projectedSchema.asStruct()).isEqualTo(expectedSchema.asStruct()); + + Set projectedIds = TypeUtil.getProjectedIds(sourceSchema); + assertThat(Set.of(1,2)).isEqualTo(projectedIds); + + Map indexByIds = TypeUtil.indexById(sourceSchema.asStruct()); + assertThat(indexByIds.get(1).type().isVariantType()).isTrue(); + + Map indexNameByIds = TypeUtil.indexNameById(sourceSchema.asStruct()); + assertThat(indexNameByIds.get(1)).isEqualTo("v"); + } } diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index 226c53f1e9ce..cc8f699b1a9f 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -26,25 +26,28 @@ public class TestTypes { @Test - public void fromPrimitiveString() { - assertThat(Types.fromPrimitiveString("boolean")).isSameAs(Types.BooleanType.get()); - assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); + public void fromTypeString() { + assertThat(Types.fromTypeString("boolean")).isSameAs(Types.BooleanType.get()); + assertThat(Types.fromTypeString("BooLean")).isSameAs(Types.BooleanType.get()); - assertThat(Types.fromPrimitiveString("timestamp")).isSameAs(Types.TimestampType.withoutZone()); - assertThat(Types.fromPrimitiveString("timestamptz")).isSameAs(Types.TimestampType.withZone()); - assertThat(Types.fromPrimitiveString("timestamp_ns")) + assertThat(Types.fromTypeString("timestamp")).isSameAs(Types.TimestampType.withoutZone()); + assertThat(Types.fromTypeString("timestamptz")).isSameAs(Types.TimestampType.withZone()); + assertThat(Types.fromTypeString("timestamp_ns")) .isSameAs(Types.TimestampNanoType.withoutZone()); - assertThat(Types.fromPrimitiveString("timestamptz_ns")) + assertThat(Types.fromTypeString("timestamptz_ns")) .isSameAs(Types.TimestampNanoType.withZone()); - assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")).isEqualTo(Types.FixedType.ofLength(3)); + assertThat(Types.fromTypeString("Fixed[ 3 ]")).isEqualTo(Types.FixedType.ofLength(3)); - assertThat(Types.fromPrimitiveString("Decimal( 2 , 3 )")).isEqualTo(Types.DecimalType.of(2, 3)); + assertThat(Types.fromTypeString("Decimal( 2 , 3 )")).isEqualTo(Types.DecimalType.of(2, 3)); - assertThat(Types.fromPrimitiveString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3)); + assertThat(Types.fromTypeString("Decimal(2,3)")).isEqualTo(Types.DecimalType.of(2, 3)); + + assertThat(Types.fromTypeString("variant")).isSameAs(Types.VariantType.get()); + assertThat(Types.fromTypeString("Variant")).isSameAs(Types.VariantType.get()); assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> Types.fromPrimitiveString("Unknown")) + .isThrownBy(() -> Types.fromTypeString("Unknown")) .withMessageContaining("Unknown"); } } diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java b/core/src/main/java/org/apache/iceberg/SchemaParser.java index 22ad0e9af708..40ef13b004b4 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaParser.java +++ b/core/src/main/java/org/apache/iceberg/SchemaParser.java @@ -181,7 +181,7 @@ public static String toJson(Schema schema, boolean pretty) { private static Type typeFromJson(JsonNode json) { if (json.isTextual()) { - return Types.typeFromTypeString(json.asText()); + return Types.fromTypeString(json.asText()); } else if (json.isObject()) { JsonNode typeObj = json.get(TYPE); if (typeObj != null) { diff --git a/core/src/main/java/org/apache/iceberg/avro/BuildAvroProjection.java b/core/src/main/java/org/apache/iceberg/avro/BuildAvroProjection.java index aa31adae2692..8b0d5ab11a1a 100644 --- a/core/src/main/java/org/apache/iceberg/avro/BuildAvroProjection.java +++ b/core/src/main/java/org/apache/iceberg/avro/BuildAvroProjection.java @@ -56,6 +56,7 @@ class BuildAvroProjection extends AvroCustomOrderSchemaVisitor names, Iterable schemaIterable) { + // TODO: When the Variant logical type is introduced in Avro, handle the following in a separate visitor method if (current.isVariantType()) { return variant(record); } diff --git a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java index 05f8afaba10d..cba26c7ea83c 100644 --- a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java +++ b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java @@ -49,15 +49,6 @@ abstract class TypeToSchema extends TypeUtil.SchemaVisitor { private static final Schema UUID_SCHEMA = LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16)); private static final Schema BINARY_SCHEMA = Schema.create(Schema.Type.BYTES); - private static final Schema VARIANT_SCHEMA = - Schema.createRecord( - "variant", - null, - null, - false, - List.of( - new Schema.Field("metadata", BINARY_SCHEMA), - new Schema.Field("value", BINARY_SCHEMA))); static { TIMESTAMP_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false); @@ -198,7 +189,15 @@ public Schema map(Types.MapType map, Schema keySchema, Schema valueSchema) { @Override public Schema variant() { - return VARIANT_SCHEMA; + String recordName = "r" + fieldIds.peek(); + return Schema.createRecord( + recordName, + null, + null, + false, + List.of( + new Schema.Field("metadata", BINARY_SCHEMA), + new Schema.Field("value", BINARY_SCHEMA))); } @Override diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java index 46e52f9c3f44..6a65bf762880 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java @@ -30,7 +30,6 @@ import java.util.Map; import java.util.Set; import java.util.stream.IntStream; -import java.util.stream.Stream; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -43,9 +42,6 @@ import org.apache.iceberg.view.ViewVersion; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; public class TestMetadataUpdateParser { @@ -56,15 +52,6 @@ public class TestMetadataUpdateParser { Types.NestedField.required(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get())); - private static final Schema ID_VARIANTDATA_SCHEMA = - new Schema( - Types.NestedField.required(1, "id", Types.IntegerType.get()), - Types.NestedField.optional(2, "data", Types.VariantType.get())); - - private static Stream testSchemas() { - return Stream.of(Arguments.of(ID_DATA_SCHEMA), Arguments.of(ID_VARIANTDATA_SCHEMA)); - } - @Test public void testMetadataUpdateWithoutActionCannotDeserialize() { List invalidJson = @@ -121,19 +108,19 @@ public void testUpgradeFormatVersionFromJson() { } /** AddSchema * */ - @ParameterizedTest - @MethodSource("testSchemas") - public void testAddSchemaFromJson(Schema schema) { + @Test + public void testAddSchemaFromJson() { String action = MetadataUpdateParser.ADD_SCHEMA; + Schema schema = ID_DATA_SCHEMA; String json = String.format("{\"action\":\"add-schema\",\"schema\":%s}", SchemaParser.toJson(schema)); MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema); assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json)); } - @ParameterizedTest - @MethodSource("testSchemas") - public void testAddSchemaToJson(Schema schema) { + @Test + public void testAddSchemaToJson() { + Schema schema = ID_DATA_SCHEMA; int lastColumnId = schema.highestFieldId(); String expected = String.format( diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java index ebd197a68af0..40db5cfee2cb 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaParser.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaParser.java @@ -123,4 +123,14 @@ public void testPrimitiveTypeDefaultValues(Type.PrimitiveType type, Object defau assertThat(serialized.findField("col_with_default").initialDefault()).isEqualTo(defaultValue); assertThat(serialized.findField("col_with_default").writeDefault()).isEqualTo(defaultValue); } + + @Test + public void testVariantType() throws IOException { + Schema schema = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.optional(2, "data", Types.VariantType.get())); + + writeAndValidate(schema); + } } diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java index 656e72a0c19c..3a0423e8e2b7 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java @@ -75,7 +75,7 @@ private static NestedField[] primitiveFields( optional( atomicInteger.incrementAndGet(), type.toString(), - Types.fromPrimitiveString(type.toString()))) + Types.fromTypeString(type.toString()))) .toArray(NestedField[]::new); } diff --git a/core/src/test/java/org/apache/iceberg/avro/TestBuildAvroProjection.java b/core/src/test/java/org/apache/iceberg/avro/TestBuildAvroProjection.java index e1b795d93e71..79a2afccdb9a 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestBuildAvroProjection.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestBuildAvroProjection.java @@ -409,15 +409,14 @@ public void projectVariantSchemaUnchanged() { final org.apache.avro.Schema expected = SchemaBuilder.record("variant") + .prop(AvroSchemaUtil.FIELD_ID_PROP, "1") .namespace("unit.test") .fields() .name("metadata") - .prop(AvroSchemaUtil.FIELD_ID_PROP, "1") .type() .bytesType() .noDefault() .name("value") - .prop(AvroSchemaUtil.FIELD_ID_PROP, "2") .type() .bytesType() .noDefault()