Skip to content

Commit

Permalink
[590] Add Hudi Glue Catalog Sync Implementation
Browse files (browse the repository at this point in the history)
  • Loading branch information
Vamsi committed Feb 20, 2025
1 parent f194f4c commit cdecdec
Show file tree
Hide file tree
Showing 17 changed files with 1,429 additions and 132 deletions.
8 changes: 8 additions & 0 deletions xtable-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@
<version>${project.version}</version>
</dependency>

<!-- Hudi dependencies -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hive-sync</artifactId>
<version>${hudi.version}</version>
<scope>provided</scope>
</dependency>

<!-- Hadoop dependencies -->
<dependency>
<groupId>org.apache.hadoop</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import lombok.RequiredArgsConstructor;
import lombok.ToString;

import org.apache.hudi.hive.MultiPartKeysValueExtractor;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
Expand All @@ -54,6 +56,15 @@ public class GlueCatalogConfig {
@JsonProperty("externalCatalog.glue.credentialsProviderClass")
private final String clientCredentialsProviderClass;

@JsonProperty("externalCatalog.glue.schema_string_length_thresh")
private int schemaLengthThreshold = 4000;

@JsonProperty("externalCatalog.glue.partition_extractor_class")
private String partitionExtractorClass = MultiPartKeysValueExtractor.class.getName();

@JsonProperty("externalCatalog.glue.max_partitions_per_request")
private int maxPartitionsPerRequest = 1000;

/**
* In case a credentialsProviderClass is configured and requires additional properties for
* instantiation, those properties should start with {@link
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

package org.apache.xtable.glue;

import static org.apache.xtable.catalog.CatalogUtils.toHierarchicalTableIdentifier;

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
Expand All @@ -31,13 +29,10 @@
import org.apache.xtable.conversion.SourceTable;
import org.apache.xtable.exception.CatalogSyncException;
import org.apache.xtable.model.catalog.CatalogTableIdentifier;
import org.apache.xtable.model.catalog.HierarchicalTableIdentifier;
import org.apache.xtable.model.storage.CatalogType;
import org.apache.xtable.spi.extractor.CatalogConversionSource;

import software.amazon.awssdk.services.glue.GlueClient;
import software.amazon.awssdk.services.glue.model.GetTableRequest;
import software.amazon.awssdk.services.glue.model.GetTableResponse;
import software.amazon.awssdk.services.glue.model.GlueException;
import software.amazon.awssdk.services.glue.model.Table;

Expand All @@ -61,19 +56,12 @@ public GlueCatalogConversionSource(

@Override
public SourceTable getSourceTable(CatalogTableIdentifier tblIdentifier) {
HierarchicalTableIdentifier tableIdentifier = toHierarchicalTableIdentifier(tblIdentifier);
try {
GetTableResponse response =
glueClient.getTable(
GetTableRequest.builder()
.catalogId(glueCatalogConfig.getCatalogId())
.databaseName(tableIdentifier.getDatabaseName())
.name(tableIdentifier.getTableName())
.build());
Table table = response.table();
Table table =
GlueCatalogTableUtils.getTable(
glueClient, glueCatalogConfig.getCatalogId(), tblIdentifier);
if (table == null) {
throw new IllegalStateException(
String.format("table: %s is null", tableIdentifier.getId()));
throw new IllegalStateException(String.format("table: %s is null", tblIdentifier.getId()));
}

String tableFormat = TableFormatUtils.getTableFormat(table.parameters());
Expand All @@ -91,7 +79,7 @@ public SourceTable getSourceTable(CatalogTableIdentifier tblIdentifier) {
.additionalProperties(tableProperties)
.build();
} catch (GlueException e) {
throw new CatalogSyncException("Failed to get table: " + tableIdentifier.getId(), e);
throw new CatalogSyncException("Failed to get table: " + tblIdentifier.getId(), e);
}
}

Expand Down
Loading

0 comments on commit cdecdec

Please sign in to comment.