diff --git a/client/mocks/glue.go b/client/mocks/glue.go index 44f665f14..df6e14f08 100644 --- a/client/mocks/glue.go +++ b/client/mocks/glue.go @@ -235,6 +235,26 @@ func (mr *MockGlueClientMockRecorder) GetMLTransforms(arg0, arg1 interface{}, ar return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetMLTransforms", reflect.TypeOf((*MockGlueClient)(nil).GetMLTransforms), varargs...) } +// GetPartitionIndexes mocks base method. +func (m *MockGlueClient) GetPartitionIndexes(arg0 context.Context, arg1 *glue.GetPartitionIndexesInput, arg2 ...func(*glue.Options)) (*glue.GetPartitionIndexesOutput, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "GetPartitionIndexes", varargs...) + ret0, _ := ret[0].(*glue.GetPartitionIndexesOutput) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetPartitionIndexes indicates an expected call of GetPartitionIndexes. +func (mr *MockGlueClientMockRecorder) GetPartitionIndexes(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPartitionIndexes", reflect.TypeOf((*MockGlueClient)(nil).GetPartitionIndexes), varargs...) +} + // GetSchema mocks base method. func (m *MockGlueClient) GetSchema(arg0 context.Context, arg1 *glue.GetSchemaInput, arg2 ...func(*glue.Options)) (*glue.GetSchemaOutput, error) { m.ctrl.T.Helper() diff --git a/client/resolvers.go b/client/resolvers.go index 40700af0f..b729e50f3 100644 --- a/client/resolvers.go +++ b/client/resolvers.go @@ -2,6 +2,7 @@ package client import ( "context" + "fmt" "reflect" "time" @@ -82,3 +83,47 @@ func ResolveTimestampField(path string, rfcs ...string) func(_ context.Context, } } } + +/* +SliceJsonResolver resolves slice of objects into a map[string]interface{}. +For example object: SliceJsonStruct{Nested: &SliceJsonStruct{ + Nested: &SliceJsonStruct{ + Value: []types1.Tag{{ + Key: "k1", + Value: "v1", + }, { + Key: "k2", + Value: "v2", + }}, + }, + }} +can be converted to map[string]interface{}{"k1":"v1","k2":"v2"} by setting a resolver with next params: +SliceJsonResolver("Nested.Nested.Value", "Key", "Value") +*/ +func SliceJsonResolver(path, keyPath, valuePath string) schema.ColumnResolver { + return func(_ context.Context, meta schema.ClientMeta, r *schema.Resource, c schema.Column) error { + var j map[string]interface{} + field := funk.Get(r.Item, path, funk.WithAllowZero()) + s := reflect.ValueOf(field) + if s.IsNil() { + return r.Set(c.Name, j) + } + j = make(map[string]interface{}) + if reflect.TypeOf(field).Kind() != reflect.Slice { + return diag.WrapError(fmt.Errorf("field: %s is not a slice", path)) + } + for i := 0; i < s.Len(); i++ { + key := funk.Get(s.Index(i).Interface(), keyPath, funk.WithAllowZero()) + value := funk.Get(s.Index(i).Interface(), valuePath, funk.WithAllowZero()) + k := reflect.ValueOf(key) + if k.Kind() == reflect.Ptr { + k = k.Elem() + } + if k.Kind() != reflect.String { + return diag.WrapError(fmt.Errorf("key field: %s is not a string", path)) + } + j[k.String()] = value + } + return r.Set(c.Name, j) + } +} diff --git a/client/resolvers_test.go b/client/resolvers_test.go index 19631277c..62701c650 100644 --- a/client/resolvers_test.go +++ b/client/resolvers_test.go @@ -12,6 +12,11 @@ import ( "github.com/stretchr/testify/assert" ) +type SliceJsonStruct struct { + Value []types1.Tag + Nested *SliceJsonStruct +} + func TestResolveTags(t *testing.T) { cases := []struct { InputItem interface{} @@ -62,3 +67,83 @@ func TestResolveTags(t *testing.T) { assert.Equal(t, tc.ExpectedTags, r.Get(ta.Columns[0].Name)) } } + +func TestResolveSliceJson(t *testing.T) { + cases := []struct { + InputItem interface{} + ExpectedData map[string]interface{} + path string + keyPath string + valuePath string + }{ + { + InputItem: types1.ListWebhookItem{ // non-ptr + Tags: []types1.Tag{ + { + Key: aws.String("k1"), + Value: aws.String("v1"), + }, + }, + }, + ExpectedData: map[string]interface{}{"k1": aws.String("v1")}, + path: "Tags", + keyPath: "Key", + valuePath: "Value", + }, + { + InputItem: &types2.EventSubscription{ // ptr + Tags: []types2.Tag{ + { + Key: aws.String("k2"), + Value: aws.String("v2"), + }, + }, + }, + ExpectedData: map[string]interface{}{"k2": aws.String("v2")}, + path: "Tags", + keyPath: "Key", + valuePath: "Value", + }, + { + InputItem: SliceJsonStruct{Nested: &SliceJsonStruct{ + Nested: &SliceJsonStruct{ + Value: []types1.Tag{{ + Key: aws.String("k1"), + Value: aws.String("v1"), + }, { + Key: aws.String("k2"), + Value: aws.String("v2"), + }}, + }, + }}, + ExpectedData: map[string]interface{}{"k1": aws.String("v1"), "k2": aws.String("v2")}, + path: "Nested.Nested.Value", + keyPath: "Key", + valuePath: "Value", + }, + { + InputItem: types1.ListWebhookItem{ // non-ptr, nil + Tags: nil, + }, + ExpectedData: nil, + path: "Tags", + keyPath: "Key", + valuePath: "Value", + }, + } + + for _, tc := range cases { + ta := &schema.Table{ + Columns: []schema.Column{ + { + Name: "tags", + Type: schema.TypeJSON, + }, + }, + } + r := schema.NewResourceData(schema.PostgresDialect{}, ta, nil, tc.InputItem, nil, time.Now()) + err := SliceJsonResolver(tc.path, tc.keyPath, tc.valuePath)(context.Background(), nil, r, ta.Columns[0]) + assert.NoError(t, err) + assert.Equal(t, tc.ExpectedData, r.Get(ta.Columns[0].Name)) + } +} diff --git a/client/services.go b/client/services.go index 29ec7923f..33b375ff4 100644 --- a/client/services.go +++ b/client/services.go @@ -787,6 +787,7 @@ type GlueClient interface { GetDevEndpoints(ctx context.Context, params *glue.GetDevEndpointsInput, optFns ...func(*glue.Options)) (*glue.GetDevEndpointsOutput, error) GetCrawlers(ctx context.Context, params *glue.GetCrawlersInput, optFns ...func(*glue.Options)) (*glue.GetCrawlersOutput, error) GetSecurityConfigurations(ctx context.Context, params *glue.GetSecurityConfigurationsInput, optFns ...func(*glue.Options)) (*glue.GetSecurityConfigurationsOutput, error) + GetPartitionIndexes(ctx context.Context, params *glue.GetPartitionIndexesInput, optFns ...func(*glue.Options)) (*glue.GetPartitionIndexesOutput, error) GetClassifiers(ctx context.Context, params *glue.GetClassifiersInput, optFns ...func(*glue.Options)) (*glue.GetClassifiersOutput, error) GetConnections(ctx context.Context, params *glue.GetConnectionsInput, optFns ...func(*glue.Options)) (*glue.GetConnectionsOutput, error) } diff --git a/docs/tables/aws_glue_database_table_columns.md b/docs/tables/aws_glue_database_table_columns.md new file mode 100644 index 000000000..d499c368a --- /dev/null +++ b/docs/tables/aws_glue_database_table_columns.md @@ -0,0 +1,11 @@ + +# Table: aws_glue_database_table_columns +A column in a Table +## Columns +| Name | Type | Description | +| ------------- | ------------- | ----- | +|database_table_cq_id|uuid|Unique CloudQuery ID of aws_glue_database_tables table (FK)| +|name|text|The name of the Column| +|comment|text|A free-form text comment| +|parameters|jsonb|These key-value pairs define properties associated with the column| +|type|text|The data type of the Column| diff --git a/docs/tables/aws_glue_database_table_indexes.md b/docs/tables/aws_glue_database_table_indexes.md new file mode 100644 index 000000000..f183cf5b1 --- /dev/null +++ b/docs/tables/aws_glue_database_table_indexes.md @@ -0,0 +1,11 @@ + +# Table: aws_glue_database_table_indexes +A descriptor for a partition index in a table +## Columns +| Name | Type | Description | +| ------------- | ------------- | ----- | +|database_table_cq_id|uuid|Unique CloudQuery ID of aws_glue_database_tables table (FK)| +|index_name|text|The name of the partition index| +|index_status|text|The status of the partition index| +|keys|jsonb|A list of one or more keys, as KeySchemaElement structures, for the partition index| +|backfill_errors|jsonb|A list of errors that can occur when registering partition indexes for an existing table| diff --git a/docs/tables/aws_glue_database_tables.md b/docs/tables/aws_glue_database_tables.md index e218c65b4..2c740dd49 100644 --- a/docs/tables/aws_glue_database_tables.md +++ b/docs/tables/aws_glue_database_tables.md @@ -5,6 +5,8 @@ Represents a collection of related data organized in columns and rows | Name | Type | Description | | ------------- | ------------- | ----- | |database_cq_id|uuid|Unique CloudQuery ID of aws_glue_databases table (FK)| +|parameters|jsonb|| +|storage_parameters|jsonb|| |name|text|The table name| |catalog_id|text|The ID of the Data Catalog in which the table resides| |create_time|timestamp without time zone|The time when the table definition was created in the Data Catalog| @@ -15,9 +17,21 @@ Represents a collection of related data organized in columns and rows |last_access_time|timestamp without time zone|The last time that the table was accessed| |last_analyzed_time|timestamp without time zone|The last time that column statistics were computed for this table| |owner|text|The owner of the table| -|parameters|jsonb|These key-value pairs define properties associated with the table| |retention|bigint|The retention time for this table| -|storage_descriptor|jsonb|A storage descriptor containing information about the physical storage of this table| +|additional_locations|text[]|A list of locations that point to the path where a Delta table is located| +|bucket_columns|text[]|A list of reducer grouping columns, clustering columns, and bucketing columns in the table| +|compressed|boolean|True if the data in the table is compressed, or False if not| +|input_format|text|The input format: SequenceFileInputFormat (binary), or TextInputFormat, or a custom format| +|location|text|The physical location of the table| +|number_of_buckets|bigint|Must be specified if the table contains any dimension columns| +|output_format|text|The output format: SequenceFileOutputFormat (binary), or IgnoreKeyTextOutputFormat, or a custom format| +|schema_reference_schema_id|jsonb|A structure that contains schema identity fields| +|schema_reference_schema_version_id|text|The unique ID assigned to a version of the schema| +|schema_reference_schema_version_number|bigint|The version number of the schema| +|serde_info|jsonb|The serialization/deserialization (SerDe) information| +|skewed_info|jsonb|The information about values that appear frequently in a column (skewed values)| +|sort_columns|jsonb|A list specifying the sort order of each bucket in the table| +|stored_as_sub_directories|boolean|True if the table data is stored in subdirectories, or False if not| |table_type|text|The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc)| |target_table_catalog_id|text|The ID of the Data Catalog in which the table resides| |target_table_database_name|text|The name of the catalog database that contains the target table| diff --git a/resources/services/glue/databases.go b/resources/services/glue/databases.go index ac72e4718..9a961abec 100644 --- a/resources/services/glue/databases.go +++ b/resources/services/glue/databases.go @@ -100,6 +100,16 @@ func Databases() *schema.Table { Type: schema.TypeUUID, Resolver: schema.ParentIdResolver, }, + { + Name: "parameters", + Type: schema.TypeJSON, + Resolver: schema.PathResolver("Parameters"), + }, + { + Name: "storage_parameters", + Type: schema.TypeJSON, + Resolver: schema.PathResolver("StorageDescriptor.Parameters"), + }, { Name: "name", Description: "The table name", @@ -151,19 +161,93 @@ func Databases() *schema.Table { Type: schema.TypeString, }, { - Name: "parameters", - Description: "These key-value pairs define properties associated with the table", + Name: "retention", + Description: "The retention time for this table", + Type: schema.TypeBigInt, + }, + { + Name: "additional_locations", + Description: "A list of locations that point to the path where a Delta table is located", + Type: schema.TypeStringArray, + Resolver: schema.PathResolver("StorageDescriptor.AdditionalLocations"), + }, + { + Name: "bucket_columns", + Description: "A list of reducer grouping columns, clustering columns, and bucketing columns in the table", + Type: schema.TypeStringArray, + Resolver: schema.PathResolver("StorageDescriptor.BucketColumns"), + }, + { + Name: "compressed", + Description: "True if the data in the table is compressed, or False if not", + Type: schema.TypeBool, + Resolver: schema.PathResolver("StorageDescriptor.Compressed"), + }, + { + Name: "input_format", + Description: "The input format: SequenceFileInputFormat (binary), or TextInputFormat, or a custom format", + Type: schema.TypeString, + Resolver: schema.PathResolver("StorageDescriptor.InputFormat"), + }, + { + Name: "location", + Description: "The physical location of the table", + Type: schema.TypeString, + Resolver: schema.PathResolver("StorageDescriptor.Location"), + }, + { + Name: "number_of_buckets", + Description: "Must be specified if the table contains any dimension columns", + Type: schema.TypeBigInt, + Resolver: schema.PathResolver("StorageDescriptor.NumberOfBuckets"), + }, + { + Name: "output_format", + Description: "The output format: SequenceFileOutputFormat (binary), or IgnoreKeyTextOutputFormat, or a custom format", + Type: schema.TypeString, + Resolver: schema.PathResolver("StorageDescriptor.OutputFormat"), + }, + { + Name: "schema_reference_schema_id", + Description: "A structure that contains schema identity fields", Type: schema.TypeJSON, + Resolver: schema.PathResolver("StorageDescriptor.SchemaReference.SchemaId"), }, { - Name: "retention", - Description: "The retention time for this table", + Name: "schema_reference_schema_version_id", + Description: "The unique ID assigned to a version of the schema", + Type: schema.TypeString, + Resolver: schema.PathResolver("StorageDescriptor.SchemaReference.SchemaVersionId"), + }, + { + Name: "schema_reference_schema_version_number", + Description: "The version number of the schema", Type: schema.TypeBigInt, + Resolver: schema.PathResolver("StorageDescriptor.SchemaReference.SchemaVersionNumber"), + }, + { + Name: "serde_info", + Description: "The serialization/deserialization (SerDe) information", + Type: schema.TypeJSON, + Resolver: schema.PathResolver("StorageDescriptor.SerdeInfo"), }, { - Name: "storage_descriptor", - Description: "A storage descriptor containing information about the physical storage of this table", + Name: "skewed_info", + Description: "The information about values that appear frequently in a column (skewed values)", Type: schema.TypeJSON, + Resolver: schema.PathResolver("StorageDescriptor.SkewedInfo"), + }, + { + Name: "sort_columns", + Description: "A list specifying the sort order of each bucket in the table", + Type: schema.TypeJSON, + Resolver: schema.PathResolver("StorageDescriptor.SortColumns"), + }, + { + Name: "stored_as_sub_directories", + Description: "True if the table data is stored in subdirectories, or False if not", + Type: schema.TypeBool, + Resolver: schema.PathResolver("StorageDescriptor.StoredAsSubDirectories"), }, { Name: "table_type", @@ -243,6 +327,73 @@ func Databases() *schema.Table { }, }, }, + { + Name: "aws_glue_database_table_columns", + Description: "A column in a Table", + Resolver: schema.PathTableResolver("StorageDescriptor.Columns"), + Columns: []schema.Column{ + { + Name: "database_table_cq_id", + Description: "Unique CloudQuery ID of aws_glue_database_tables table (FK)", + Type: schema.TypeUUID, + Resolver: schema.ParentIdResolver, + }, + { + Name: "name", + Description: "The name of the Column", + Type: schema.TypeString, + }, + { + Name: "comment", + Description: "A free-form text comment", + Type: schema.TypeString, + }, + { + Name: "parameters", + Description: "These key-value pairs define properties associated with the column", + Type: schema.TypeJSON, + }, + { + Name: "type", + Description: "The data type of the Column", + Type: schema.TypeString, + }, + }, + }, + { + Name: "aws_glue_database_table_indexes", + Description: "A descriptor for a partition index in a table", + Resolver: fetchGlueDatabaseTableIndexes, + Columns: []schema.Column{ + { + Name: "database_table_cq_id", + Description: "Unique CloudQuery ID of aws_glue_database_tables table (FK)", + Type: schema.TypeUUID, + Resolver: schema.ParentIdResolver, + }, + { + Name: "index_name", + Description: "The name of the partition index", + Type: schema.TypeString, + }, + { + Name: "index_status", + Description: "The status of the partition index", + Type: schema.TypeString, + }, + { + Name: "keys", + Description: "A list of one or more keys, as KeySchemaElement structures, for the partition index", + Type: schema.TypeJSON, + Resolver: client.SliceJsonResolver("Keys", "Name", "Type"), + }, + { + Name: "backfill_errors", + Description: "A list of errors that can occur when registering partition indexes for an existing table", + Type: schema.TypeJSON, + }, + }, + }, }, }, }, @@ -295,6 +446,25 @@ func fetchGlueDatabaseTables(ctx context.Context, meta schema.ClientMeta, parent } return nil } +func fetchGlueDatabaseTableIndexes(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error { + cl := meta.(*client.Client) + svc := cl.Services().Glue + d := parent.Parent.Item.(types.Database) + t := parent.Item.(types.Table) + input := glue.GetPartitionIndexesInput{DatabaseName: d.Name, CatalogId: d.CatalogId, TableName: t.Name} + for { + result, err := svc.GetPartitionIndexes(ctx, &input) + if err != nil { + return diag.WrapError(err) + } + res <- result.PartitionIndexDescriptorList + if aws.ToString(result.NextToken) == "" { + break + } + input.NextToken = result.NextToken + } + return nil +} // ==================================================================================================================== // User Defined Helpers diff --git a/resources/services/glue/databases.hcl b/resources/services/glue/databases.hcl index acb1f066e..6d908b3a9 100644 --- a/resources/services/glue/databases.hcl +++ b/resources/services/glue/databases.hcl @@ -45,8 +45,60 @@ resource "aws" "glue" "databases" { path = "github.com/aws/aws-sdk-go-v2/service/glue/types.Table" column "storage_descriptor" { + skip_prefix = true + } + + column "skewed_info" { + type = "json" + } + + column "sort_columns" { + type = "json" + } + + column "schema_reference_schema_id" { + type = "json" + } + + column "serde_info" { type = "json" } + + column "parameters" { + skip = true + } + + userDefinedColumn "parameters" { + type = "json" + resolver "resolverSliceToJson" { + path = "github.com/cloudquery/cq-provider-sdk/provider/schema.PathResolver" + params = ["Parameters"] + } + } + + userDefinedColumn "storage_parameters" { + type = "json" + resolver "resolverSliceToJson" { + path = "github.com/cloudquery/cq-provider-sdk/provider/schema.PathResolver" + params = ["StorageDescriptor.Parameters"] + } + } + + user_relation "aws" "glue" "indexes" { + path = "github.com/aws/aws-sdk-go-v2/service/glue/types.PartitionIndexDescriptor" + + column "keys" { + type = "json" + resolver "resolverSliceToJson" { + path = "github.com/cloudquery/cq-provider-aws/client.SliceJsonResolver" + params = ["Keys", "Name", "Type"] + } + } + + column "backfill_errors" { + type = "json" + } + } } column "target_database_database_name" { diff --git a/resources/services/glue/databases_mock_test.go b/resources/services/glue/databases_mock_test.go index 164d3e35b..55528a70d 100644 --- a/resources/services/glue/databases_mock_test.go +++ b/resources/services/glue/databases_mock_test.go @@ -24,6 +24,11 @@ func buildDatabasesMock(t *testing.T, ctrl *gomock.Controller) client.Services { tb.NextToken = nil m.EXPECT().GetTables(gomock.Any(), gomock.Any()).Return(&tb, nil) + i := glue.GetPartitionIndexesOutput{} + require.NoError(t, faker.FakeData(&i)) + i.NextToken = nil + m.EXPECT().GetPartitionIndexes(gomock.Any(), gomock.Any()).Return(&i, nil) + return client.Services{ Glue: m, }