From d7722e502ce8102dc5122a7b24f0ebf2fe91c8b5 Mon Sep 17 00:00:00 2001 From: Andrii Romanenko Date: Fri, 5 Aug 2022 21:19:33 +0300 Subject: [PATCH] feat: Added glue classifiers (#1389) --- client/mocks/glue.go | 20 ++ client/services.go | 1 + docs/tables/aws_glue_classifiers.md | 37 +++ resources/provider/provider.go | 1 + resources/services/glue/classifiers.go | 251 ++++++++++++++++++++ resources/services/glue/classifiers.hcl | 48 ++++ resources/services/glue/classifiers_test.go | 30 +++ 7 files changed, 388 insertions(+) create mode 100644 docs/tables/aws_glue_classifiers.md create mode 100644 resources/services/glue/classifiers.go create mode 100644 resources/services/glue/classifiers.hcl create mode 100644 resources/services/glue/classifiers_test.go diff --git a/client/mocks/glue.go b/client/mocks/glue.go index 450ee3dea..a195a0912 100644 --- a/client/mocks/glue.go +++ b/client/mocks/glue.go @@ -35,6 +35,26 @@ func (m *MockGlueClient) EXPECT() *MockGlueClientMockRecorder { return m.recorder } +// GetClassifiers mocks base method. +func (m *MockGlueClient) GetClassifiers(arg0 context.Context, arg1 *glue.GetClassifiersInput, arg2 ...func(*glue.Options)) (*glue.GetClassifiersOutput, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "GetClassifiers", varargs...) + ret0, _ := ret[0].(*glue.GetClassifiersOutput) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetClassifiers indicates an expected call of GetClassifiers. +func (mr *MockGlueClientMockRecorder) GetClassifiers(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetClassifiers", reflect.TypeOf((*MockGlueClient)(nil).GetClassifiers), varargs...) +} + // GetCrawlers mocks base method. 
func (m *MockGlueClient) GetCrawlers(arg0 context.Context, arg1 *glue.GetCrawlersInput, arg2 ...func(*glue.Options)) (*glue.GetCrawlersOutput, error) { m.ctrl.T.Helper() diff --git a/client/services.go b/client/services.go index 3859a1c62..7ba48d34c 100644 --- a/client/services.go +++ b/client/services.go @@ -768,6 +768,7 @@ type GlueClient interface { GetDevEndpoints(ctx context.Context, params *glue.GetDevEndpointsInput, optFns ...func(*glue.Options)) (*glue.GetDevEndpointsOutput, error) GetCrawlers(ctx context.Context, params *glue.GetCrawlersInput, optFns ...func(*glue.Options)) (*glue.GetCrawlersOutput, error) GetSecurityConfigurations(ctx context.Context, params *glue.GetSecurityConfigurationsInput, optFns ...func(*glue.Options)) (*glue.GetSecurityConfigurationsOutput, error) + GetClassifiers(ctx context.Context, params *glue.GetClassifiersInput, optFns ...func(*glue.Options)) (*glue.GetClassifiersOutput, error) } //go:generate mockgen -package=mocks -destination=./mocks/kinesis.go . 
KinesisClient diff --git a/docs/tables/aws_glue_classifiers.md b/docs/tables/aws_glue_classifiers.md new file mode 100644 index 000000000..21f5c5073 --- /dev/null +++ b/docs/tables/aws_glue_classifiers.md @@ -0,0 +1,37 @@ + +# Table: aws_glue_classifiers +Classifiers are triggered during a crawl task +## Columns +| Name | Type | Description | +| ------------- | ------------- | ----- | +|account_id|text|The AWS Account ID of the resource| +|region|text|The AWS Region of the resource| +|name|text|Name of the classifier| +|csv_classifier_name|text|The name of the classifier| +|csv_classifier_allow_single_column|boolean|Enables the processing of files that contain only one column| +|csv_classifier_contains_header|text|Indicates whether the CSV file contains a header| +|csv_classifier_creation_time|timestamp without time zone|The time that this classifier was registered| +|csv_classifier_delimiter|text|A custom symbol to denote what separates each column entry in the row| +|csv_classifier_disable_value_trimming|boolean|Specifies not to trim values before identifying the type of column values| +|csv_classifier_header|text[]|A list of strings representing column names| +|csv_classifier_last_updated|timestamp without time zone|The time that this classifier was last updated| +|csv_classifier_quote_symbol|text|A custom symbol to denote what combines content into a single column value| +|csv_classifier_version|bigint|The version of this classifier| +|grok_classifier_classification|text|An identifier of the data format that the classifier matches, such as Twitter, JSON, Omniture logs, and so on| +|grok_classifier_grok_pattern|text|The grok pattern applied to a data store by this classifier| +|grok_classifier_name|text|The name of the classifier| +|grok_classifier_creation_time|timestamp without time zone|The time that this classifier was registered| +|grok_classifier_custom_patterns|text|Optional custom grok patterns defined by this classifier| 
+|grok_classifier_last_updated|timestamp without time zone|The time that this classifier was last updated| +|grok_classifier_version|bigint|The version of this classifier| +|json_classifier_json_path|text|A JsonPath string defining the JSON data for the classifier to classify| +|json_classifier_name|text|The name of the classifier| +|json_classifier_creation_time|timestamp without time zone|The time that this classifier was registered| +|json_classifier_last_updated|timestamp without time zone|The time that this classifier was last updated| +|json_classifier_version|bigint|The version of this classifier| +|xml_classifier_classification|text|An identifier of the data format that the classifier matches| +|xml_classifier_name|text|The name of the classifier| +|xml_classifier_creation_time|timestamp without time zone|The time that this classifier was registered| +|xml_classifier_last_updated|timestamp without time zone|The time that this classifier was last updated| +|xml_classifier_row_tag|text|The XML tag designating the element that contains each record in an XML document being parsed| +|xml_classifier_version|bigint|The version of this classifier| diff --git a/resources/provider/provider.go b/resources/provider/provider.go index 911f565ab..96d9e0a61 100644 --- a/resources/provider/provider.go +++ b/resources/provider/provider.go @@ -169,6 +169,7 @@ func Provider() *provider.Provider { "emr.block_public_access_configs": emr.EmrBlockPublicAccessConfigs(), "emr.clusters": emr.EmrClusters(), "fsx.backups": fsx.FsxBackups(), + "glue.classifiers": glue.Classifiers(), "glue.crawlers": glue.Crawlers(), "glue.databases": glue.Databases(), "glue.datacatalog_encryption_settings": glue.DatacatalogEncryptionSettings(), diff --git a/resources/services/glue/classifiers.go b/resources/services/glue/classifiers.go new file mode 100644 index 000000000..50d44da33 --- /dev/null +++ b/resources/services/glue/classifiers.go @@ -0,0 +1,251 @@ +package glue + +import ( + "context" + + 
"github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/glue" + "github.com/aws/aws-sdk-go-v2/service/glue/types" + "github.com/cloudquery/cq-provider-aws/client" + "github.com/cloudquery/cq-provider-sdk/provider/diag" + "github.com/cloudquery/cq-provider-sdk/provider/schema" +) + +//go:generate cq-gen --resource classifiers --config classifiers.hcl --output . +func Classifiers() *schema.Table { + return &schema.Table{ + Name: "aws_glue_classifiers", + Description: "Classifiers are triggered during a crawl task", + Resolver: fetchGlueClassifiers, + Multiplex: client.ServiceAccountRegionMultiplexer("glue"), + IgnoreError: client.IgnoreAccessDeniedServiceDisabled, + DeleteFilter: client.DeleteAccountRegionFilter, + Options: schema.TableCreationOptions{PrimaryKeys: []string{"account_id", "region", "name"}}, + Columns: []schema.Column{ + { + Name: "account_id", + Description: "The AWS Account ID of the resource", + Type: schema.TypeString, + Resolver: client.ResolveAWSAccount, + }, + { + Name: "region", + Description: "The AWS Region of the resource", + Type: schema.TypeString, + Resolver: client.ResolveAWSRegion, + }, + { + Name: "name", + Description: "Name of the classifier", + Type: schema.TypeString, + Resolver: resolveGlueClassifierName, + }, + { + Name: "csv_classifier_name", + Description: "The name of the classifier", + Type: schema.TypeString, + Resolver: schema.PathResolver("CsvClassifier.Name"), + }, + { + Name: "csv_classifier_allow_single_column", + Description: "Enables the processing of files that contain only one column", + Type: schema.TypeBool, + Resolver: schema.PathResolver("CsvClassifier.AllowSingleColumn"), + }, + { + Name: "csv_classifier_contains_header", + Description: "Indicates whether the CSV file contains a header", + Type: schema.TypeString, + Resolver: schema.PathResolver("CsvClassifier.ContainsHeader"), + }, + { + Name: "csv_classifier_creation_time", + Description: "The time that this classifier was registered", + 
Type: schema.TypeTimestamp, + Resolver: schema.PathResolver("CsvClassifier.CreationTime"), + }, + { + Name: "csv_classifier_delimiter", + Description: "A custom symbol to denote what separates each column entry in the row", + Type: schema.TypeString, + Resolver: schema.PathResolver("CsvClassifier.Delimiter"), + }, + { + Name: "csv_classifier_disable_value_trimming", + Description: "Specifies not to trim values before identifying the type of column values", + Type: schema.TypeBool, + Resolver: schema.PathResolver("CsvClassifier.DisableValueTrimming"), + }, + { + Name: "csv_classifier_header", + Description: "A list of strings representing column names", + Type: schema.TypeStringArray, + Resolver: schema.PathResolver("CsvClassifier.Header"), + }, + { + Name: "csv_classifier_last_updated", + Description: "The time that this classifier was last updated", + Type: schema.TypeTimestamp, + Resolver: schema.PathResolver("CsvClassifier.LastUpdated"), + }, + { + Name: "csv_classifier_quote_symbol", + Description: "A custom symbol to denote what combines content into a single column value", + Type: schema.TypeString, + Resolver: schema.PathResolver("CsvClassifier.QuoteSymbol"), + }, + { + Name: "csv_classifier_version", + Description: "The version of this classifier", + Type: schema.TypeBigInt, + Resolver: schema.PathResolver("CsvClassifier.Version"), + }, + { + Name: "grok_classifier_classification", + Description: "An identifier of the data format that the classifier matches, such as Twitter, JSON, Omniture logs, and so on", + Type: schema.TypeString, + Resolver: schema.PathResolver("GrokClassifier.Classification"), + }, + { + Name: "grok_classifier_grok_pattern", + Description: "The grok pattern applied to a data store by this classifier", + Type: schema.TypeString, + Resolver: schema.PathResolver("GrokClassifier.GrokPattern"), + }, + { + Name: "grok_classifier_name", + Description: "The name of the classifier", + Type: schema.TypeString, + Resolver: 
schema.PathResolver("GrokClassifier.Name"), + }, + { + Name: "grok_classifier_creation_time", + Description: "The time that this classifier was registered", + Type: schema.TypeTimestamp, + Resolver: schema.PathResolver("GrokClassifier.CreationTime"), + }, + { + Name: "grok_classifier_custom_patterns", + Description: "Optional custom grok patterns defined by this classifier", + Type: schema.TypeString, + Resolver: schema.PathResolver("GrokClassifier.CustomPatterns"), + }, + { + Name: "grok_classifier_last_updated", + Description: "The time that this classifier was last updated", + Type: schema.TypeTimestamp, + Resolver: schema.PathResolver("GrokClassifier.LastUpdated"), + }, + { + Name: "grok_classifier_version", + Description: "The version of this classifier", + Type: schema.TypeBigInt, + Resolver: schema.PathResolver("GrokClassifier.Version"), + }, + { + Name: "json_classifier_json_path", + Description: "A JsonPath string defining the JSON data for the classifier to classify", + Type: schema.TypeString, + Resolver: schema.PathResolver("JsonClassifier.JsonPath"), + }, + { + Name: "json_classifier_name", + Description: "The name of the classifier", + Type: schema.TypeString, + Resolver: schema.PathResolver("JsonClassifier.Name"), + }, + { + Name: "json_classifier_creation_time", + Description: "The time that this classifier was registered", + Type: schema.TypeTimestamp, + Resolver: schema.PathResolver("JsonClassifier.CreationTime"), + }, + { + Name: "json_classifier_last_updated", + Description: "The time that this classifier was last updated", + Type: schema.TypeTimestamp, + Resolver: schema.PathResolver("JsonClassifier.LastUpdated"), + }, + { + Name: "json_classifier_version", + Description: "The version of this classifier", + Type: schema.TypeBigInt, + Resolver: schema.PathResolver("JsonClassifier.Version"), + }, + { + Name: "xml_classifier_classification", + Description: "An identifier of the data format that the classifier matches", + Type: schema.TypeString, + 
Resolver: schema.PathResolver("XMLClassifier.Classification"),
+			},
+			{
+				Name:        "xml_classifier_name",
+				Description: "The name of the classifier",
+				Type:        schema.TypeString,
+				Resolver:    schema.PathResolver("XMLClassifier.Name"),
+			},
+			{
+				Name:        "xml_classifier_creation_time",
+				Description: "The time that this classifier was registered",
+				Type:        schema.TypeTimestamp,
+				Resolver:    schema.PathResolver("XMLClassifier.CreationTime"),
+			},
+			{
+				Name:        "xml_classifier_last_updated",
+				Description: "The time that this classifier was last updated",
+				Type:        schema.TypeTimestamp,
+				Resolver:    schema.PathResolver("XMLClassifier.LastUpdated"),
+			},
+			{
+				Name:        "xml_classifier_row_tag",
+				Description: "The XML tag designating the element that contains each record in an XML document being parsed",
+				Type:        schema.TypeString,
+				Resolver:    schema.PathResolver("XMLClassifier.RowTag"),
+			},
+			{
+				Name:        "xml_classifier_version",
+				Description: "The version of this classifier",
+				Type:        schema.TypeBigInt,
+				Resolver:    schema.PathResolver("XMLClassifier.Version"),
+			},
+		},
+	}
+}
+
+// ====================================================================================================================
+// Table Resolver Functions
+// ====================================================================================================================
+
+// fetchGlueClassifiers sends every GetClassifiers page on res, following NextToken until it comes back empty.
+func fetchGlueClassifiers(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error {
+	svc := meta.(*client.Client).Services().Glue
+	input := glue.GetClassifiersInput{}
+	for {
+		output, err := svc.GetClassifiers(ctx, &input)
+		if err != nil {
+			return diag.WrapError(err)
+		}
+		res <- output.Classifiers
+		if aws.ToString(output.NextToken) == "" {
+			return nil
+		}
+		input.NextToken = output.NextToken
+	}
+}
+
+// resolveGlueClassifierName sets the column to the Name of the first non-nil variant,
+// checked in the order CSV, JSON, grok, XML; the column is not set when all are nil.
+func resolveGlueClassifierName(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error {
+	r := resource.Item.(types.Classifier)
+	switch {
+	case r.CsvClassifier != nil:
+		return diag.WrapError(resource.Set(c.Name, r.CsvClassifier.Name))
+	case r.JsonClassifier != nil:
+		return diag.WrapError(resource.Set(c.Name, r.JsonClassifier.Name))
+	case r.GrokClassifier != nil:
+		return diag.WrapError(resource.Set(c.Name, r.GrokClassifier.Name))
+	case r.XMLClassifier != nil:
+		return diag.WrapError(resource.Set(c.Name, r.XMLClassifier.Name))
+	}
+	return nil
+}
diff --git a/resources/services/glue/classifiers.hcl b/resources/services/glue/classifiers.hcl
new file mode 100644
index 000000000..50143161f
--- /dev/null
+++ b/resources/services/glue/classifiers.hcl
@@ -0,0 +1,48 @@
+service = "aws"
+output_directory = "."
+add_generate = true
+
+description_modifier "remove_read_only" {
+  words = [" This member is required"]
+}
+
+resource "aws" "glue" "classifiers" {
+  path = "github.com/aws/aws-sdk-go-v2/service/glue/types.Classifier"
+  ignoreError "IgnoreAccessDenied" {
+    path = "github.com/cloudquery/cq-provider-aws/client.IgnoreAccessDeniedServiceDisabled"
+  }
+  deleteFilter "AccountRegionFilter" {
+    path = "github.com/cloudquery/cq-provider-aws/client.DeleteAccountRegionFilter"
+  }
+  multiplex "AwsAccountRegion" {
+    path = "github.com/cloudquery/cq-provider-aws/client.ServiceAccountRegionMultiplexer"
+    params = ["glue"]
+  }
+
+  userDefinedColumn "account_id" {
+    description = "The AWS Account ID of the resource"
+    type = "string"
+    resolver "resolveAWSAccount" {
+      path = "github.com/cloudquery/cq-provider-aws/client.ResolveAWSAccount"
+    }
+  }
+  userDefinedColumn "region" {
+    type = "string"
+    description = "The AWS Region of the resource"
+    resolver "resolveAWSRegion" {
+      path = "github.com/cloudquery/cq-provider-aws/client.ResolveAWSRegion"
+    }
+  }
+
+  userDefinedColumn "name" {
+    type = "string"
+    description = "Name of the classifier"
+    generate_resolver = true
+  }
+
+  options {
+    primary_keys = ["account_id", "region", "name"]
+  }
+}
+
+
diff --git
a/resources/services/glue/classifiers_test.go b/resources/services/glue/classifiers_test.go
new file mode 100644
index 000000000..ad99310fa
--- /dev/null
+++ b/resources/services/glue/classifiers_test.go
@@ -0,0 +1,30 @@
+package glue
+
+import (
+	"testing"
+
+	"github.com/aws/aws-sdk-go-v2/service/glue"
+	"github.com/cloudquery/cq-provider-aws/client"
+	"github.com/cloudquery/cq-provider-aws/client/mocks"
+	"github.com/cloudquery/faker/v3"
+	"github.com/golang/mock/gomock"
+)
+
+// buildClassifiers returns Services whose Glue client is a mock with one GetClassifiers
+// expectation; it replies with a faker-filled output whose NextToken has been cleared.
+func buildClassifiers(t *testing.T, ctrl *gomock.Controller) client.Services {
+	var page glue.GetClassifiersOutput
+	if err := faker.FakeData(&page); err != nil {
+		t.Fatal(err)
+	}
+	page.NextToken = nil
+
+	glueMock := mocks.NewMockGlueClient(ctrl)
+	glueMock.EXPECT().GetClassifiers(gomock.Any(), gomock.Any()).Return(&page, nil)
+	return client.Services{Glue: glueMock}
+}
+
+// TestClassifiers hands the Classifiers table and the mock builder to client.AwsMockTestHelper.
+func TestClassifiers(t *testing.T) {
+	client.AwsMockTestHelper(t, Classifiers(), buildClassifiers, client.TestOptions{})
+}