diff --git a/plugins/extractors/cassandra/README.md b/plugins/extractors/cassandra/README.md index 1ccb49189..a34a9b7f0 100644 --- a/plugins/extractors/cassandra/README.md +++ b/plugins/extractors/cassandra/README.md @@ -10,34 +10,39 @@ source: password: 1234 host: localhost port: 9042 + exclude: + keyspaces: [mykeyspace] + tables: [mykeyspace_2.tableName_1] ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `user_id` | `string` | `admin` | User ID to access the cassandra server| *required* | -| `password` | `string` | `1234` | Password for the cassandra Server | *required* | -| `host` | `string` | `127.0.0.1` | The Host address at which server is running | *required* | -| `port` | `int` | `9042` | The Port number at which server is running | *required* | +| Key | Value | Example | Description | | +| :------------------ | :--------- | :---------------------- | :--------------------------------------------- | :--------- | +| `user_id` | `string` | `admin` | User ID to access the cassandra server | _required_ | +| `password` | `string` | `1234` | Password for the cassandra Server | _required_ | +| `host` | `string` | `127.0.0.1` | The Host address at which server is running | _required_ | +| `port` | `int` | `9042` | The Port number at which server is running | _required_ | +| `exclude.keyspcaes` | `[]string` | `[keyspace1,keyspace2]` | List of keyspaces to be excluded from crawling | _optional_ | +| `exclude.tables` | `[]string` | `[keyspace3.table1]` | List of tables to be excluded from crawling | _optional_ | ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `my_keyspace.my_table` | -| `resource.name` | `my_table` | -| `resource.service` | `cassandra` | -| `description` | `table description` | -| `profile.total_rows` | `2100` | -| `schema` | [][Column](#column) | +| Field | Sample Value | +| :------------------- | :--------------------- | +| `resource.urn` | `my_keyspace.my_table` | +| `resource.name` | `my_table` | +| `resource.service` | `cassandra` | +| `description` | `table description` | +| `profile.total_rows` | `2100` | +| `schema` | [][column](#column) | ### Column -| Field | Sample Value | -| :---- | :---- | +| Field | Sample Value | +| :----- | :------------ | | `name` | `total_price` | -| `type` | `text` | +| `type` | `text` | ## Contributing diff --git a/plugins/extractors/cassandra/cassandra.go b/plugins/extractors/cassandra/cassandra.go index 205071032..8d820fe56 100644 --- a/plugins/extractors/cassandra/cassandra.go +++ b/plugins/extractors/cassandra/cassandra.go @@ -10,7 +10,6 @@ import ( "github.com/gocql/gocql" "github.com/odpf/meteor/models" - _ "github.com/odpf/meteor/models" v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" "github.com/odpf/meteor/plugins/sqlutil" @@ -37,10 +36,16 @@ const ( // Config holds the set of configuration for the cassandra extractor type Config struct { - UserID string `json:"user_id" yaml:"user_id" mapstructure:"user_id" validate:"required"` - Password string `json:"password" yaml:"password" mapstructure:"password" validate:"required"` - Host string `json:"host" yaml:"host" mapstructure:"host" validate:"required"` - Port int `json:"port" yaml:"port" mapstructure:"port" validate:"required"` + UserID string `json:"user_id" yaml:"user_id" mapstructure:"user_id" validate:"required"` + Password string `json:"password" yaml:"password" mapstructure:"password" validate:"required"` + Host string `json:"host" yaml:"host" mapstructure:"host" validate:"required"` + Port int `json:"port" yaml:"port" mapstructure:"port" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` +} + +type Exclude struct { + Keyspaces []string `json:"keyspaces" yaml:"keyspaces" mapstructure:"keyspaces"` + Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"` } var sampleConfig = ` @@ -61,6 +66,7 @@ var info = plugins.Info{ type Extractor struct { plugins.BaseExtractor excludedKeyspaces map[string]bool + excludeTables map[string]bool logger log.Logger config Config session *gocql.Session @@ -84,7 +90,9 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) } // build excluded database list - e.excludedKeyspaces = sqlutil.BuildBoolMap(defaultKeyspaceList) + excludedKeyspacesList := append(defaultKeyspaceList, e.config.Exclude.Keyspaces...) + e.excludedKeyspaces = sqlutil.BuildBoolMap(excludedKeyspacesList) + e.excludeTables = sqlutil.BuildBoolMap(e.config.Exclude.Tables) // connect to cassandra cluster := gocql.NewCluster(e.config.Host) @@ -144,6 +152,9 @@ func (e *Extractor) extractTables(keyspace string) (err error) { if err = scanner.Scan(&tableName); err != nil { return errors.Wrapf(err, "failed to iterate over %s", tableName) } + if e.isExcludedTable(keyspace, tableName) { + continue + } if err = e.processTable(keyspace, tableName); err != nil { return errors.Wrap(err, "failed to process table") } @@ -212,6 +223,12 @@ func (e *Extractor) isExcludedKeyspace(keyspace string) bool { return ok } +func (e *Extractor) isExcludedTable(keyspace, table string) bool { + tableName := fmt.Sprintf("%s.%s", keyspace, table) + _, ok := e.excludeTables[tableName] + return ok +} + // init register the extractor to the catalog func init() { if err := registry.Extractors.Register("cassandra", func() plugins.Extractor { diff --git a/plugins/extractors/clickhouse/README.md b/plugins/extractors/clickhouse/README.md index 372eef401..bea3fab48 100644 --- a/plugins/extractors/clickhouse/README.md +++ b/plugins/extractors/clickhouse/README.md @@ -6,33 +6,38 @@ source: name: clickhouse config: - connection_url: tcp://localhost:3306?username=admin&password=pass123&debug=true + connection_url: clickhouse://username:password@clickhouse-server:9000 + exclude: + databases: [database_a, database_b] + tables: [database_c.table_a] ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `connection_url` | `string` | `tcp://localhost:3306?username=admin&password=pass123&debug=true` | URL to access the clickhouse server | *required* | +| Key | Value | Example | Description | | +| :------------------ | :--------- | :---------------------------------------------------------------- | :---------------------------------- | :--------- | +| `connection_url` | `string` | `tcp://localhost:3306?username=admin&password=pass123&debug=true` | URL to access the clickhouse server | _required_ | +| `exclude.databases` | `[]string` | `[database_a`, `database_b]` | List of databases to be excluded | _optional_ | +| `exclude.tables` | `[]string` | `[database_c.table_a, database_c.table_b]` | List of tables to be excluded | _optional_ | ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `my_database.my_table` | -| `resource.name` | `my_table` | -| `resource.service` | `clickhouse` | -| `description` | `table description` | -| `profile.total_rows` | `2100` | -| `schema` | [][Column](#column) | +| Field | Sample Value | +| :------------------- | :--------------------- | +| `resource.urn` | `my_database.my_table` | +| `resource.name` | `my_table` | +| `resource.service` | `clickhouse` | +| `description` | `table description` | +| `profile.total_rows` | `2100` | +| `schema` | [][column](#column) | ### Column -| Field | Sample Value | -| :---- | :---- | -| `name` | `total_price` | +| Field | Sample Value | +| :------------ | :------------------- | +| `name` | `total_price` | | `description` | `item's total price` | -| `data_type` | `String` | +| `data_type` | `String` | ## Contributing diff --git a/plugins/extractors/clickhouse/clickhouse.go b/plugins/extractors/clickhouse/clickhouse.go index 7592d78db..3d339fb0f 100644 --- a/plugins/extractors/clickhouse/clickhouse.go +++ b/plugins/extractors/clickhouse/clickhouse.go @@ -13,6 +13,7 @@ import ( "github.com/odpf/meteor/models" v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" "github.com/odpf/meteor/plugins" + "github.com/odpf/meteor/plugins/sqlutil" "github.com/odpf/meteor/registry" "github.com/odpf/salt/log" ) @@ -22,10 +23,19 @@ var summary string // Config holds the connection URL for the extractor type Config struct { - ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` } -var sampleConfig = `connection_url: "tcp://localhost:3306?username=admin&password=pass123&debug=true"` +type Exclude struct { + Databases []string `json:"databases" yaml:"databases" mapstructure:"databases"` + Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"` +} + +var sampleConfig = `connection_url: "tcp://localhost:3306?username=admin&password=pass123&debug=true" +exclude: + databases: [database_a, database_b] + tables: [dataset_c.table_a]` var info = plugins.Info{ Description: "Column-oriented DBMS for online analytical processing.", @@ -38,9 +48,11 @@ var info = plugins.Info{ // and logger interface for the extractor type Extractor struct { plugins.BaseExtractor - config Config - logger log.Logger - db *sql.DB + config Config + logger log.Logger + excludedDBs map[string]bool + excludedTbl map[string]bool + db *sql.DB } // New returns a pointer to an initialized Extractor Object @@ -59,6 +71,10 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } + // initialize excluded databases and tables + e.excludedDBs = sqlutil.BuildBoolMap(e.config.Exclude.Databases) + e.excludedTbl = sqlutil.BuildBoolMap(e.config.Exclude.Tables) + if e.db, err = sql.Open("clickhouse", e.config.ConnectionURL); err != nil { return errors.Wrap(err, "failed to create a client") } @@ -91,6 +107,11 @@ func (e *Extractor) extractTables(emit plugins.Emit) (err error) { return } + // skip excluded databases and tables + if e.excludedDBs[dbName] || e.excludedTbl[fmt.Sprintf("%s.%s", dbName, tableName)] { + continue + } + var columns []*v1beta2.Column columns, err = e.getColumnsInfo(dbName, tableName) if err != nil { diff --git a/plugins/extractors/couchdb/README.md b/plugins/extractors/couchdb/README.md index 4e6189050..cdf87840e 100644 --- a/plugins/extractors/couchdb/README.md +++ b/plugins/extractors/couchdb/README.md @@ -7,31 +7,33 @@ source: name: couchdb config: connection_url: http://admin:pass123@localhost:3306/ + exclude: database_a,database_b ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `connection_url` | `string` | `http://admin:pass123@localhost:3306/` | URL to access the couchdb server | *required* | +| Key | Value | Example | Description | | +| :--------------- | :------- | :------------------------------------- | :--------------------------------------------------------- | :--------- | +| `connection_url` | `string` | `http://admin:pass123@localhost:3306/` | URL to access the couchdb server | _required_ | +| `exclude` | `string` | `primaryDB,secondaryDB` | Comma separated database list to be excluded from crawling | _optional_ | ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `database_name.docID` | -| `resource.name` | `docID` | -| `resource.service` | `couchdb` | -| `schema` | [][Column](#column) | +| Field | Sample Value | +| :----------------- | :-------------------- | +| `resource.urn` | `database_name.docID` | +| `resource.name` | `docID` | +| `resource.service` | `couchdb` | +| `schema` | [][column](#column) | ### Column -| Field | Sample Value | -| :---- | :---- | -| `name` | `field1` | +| Field | Sample Value | +| :------------ | :------------------------- | +| `name` | `field1` | | `description` | `rev for revision history` | -| `data_type` | `float64` | -| `length` | `` | +| `data_type` | `float64` | +| `length` | `` | ## Contributing diff --git a/plugins/extractors/couchdb/couchdb.go b/plugins/extractors/couchdb/couchdb.go index 2a0bbc565..c5faef8db 100644 --- a/plugins/extractors/couchdb/couchdb.go +++ b/plugins/extractors/couchdb/couchdb.go @@ -5,12 +5,14 @@ import ( _ "embed" // used to print the embedded assets "fmt" "reflect" + "strings" _ "github.com/go-kivik/couchdb" "github.com/go-kivik/kivik" "github.com/odpf/meteor/models" v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" "github.com/odpf/meteor/plugins" + "github.com/odpf/meteor/plugins/sqlutil" "github.com/odpf/meteor/registry" "github.com/odpf/salt/log" "google.golang.org/protobuf/types/known/anypb" @@ -28,9 +30,12 @@ var defaultDBList = []string{ // Config holds the connection URL for the extractor type Config struct { ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude string `json:"exclude" yaml:"exclude" mapstructure:"exclude"` } -var sampleConfig = `connection_url: http://admin:pass123@localhost:3306/` +var sampleConfig = ` +connection_url: http://admin:pass123@localhost:3306/ +exclude: database_a,database_b` var info = plugins.Info{ Description: "Table metadata from CouchDB server,", @@ -67,7 +72,8 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) } // build excluded database list - e.buildExcludedDBs() + excludedList := append(defaultDBList, strings.Split(e.config.Exclude, ",")...) + e.excludedDbs = sqlutil.BuildBoolMap(excludedList) // create client e.client, err = kivik.New("couch", e.config.ConnectionURL) @@ -90,6 +96,9 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) } for _, dbName := range res { + if e.isExcludedDB(dbName) { + continue + } if err := e.extractTables(ctx, dbName); err != nil { return err } @@ -99,10 +108,6 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) // Extract tables from a given database func (e *Extractor) extractTables(ctx context.Context, dbName string) (err error) { - // skip if database is default - if e.isExcludedDB(dbName) { - return - } e.db = e.client.DB(ctx, dbName) // extract documents @@ -176,15 +181,6 @@ func (e *Extractor) extractColumns(ctx context.Context, docID string) (columns [ return } -func (e *Extractor) buildExcludedDBs() { - excludedMap := make(map[string]bool) - for _, db := range defaultDBList { - excludedMap[db] = true - } - - e.excludedDbs = excludedMap -} - func (e *Extractor) isExcludedDB(database string) bool { _, ok := e.excludedDbs[database] return ok diff --git a/plugins/extractors/gcs/README.md b/plugins/extractors/gcs/README.md index 53fbe9f76..3cf213fff 100644 --- a/plugins/extractors/gcs/README.md +++ b/plugins/extractors/gcs/README.md @@ -4,7 +4,7 @@ ```yaml source: - name: googlecloudstorage + name: gcs config: project_id: google-project-id extract_blob: true @@ -18,50 +18,52 @@ source: "auth_uri": "https://accounts.google.com/o/oauth2/auth", "token_uri": "https://oauth2.googleapis.com/token", "auth_provider_x509_cert_url": "xxxxxxx", - "client_x509_cert_url": "xxxxxxx" + "client_x509_cert_url": "xxxxxxx", } + exclude: [bucket_a, bucket_b] ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `project_id` | `string` | `my-project` | BigQuery Project ID | *required* | -| `extract_blob` | `boolean` | `true` | Extract blob metadata inside a bucket | *optional* | -| `credentials_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | *optional* | +| Key | Value | Example | Description | | +| :----------------- | :--------- | :--------------------------------------- | :---------------------------------------------- | :--------- | +| `project_id` | `string` | `my-project` | Cloud Storage Project ID | _required_ | +| `extract_blob` | `boolean` | `true` | Extract blob metadata inside a bucket | _optional_ | +| `credentials_json` | `string` | `{"private_key": .., "private_id": ...}` | Service Account in JSON string | _optional_ | +| `exclude` | `[]string` | `["bucket_a","bucket_b"]` | Array of bucket names to excluded from crawling | _optional_ | -### *Notes* +### _Notes_ Leaving `credentials_json` blank will default to [Google's default authentication](https://cloud.google.com/docs/authentication/production#automatically). It is recommended if Meteor instance runs inside the same Google Cloud environment as the Google Cloud Storage project. ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `project_id/bucket_name` | -| `resource.name` | `bucket_name` | -| `resource.service` | `googlecloudstorage` | -| `location` | `ASIA` | -| `storage_type` | `STANDARD` | -| `labels` | []{`key`:`value`} | -| `timestamps.created_at.seconds` | `1551082913` | -| `timestamps.created_at.nanos` | `1551082913` | +| Field | Sample Value | +| :------------------------------ | :----------------------- | +| `resource.urn` | `project_id/bucket_name` | +| `resource.name` | `bucket_name` | +| `resource.service` | `gcs` | +| `location` | `ASIA` | +| `storage_type` | `STANDARD` | +| `labels` | []{`key`:`value`} | +| `timestamps.created_at.seconds` | `1551082913` | +| `timestamps.created_at.nanos` | `1551082913` | ### Blob -| Field | Sample Value | -| :---- | :---- | -| `urn` | `project_id/bucket_name/blob_path` | -| `name` | `blob_path` | -| `size` | `311` | -| `deleted_at.seconds` | `1551082913` | -| `expired_at.seconds` | `1551082913` | -| `labels` | []{`key`:`value`} | -| `ownership.owners` | []{`name`:`serviceaccountname@project.gserviceaccount.com`} | -| `timestamps.created_at.seconds` | `1551082913` | -| `timestamps.created_at.nanos` | `1551082913` | -| `timestamps.updated_at.seconds` | `1551082913` | -| `timestamps.updated_at.nanos` | `1551082913` | +| Field | Sample Value | +| :------------------------------ | :---------------------------------------------------------- | +| `urn` | `project_id/bucket_name/blob_path` | +| `name` | `blob_path` | +| `size` | `311` | +| `deleted_at.seconds` | `1551082913` | +| `expired_at.seconds` | `1551082913` | +| `labels` | []{`key`:`value`} | +| `ownership.owners` | []{`name`:`serviceaccountname@project.gserviceaccount.com`} | +| `timestamps.created_at.seconds` | `1551082913` | +| `timestamps.created_at.nanos` | `1551082913` | +| `timestamps.updated_at.seconds` | `1551082913` | +| `timestamps.updated_at.nanos` | `1551082913` | ## Contributing diff --git a/plugins/extractors/gcs/gcs.go b/plugins/extractors/gcs/gcs.go index f3366931a..d465184c4 100644 --- a/plugins/extractors/gcs/gcs.go +++ b/plugins/extractors/gcs/gcs.go @@ -9,6 +9,7 @@ import ( "github.com/odpf/meteor/models" v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" + "github.com/odpf/meteor/plugins/sqlutil" "github.com/odpf/meteor/registry" "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/timestamppb" @@ -25,9 +26,10 @@ var summary string // Config holds the set of configuration for the extractor type Config struct { - ProjectID string `json:"project_id" yaml:"project_id" mapstructure:"project_id" validate:"required"` - ServiceAccountJSON string `json:"service_account_json" yaml:"service_account_json" mapstructure:"service_account_json"` - ExtractBlob bool `json:"extract_blob" yaml:"extract_blob" mapstructure:"extract_blob"` + ProjectID string `json:"project_id" yaml:"project_id" mapstructure:"project_id" validate:"required"` + ServiceAccountJSON string `json:"service_account_json" yaml:"service_account_json" mapstructure:"service_account_json"` + ExtractBlob bool `json:"extract_blob" yaml:"extract_blob" mapstructure:"extract_blob"` + Exclude []string `json:"exclude" yaml:"exclude" mapstructure:"exclude"` } var sampleConfig = ` @@ -44,7 +46,8 @@ service_account_json: |- "token_uri": "https://oauth2.googleapis.com/token", "auth_provider_x509_cert_url": "xxxxxxx", "client_x509_cert_url": "xxxxxxx" - }` + } +exclude: [bucket_a,bucket_b]` var info = plugins.Info{ Description: "Online file storage service By Google", @@ -57,9 +60,10 @@ var info = plugins.Info{ // from the google cloud storage type Extractor struct { plugins.BaseExtractor - client *storage.Client - logger log.Logger - config Config + client *storage.Client + logger log.Logger + excludedBuckets map[string]bool + config Config } // New returns a pointer to an initialized Extractor Object @@ -79,6 +83,9 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } + // build excluded buckets map + e.excludedBuckets = sqlutil.BuildBoolMap(e.config.Exclude) + // create client e.client, err = e.createClient(ctx) if err != nil { @@ -99,6 +106,11 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) return errors.Wrapf(err, "failed to iterate over %s", bucket.Name) } + // skip excluded buckets + if e.excludedBuckets[bucket.Name] { + continue + } + var blobs []*v1beta2.Blob if e.config.ExtractBlob { blobs, err = e.extractBlobs(ctx, bucket.Name, e.config.ProjectID) diff --git a/plugins/extractors/grafana/README.md b/plugins/extractors/grafana/README.md index 21d12405e..20befbdf3 100644 --- a/plugins/extractors/grafana/README.md +++ b/plugins/extractors/grafana/README.md @@ -8,39 +8,47 @@ source: config: base_url: grafana_server api_key: your_api_key + exclude: + dashboards: + - dashboard_ud_1 + - dashboard_ud_2 + panels: + - dashboard_uid_3.panel_id_1 ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `base_url` | `string` | `http://localhost:3000` | URL of the Grafana server | *required* | -| `api_key` | `string` | `Bearer qweruqwryqwLKJ` | API key to access Grafana API | *required* | +| Key | Value | Example | Description | | +| :------------------- | :--------- | :--------------------------------- | :--------------------------------------------- | :--------- | +| `base_url` | `string` | `http://localhost:3000` | URL of the Grafana server | _required_ | +| `api_key` | `string` | `Bearer qweruqwryqwLKJ` | API key to access Grafana API | _required_ | +| `exclude.dashboards` | `[]string` | `[dashboard_ud_1, dashboard_ud_2]` | List of dasboards to be excluded from crawling | _optional_ | +| `exclude.panels` | `[]string` | `[dashboard_uid_3.panel_id_1]` | List of panels to be excluded from crawling | _optional_ | ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `grafana.HzK8qNW7z` | -| `resource.name` | `new-dashboard-copy` | -| `resource.service` | `grafana` | -| `resource.url` | `http://localhost:3000/d/HzK8qNW7z/new-dashboard-copy` | -| `charts` | [][chart](#chart) | +| Field | Sample Value | +| :----------------- | :----------------------------------------------------- | +| `resource.urn` | `grafana.HzK8qNW7z` | +| `resource.name` | `new-dashboard-copy` | +| `resource.service` | `grafana` | +| `resource.url` | `http://localhost:3000/d/HzK8qNW7z/new-dashboard-copy` | +| `charts` | [][chart](#chart) | ### Chart -| Field | Sample Value | -| :---- | :---- | -| `urn` | `5WsKOvW7z.4` | -| `name` | `Panel Random` | -| `type` | `table` | -| `source` | `grafana` | -| `description` | `random description for this panel` | -| `url` | `http://localhost:3000/d/5WsKOvW7z/test-dashboard-updated?viewPanel=4` | -| `data_source` | `postgres` | -| `raw_query` | `SELECT\n urn,\n created_at AS \"time\"\nFROM resources\nORDER BY 1` | -| `dashboard_urn` | `grafana.5WsKOvW7z` | -| `dashboard_source` | `grafana` | +| Field | Sample Value | +| :----------------- | :--------------------------------------------------------------------- | +| `urn` | `5WsKOvW7z.4` | +| `name` | `Panel Random` | +| `type` | `table` | +| `source` | `grafana` | +| `description` | `random description for this panel` | +| `url` | `http://localhost:3000/d/5WsKOvW7z/test-dashboard-updated?viewPanel=4` | +| `data_source` | `postgres` | +| `raw_query` | `SELECT\n urn,\n created_at AS \"time\"\nFROM resources\nORDER BY 1` | +| `dashboard_urn` | `grafana.5WsKOvW7z` | +| `dashboard_source` | `grafana` | ## Contributing diff --git a/plugins/extractors/grafana/grafana.go b/plugins/extractors/grafana/grafana.go index c1cd1360f..09fdc709b 100644 --- a/plugins/extractors/grafana/grafana.go +++ b/plugins/extractors/grafana/grafana.go @@ -12,6 +12,7 @@ import ( "github.com/odpf/meteor/models" v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" "github.com/odpf/meteor/plugins" + "github.com/odpf/meteor/plugins/sqlutil" "github.com/odpf/meteor/registry" "github.com/odpf/salt/log" ) @@ -21,13 +22,22 @@ var summary string // Config holds the set of configuration for the grafana extractor type Config struct { - BaseURL string `json:"base_url" yaml:"base_url" mapstructure:"base_url" validate:"required"` - APIKey string `json:"api_key" yaml:"api_key" mapstructure:"api_key" validate:"required"` + BaseURL string `json:"base_url" yaml:"base_url" mapstructure:"base_url" validate:"required"` + APIKey string `json:"api_key" yaml:"api_key" mapstructure:"api_key" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` +} + +type Exclude struct { + Dashboards []string `json:"dashboards" yaml:"dashboards" mapstructure:"dashboards"` + Panels []string `json:"panels" yaml:"panels" mapstructure:"panels"` } var sampleConfig = ` base_url: grafana_server -api_key: your_api_key` +api_key: your_api_key +exclude: + dashboards: [dashboard_uid_1, dashboard_uid_2] + panels: [dashboard_uid_3.panel_id_1]` var info = plugins.Info{ Description: "Dashboard list from Grafana server.", @@ -39,9 +49,11 @@ var info = plugins.Info{ // Extractor manages the communication with the Grafana Server type Extractor struct { plugins.BaseExtractor - client *Client - config Config - logger log.Logger + client *Client + config Config + excludedDashboards map[string]bool + excludedPanels map[string]bool + logger log.Logger } // New returns a pointer to an initialized Extractor Object @@ -60,6 +72,10 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } + // build excluded dashboards and panels map + e.excludedDashboards = sqlutil.BuildBoolMap(e.config.Exclude.Dashboards) + e.excludedPanels = sqlutil.BuildBoolMap(e.config.Exclude.Panels) + // build client e.client = NewClient(&http.Client{}, e.config) @@ -79,6 +95,10 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) } for _, dashboardDetail := range dashboardDetails { + // skip excluded dashboard uids + if e.excludedDashboards[dashboardDetail.Dashboard.UID] { + continue + } dashboard, err := e.grafanaDashboardToMeteorDashboard(dashboardDetail) if err != nil { return errors.Wrap(err, "failed to build Any struct") @@ -91,10 +111,16 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) // grafanaDashboardToMeteorDashboard converts a grafana dashboard to a meteor dashboard func (e *Extractor) grafanaDashboardToMeteorDashboard(dashboard DashboardDetail) (*v1beta2.Asset, error) { - charts := make([]*v1beta2.Chart, len(dashboard.Dashboard.Panels)) - for i, panel := range dashboard.Dashboard.Panels { + charts := make([]*v1beta2.Chart, 0) + for _, panel := range dashboard.Dashboard.Panels { + + // skip excluded panel ids + panelUID := fmt.Sprintf("%s.%d", dashboard.Dashboard.UID, panel.ID) + if e.excludedPanels[panelUID] { + continue + } c := e.grafanaPanelToMeteorChart(panel, dashboard.Dashboard.UID, dashboard.Meta.URL) - charts[i] = &c + charts = append(charts, &c) } data, err := anypb.New(&v1beta2.Dashboard{ Charts: charts, diff --git a/plugins/extractors/mariadb/README.md b/plugins/extractors/mariadb/README.md index e7f4962c1..4484f7c5c 100644 --- a/plugins/extractors/mariadb/README.md +++ b/plugins/extractors/mariadb/README.md @@ -7,34 +7,42 @@ source: type: mariadb config: connection_url: admin:pass123@tcp(localhost:3306)/ + exclude: + databases: + - database_a + - database_b + tables: + - database_c.table_a ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `connection_url` | `string` | `admin:pass123@tcp(localhost:3306)/` | URL to access the mariadb server | *required* | +| Key | Value | Example | Description | | +| :------------------ | :--------- | :--------------------------------------------- | :------------------------------- | :--------- | +| `connection_url` | `string` | `admin:pass123@tcp(localhost:3306)/` | URL to access the mariadb server | _required_ | +| `exclude.databases` | `[]string` | `[`database_a`, `database_b`]` | List of databases to be excluded | _optional_ | +| `exclude.tables` | `[]string` | `[`database_c.table_a`, `database_c.table_b`]` | List of tables to be excluded | _optional_ | ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `my_database.my_table` | -| `resource.name` | `my_table` | -| `resource.service` | `mariadb` | -| `description` | `table description` | -| `profile.total_rows` | `1100` | -| `schema` | [][Column](#column) | +| Field | Sample Value | +| :------------------- | :--------------------- | +| `resource.urn` | `my_database.my_table` | +| `resource.name` | `my_table` | +| `resource.service` | `mariadb` | +| `description` | `table description` | +| `profile.total_rows` | `1100` | +| `schema` | [][column](#column) | ### Column -| Field | Sample Value | -| :---- | :---- | -| `name` | `total_price` | +| Field | Sample Value | +| :------------ | :------------------- | +| `name` | `total_price` | | `description` | `item's total price` | -| `data_type` | `decimal` | -| `is_nullable` | `true` | -| `length` | `11` | +| `data_type` | `decimal` | +| `is_nullable` | `true` | +| `length` | `11` | ## Contributing diff --git a/plugins/extractors/mariadb/mariadb.go b/plugins/extractors/mariadb/mariadb.go index 98f8b6957..04db86020 100644 --- a/plugins/extractors/mariadb/mariadb.go +++ b/plugins/extractors/mariadb/mariadb.go @@ -31,10 +31,20 @@ var defaultDBList = []string{ // Config holds the connection URL for the extractor type Config struct { - ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` } -var sampleConfig = `connection_url: "admin:pass123@tcp(localhost:3306)/"` +type Exclude struct { + Databases []string `json:"databases" yaml:"databases" mapstructure:"databases"` + Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"` +} + +var sampleConfig = ` +connection_url: admin:pass123@tcp(localhost:3306)/ +exclude: + databases: [database_a,database_b] + tables: [table_a,table_b]` var info = plugins.Info{ Description: "Table metadata from Mariadb server.", @@ -47,6 +57,7 @@ var info = plugins.Info{ type Extractor struct { plugins.BaseExtractor excludedDbs map[string]bool + excludedTbl map[string]bool logger log.Logger config Config db *sql.DB @@ -69,8 +80,10 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } - // build excluded database list - e.excludedDbs = sqlutil.BuildBoolMap(defaultDBList) + // build excluded database and tables list + excludeDBList := append(defaultDBList, e.config.Exclude.Databases...) + e.excludedDbs = sqlutil.BuildBoolMap(excludeDBList) + e.excludedTbl = sqlutil.BuildBoolMap(e.config.Exclude.Tables) // create mariadb client if e.db, err = sql.Open("mysql", e.config.ConnectionURL); err != nil { @@ -93,6 +106,10 @@ func (e *Extractor) Extract(_ context.Context, emit plugins.Emit) (err error) { // Iterate through all tables and databases for _, database := range dbs { + // skip excluded databases + if e.isExcludedDB(database) { + continue + } if err := e.extractTables(database); err != nil { return errors.Wrapf(err, "failed to extract tables from %s", database) } @@ -102,11 +119,6 @@ func (e *Extractor) Extract(_ context.Context, emit plugins.Emit) (err error) { // extractTables extracts tables from a given database func (e *Extractor) extractTables(database string) (err error) { - // skip if database is default - if e.isExcludedDB(database) { - return - } - // extract tables _, err = e.db.Exec(fmt.Sprintf("USE %s;", database)) if err != nil { @@ -116,6 +128,10 @@ func (e *Extractor) extractTables(database string) (err error) { // get list of tables tables, err := sqlutil.FetchTablesInDB(e.db, database, "SHOW TABLES;") for _, tableName := range tables { + // skip excluded tables + if e.isExcludedTable(tableName, database) { + continue + } if err := e.processTable(database, tableName); err != nil { return errors.Wrap(err, "failed to process table") } @@ -184,6 +200,13 @@ func (e *Extractor) isExcludedDB(database string) bool { return ok } +// isExcludedTable checks if the given table is in the list of excluded tables +func (e *Extractor) isExcludedTable(tableName, database string) bool { + tableName = fmt.Sprintf("%s.%s", database, tableName) + _, ok := e.excludedTbl[tableName] + return ok +} + // isNullable returns true if the string is "YES" func (e *Extractor) isNullable(value string) bool { return value == "YES" diff --git a/plugins/extractors/mongodb/README.md b/plugins/extractors/mongodb/README.md index aabcbbffe..d66dd7fbf 100644 --- a/plugins/extractors/mongodb/README.md +++ b/plugins/extractors/mongodb/README.md @@ -7,6 +7,12 @@ source: name: mongodb config: connection_url: mongodb://admin:pass123@localhost:3306 + exclude: + databases: + - database_a + - database_b + collections: + - database_c.collection_a ``` ## Inputs @@ -14,6 +20,8 @@ source: | Key | Value | Example | Description | | | :-- | :---- | :------ | :---------- | :- | | `connection_url` | `string` | `mongodb://admin:pass123@localhost:3306` | URL to access the mongodb server | *required* | +| `exclude.databases` | `[]string` | `[`database_a`, `database_b`]` | List of databases to be excluded | *optional* | +| `exclude.collections` | `[]string` | `[`database_c.collection_a`, `database_c.collection_b`]` | List of collections to be excluded | *optional* | ## Outputs diff --git a/plugins/extractors/mongodb/mongodb.go b/plugins/extractors/mongodb/mongodb.go index 8bbbc88f1..aa626c10b 100644 --- a/plugins/extractors/mongodb/mongodb.go +++ b/plugins/extractors/mongodb/mongodb.go @@ -31,10 +31,23 @@ var defaultCollections = []string{ // Config holds the connection URL for the extractor type Config struct { - ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` } -var sampleConfig = `connection_url: "mongodb://admin:pass123@localhost:3306"` +type Exclude struct { + Databases []string `json:"databases" yaml:"databases" mapstructure:"databases"` + Collections []string `json:"collections" yaml:"collections" mapstructure:"collections"` +} + +var sampleConfig = ` +connection_url: "mongodb://admin:pass123@localhost:3306" +exclude: + databases: + - database_a + - database_b + collections: + - dataset_c.table_a` var info = plugins.Info{ Description: "Collection metadata from MongoDB Server", @@ -47,10 +60,9 @@ var info = plugins.Info{ type Extractor struct { plugins.BaseExtractor // internal states - client *mongo.Client - excluded map[string]bool - logger log.Logger - config Config + client *mongo.Client + logger log.Logger + config Config } // New returns a pointer to an initialized Extractor Object @@ -68,9 +80,6 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } - // build excluded list - e.buildExcludedCollections() - // setup client if e.client, err = createAndConnnectClient(ctx, e.config.ConnectionURL); err != nil { return errors.Wrap(err, "failed to create client") @@ -87,6 +96,10 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) } for _, dbName := range databases { + if e.isExcludedDB(dbName, e.config.Exclude.Databases) { + continue + } + database := e.client.Database(dbName) if err := e.extractCollections(ctx, database, emit); err != nil { return errors.Wrap(err, "failed to extract collections") @@ -108,8 +121,8 @@ func (e *Extractor) extractCollections(ctx context.Context, db *mongo.Database, // or else test might fail sort.Strings(collections) for _, collectionName := range collections { - // skip if collection is default mongo - if e.isDefaultCollection(collectionName) { + // skip if collection is default mongo or is in the user's exclude collection list + if e.isExcludedCollection(collectionName, db.Name(), e.config.Exclude.Collections) { continue } @@ -152,20 +165,35 @@ func (e *Extractor) buildTable(ctx context.Context, db *mongo.Database, collecti return } -// Build a map of excluded collections using list of collection names -func (e *Extractor) buildExcludedCollections() { - excluded := make(map[string]bool) - for _, collection := range defaultCollections { - excluded[collection] = true +// Check if collection is default or in user's exclude list +func (e *Extractor) isExcludedCollection(collName string, dbName string, excludedCollections []string) bool { + collectionName := fmt.Sprintf("%s.%s", dbName, collName) + + // check if collection is in the user's exclude list (dbName.CollectionName) + for _, c := range excludedCollections { + if c == collectionName { + return true + } + } + // check if collection is default mongo collection (like *.system.version, *.system.users) + for _, c := range defaultCollections { + if c == collName { + return true + } } - e.excluded = excluded + return false } -// Check if collection is default using stored map -func (e *Extractor) isDefaultCollection(collectionName string) bool { - _, ok := e.excluded[collectionName] - return ok +// isExcludedDB checks if the given db is in the list of excluded databases +func (e *Extractor) isExcludedDB(dbName string, excludeDatabases []string) bool { + for _, d := range excludeDatabases { + if d == dbName { + return true + } + } + + return false } // Create mongo client and tries to connect diff --git a/plugins/extractors/mssql/README.md b/plugins/extractors/mssql/README.md index b8f8b2758..0a557710a 100644 --- a/plugins/extractors/mssql/README.md +++ b/plugins/extractors/mssql/README.md @@ -7,34 +7,42 @@ source: name: mssql config: connection_url: sqlserver://admin:pass123@localhost:3306/ + exclude: + databases: + - database_a + - database_b + tables: + - database_c.table_a ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `identifier` | `string` | `my-mssql` | Instance alias, the value will be used as part of the urn component | *required* | +| Key | Value | Example | Description | | +| :------------------ | :--------- | :--------------------------------------------- | :------------------------------- | :--------- | +| `connection_url` | `string` | `sqlserver://admin:pass123@localhost:3306/` | URL to access the mssql server | _required_ | +| `exclude.databases` | `[]string` | `[`database_a`, `database_b`]` | List of databases to be excluded | _optional_ | +| `exclude.tables` | `[]string` | `[`database_c.table_a`, `database_c.table_b`]` | List of tables to be excluded | _optional_ | ## Outputs -| Field | Sample Value | -| :---- | :---- | -| `resource.urn` | `mssql::my-mssql/my_database/my_table` | -| `resource.name` | `my_table` | -| `resource.service` | `mssql` | -| `description` | `table description` | -| `profile.total_rows` | `2100` | -| `schema` | [][Column](#column) | +| Field | Sample Value | +| :------------------- | :------------------------------------- | +| `resource.urn` | `mssql::my-mssql/my_database/my_table` | +| `resource.name` | `my_table` | +| `resource.service` | `mssql` | +| `description` | `table description` | +| `profile.total_rows` | `2100` | +| `schema` | [][column](#column) | ### Column -| Field | Sample Value | -| :---- | :---- | -| `name` | `total_price` | +| Field | Sample Value | +| :------------ | :------------------- | +| `name` | `total_price` | | `description` | `item's total price` | -| `data_type` | `decimal` | -| `is_nullable` | `true` | -| `length` | `12,2` | +| `data_type` | `decimal` | +| `is_nullable` | `true` | +| `length` | `12,2` | ## Contributing diff --git a/plugins/extractors/mssql/mssql.go b/plugins/extractors/mssql/mssql.go index 7cc4d661c..39024c2f9 100644 --- a/plugins/extractors/mssql/mssql.go +++ b/plugins/extractors/mssql/mssql.go @@ -33,7 +33,13 @@ var defaultDBList = []string{ // Config holds the connection URL for the extractor type Config struct { - ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` +} + +type Exclude struct { + Databases []string `json:"databases" yaml:"databases" mapstructure:"databases"` + Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"` } var sampleConfig = `connection_url: "sqlserver://admin:pass123@localhost:3306/"` @@ -49,6 +55,7 @@ var info = plugins.Info{ type Extractor struct { plugins.BaseExtractor excludedDbs map[string]bool + excludedTbl map[string]bool logger log.Logger db *sql.DB config Config @@ -72,7 +79,9 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) } // build excluded database list - e.excludedDbs = sqlutil.BuildBoolMap(defaultDBList) + excludeDBList := append(defaultDBList, e.config.Exclude.Databases...) + e.excludedDbs = sqlutil.BuildBoolMap(excludeDBList) + e.excludedTbl = sqlutil.BuildBoolMap(e.config.Exclude.Tables) // create client if e.db, err = sql.Open("mssql", e.config.ConnectionURL); err != nil { @@ -96,7 +105,6 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) if e.isExcludedDB(database) { continue } - tableQuery := fmt.Sprintf(`SELECT TABLE_NAME FROM %s.INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';`, database) tables, err := sqlutil.FetchTablesInDB(e.db, database, tableQuery) if err != nil { @@ -105,6 +113,9 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) } for _, tableName := range tables { + if e.isExcludedTable(tableName, database) { + continue + } if err := e.processTable(database, tableName); err != nil { return errors.Wrap(err, "failed to process Table") } @@ -177,6 +188,13 @@ func (e *Extractor) isExcludedDB(database string) bool { return ok } +// isExcludedTable checks if the given table is in the list of excluded tables +func (e *Extractor) isExcludedTable(tableName, database string) bool { + tableName = fmt.Sprintf("%s.%s", database, tableName) + _, ok := e.excludedTbl[tableName] + return ok +} + // isNullable checks if the given string is null or not func (e *Extractor) isNullable(value string) bool { return value == "YES" diff --git a/plugins/extractors/mysql/README.md b/plugins/extractors/mysql/README.md index ae6c78328..a9065a637 100644 --- a/plugins/extractors/mysql/README.md +++ b/plugins/extractors/mysql/README.md @@ -7,6 +7,12 @@ source: name: mysql config: connection_url: admin:pass123@tcp(localhost:3306)/ + exclude: + databases: + - database_a + - database_b + tables: + - database_c.table_a ``` ## Inputs @@ -14,7 +20,8 @@ source: | Key | Value | Example | Description | | | :-- | :---- | :------ | :---------- | :- | | `connection_url` | `string` | `admin:pass123@tcp(localhost:3306)/` | URL to access the mysql server | *required* | -| `identifier` | `string` | `my-mysql` | Instance alias, the value will be used as part of the urn component | *required* | +| `exclude.databases` | `[]string` | `[`database_a`, `database_b`]` | List of databases to be excluded | *optional* | +| `exclude.tables` | `[]string` | `[`database_c.table_a`, `database_c.table_b`]` | List of tables to be excluded | *optional* | ## Outputs diff --git a/plugins/extractors/mysql/mysql.go b/plugins/extractors/mysql/mysql.go index 65a0a040f..eeba58ff9 100644 --- a/plugins/extractors/mysql/mysql.go +++ b/plugins/extractors/mysql/mysql.go @@ -32,10 +32,23 @@ var defaultDBList = []string{ // Config holds the connection URL for the extractor type Config struct { - ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` } -var sampleConfig = `connection_url: "admin:pass123@tcp(localhost:3306)/"` +type Exclude struct { + Databases []string `json:"databases" yaml:"databases" mapstructure:"databases"` + Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"` +} + +var sampleConfig = ` +connection_url: "admin:pass123@tcp(localhost:3306)/" +exclude: + databases: + - database_a + - database_b + tables: + - dataset_c.table_a` var info = plugins.Info{ Description: "Table metadata from MySQL server.", @@ -48,6 +61,7 @@ var info = plugins.Info{ type Extractor struct { plugins.BaseExtractor excludedDbs map[string]bool + excludedTbl map[string]bool logger log.Logger config Config db *sql.DB @@ -70,8 +84,9 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } - // build excluded database list - e.excludedDbs = sqlutil.BuildBoolMap(defaultDBList) + excludeDBList := append(defaultDBList, e.config.Exclude.Databases...) + e.excludedDbs = sqlutil.BuildBoolMap(excludeDBList) + e.excludedTbl = sqlutil.BuildBoolMap(e.config.Exclude.Tables) // create client if e.db, err = sql.Open("mysql", e.config.ConnectionURL); err != nil { @@ -93,6 +108,11 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) } for _, db := range dbs { + // skip excluded databases + if e.isExcludedDB(db) { + continue + } + // extract tables err := e.extractTables(db) if err != nil { e.logger.Error("failed to get tables, skipping database", "error", err) @@ -105,11 +125,6 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) (err error) // Extract tables from a given database func (e *Extractor) extractTables(database string) (err error) { - // skip if database is default - if e.isExcludedDB(database) { - return - } - // set database _, err = e.db.Exec(fmt.Sprintf("USE %s;", database)) if err != nil { @@ -119,6 +134,10 @@ func (e *Extractor) extractTables(database string) (err error) { // get list of tables tables, err := sqlutil.FetchTablesInDB(e.db, database, "SHOW TABLES;") for _, tableName := range tables { + // skip excluded tables + if e.isExcludedTable(database, tableName) { + continue + } if err := e.processTable(database, tableName); err != nil { return errors.Wrap(err, "failed to process table") } @@ -191,6 +210,13 @@ func (e *Extractor) isExcludedDB(database string) bool { return ok } +// isExcludedTable checks if the given table is in the list of excluded tables +func (e *Extractor) isExcludedTable(database, tableName string) bool { + tableName = fmt.Sprintf("%s.%s", database, tableName) + _, ok := e.excludedTbl[tableName] + return ok +} + // isNullable checks if the given string is null or not func (e *Extractor) isNullable(value string) bool { return value == "YES" diff --git a/plugins/extractors/snowflake/README.md b/plugins/extractors/snowflake/README.md index 114704e70..a8bedf7b3 100644 --- a/plugins/extractors/snowflake/README.md +++ b/plugins/extractors/snowflake/README.md @@ -7,28 +7,36 @@ source: type: snowflake config: connection_url: user:password@my_organization-my_account/mydb + exclude: + databases: + - database_a + - database_b + tables: + - database_c.table_a ``` ## Inputs -| Key | Value | Example | Description | | -| :-- | :---- | :------ | :---------- | :- | -| `connection_url` | `string` | `user:password@org22-acc123/mydb` | URL to access the snowflake server | *required* | +| Key | Value | Example | Description | | +| :------------------ | :--------- | :--------------------------------------------- | :--------------------------------- | :--------- | +| `connection_url` | `string` | `user:password@org22-acc123/mydb` | URL to access the snowflake server | _required_ | +| `exclude.databases` | `[]string` | `[`database_a`, `database_b`]` | List of databases to be excluded | _optional_ | +| `exclude.tables` | `[]string` | `[`database_c.table_a`, `database_c.table_b`]` | List of tables to be excluded | _optional_ | ## Outputs | Field | Sample Value | -|:-------------------|:-----------------------| +| :----------------- | :--------------------- | | `resource.urn` | `my_database.my_table` | | `resource.name` | `my_table` | | `resource.service` | `snowflake` | | `description` | `table description` | -| `schema` | [][Column](#column) | +| `schema` | [][column](#column) | ### Column | Field | Sample Value | -|:--------------|:---------------------| +| :------------ | :------------------- | | `name` | `total_price` | | `description` | `item's total price` | | `data_type` | `decimal` | diff --git a/plugins/extractors/snowflake/snowflake.go b/plugins/extractors/snowflake/snowflake.go index 987899afe..c7960fc44 100644 --- a/plugins/extractors/snowflake/snowflake.go +++ b/plugins/extractors/snowflake/snowflake.go @@ -9,6 +9,7 @@ import ( "github.com/odpf/meteor/models" "github.com/odpf/meteor/plugins" + "github.com/odpf/meteor/plugins/sqlutil" "github.com/odpf/meteor/registry" "github.com/odpf/salt/log" "github.com/snowflakedb/gosnowflake" @@ -23,7 +24,13 @@ var summary string // Config holds the connection URL for the extractor type Config struct { - ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + ConnectionURL string `json:"connection_url" yaml:"connection_url" mapstructure:"connection_url" validate:"required"` + Exclude Exclude `json:"exclude" yaml:"exclude" mapstructure:"exclude"` +} + +type Exclude struct { + Databases []string `json:"databases" yaml:"databases" mapstructure:"databases"` + Tables []string `json:"tables" yaml:"tables" mapstructure:"tables"` } var sampleConfig = `connection_url: "user:password@my_organization-my_account/mydb"` @@ -39,6 +46,8 @@ type Extractor struct { plugins.BaseExtractor logger log.Logger config Config + excludedDbs map[string]bool + excludedTbl map[string]bool httpTransport http.RoundTripper db *sql.DB emit plugins.Emit @@ -74,6 +83,10 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) (err error) return err } + // build excluded database list + e.excludedDbs = sqlutil.BuildBoolMap(e.config.Exclude.Databases) + e.excludedTbl = sqlutil.BuildBoolMap(e.config.Exclude.Tables) + if e.httpTransport == nil { // create snowflake client if e.db, err = sql.Open("snowflake", e.config.ConnectionURL); err != nil { @@ -112,6 +125,10 @@ func (e *Extractor) Extract(_ context.Context, emit plugins.Emit) (err error) { if err = dbs.Scan(&createdOn, &name, &isDefault, &isCurrent, &origin, &owner, &comment, &options, &retentionTime); err != nil { return fmt.Errorf("failed to scan database %s: %w", name, err) } + // skip excluded databases + if e.excludedDbs[name] { + continue + } if err = e.extractTables(name); err != nil { return fmt.Errorf("failed to extract tables from %s: %w", name, err) } @@ -141,6 +158,12 @@ func (e *Extractor) extractTables(database string) (err error) { &bytes, &owner, &retentionTime, &autoClustering, &changeTracking, &isExternal); err != nil { return err } + + // skip excluded tables + TableName := fmt.Sprintf("%s.%s", database, name) + if e.excludedTbl[TableName] { + continue + } if err = e.processTable(database, name); err != nil { return err }