diff --git a/dlp/categoricalRiskAnalysis.js b/dlp/categoricalRiskAnalysis.js new file mode 100644 index 0000000000..01243ca1c8 --- /dev/null +++ b/dlp/categoricalRiskAnalysis.js @@ -0,0 +1,160 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Categorical Risk Analysis +// description: Computes risk metrics of a column of data in a Google BigQuery table. +// usage: node categoricalRiskAnalysis.js my-project bigquery-public-data nhtsa_traffic_fatalities accident_2015 state_name my-topic my-subscription + +function main( + projectId, + tableProjectId, + datasetId, + tableId, + columnName, + topicId, + subscriptionId +) { + // [START dlp_categorical_stats] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const tableProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 'my_table' + // const tableId = 'my_table'; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + // The name of the column to compute risk metrics for, e.g.
'firstName' + // const columnName = 'firstName'; + async function categoricalRiskAnalysis() { + const sourceTable = { + projectId: tableProjectId, + datasetId: datasetId, + tableId: tableId, + }; + + // Construct request for creating a risk analysis job + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + categoricalStatsConfig: { + field: { + name: columnName, + }, + }, + }, + sourceTable: sourceTable, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create helper function for unpacking values + const getValue = obj => obj[Object.keys(obj)[0]]; + + // Run risk analysis job + const [topicResponse] = await pubsub.topic(topicId).get(); + const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + const histogramBuckets = + job.riskDetails.categoricalStatsResult.valueFrequencyHistogramBuckets; + histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { + console.log(`Bucket ${histogramBucketIdx}:`); + + // Print bucket stats + console.log( + ` Most common value occurs ${histogramBucket.valueFrequencyUpperBound} time(s)` + ); + console.log( + ` Least common value occurs ${histogramBucket.valueFrequencyLowerBound} time(s)` + ); + + // Print bucket values + console.log(`${histogramBucket.bucketSize} unique values total.`); + histogramBucket.bucketValues.forEach(valueBucket => { + console.log( + ` Value ${getValue(valueBucket.value)} occurs ${ + valueBucket.count + } time(s).` + ); + }); + }); + } + + categoricalRiskAnalysis(); + // [END dlp_categorical_stats] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/createInspectTemplate.js b/dlp/createInspectTemplate.js new file mode 100644 index 0000000000..5e13e694c3 --- /dev/null +++ b/dlp/createInspectTemplate.js @@ -0,0 +1,102 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +// sample-metadata: +// title: Inspect Templates +// description: Create a new DLP inspection configuration template. +// usage: node createInspectTemplate.js my-project VERY_LIKELY PERSON_NAME 5 false my-template-id + +function main( + projectId, + templateId, + displayName, + infoTypes, + includeQuote, + minLikelihood, + maxFindings +) { + infoTypes = transformCLI(infoTypes); + // [START dlp_create_inspect_template] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // Whether to include the matching string + // const includeQuote = true; + + // (Optional) The name of the template to be created. + // const templateId = 'my-template'; + + // (Optional) The human-readable name to give the template + // const displayName = 'My template'; + + async function createInspectTemplate() { + // Construct the inspection configuration for the template + const inspectConfig = { + infoTypes: infoTypes, + minLikelihood: minLikelihood, + includeQuote: includeQuote, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }; + + // Construct template-creation request + const request = { + parent: `projects/${projectId}/locations/global`, + inspectTemplate: { + inspectConfig: inspectConfig, + displayName: displayName, + }, + templateId: templateId, + }; + + const [response] = await dlp.createInspectTemplate(request); + const templateName = response.name; + console.log(`Successfully created template ${templateName}.`); + } + createInspectTemplate(); + // [END dlp_create_inspect_template] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/createTrigger.js b/dlp/createTrigger.js new file mode 100644 index 0000000000..f4f338d4f4 --- /dev/null +++ b/dlp/createTrigger.js @@ -0,0 +1,138 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Job Triggers +// description: Create a Data Loss Prevention API job trigger. 
+// usage: node createTrigger.js my-project triggerId displayName description bucketName autoPopulateTimespan scanPeriod infoTypes minLikelihood maxFindings + +function main( + projectId, + triggerId, + displayName, + description, + bucketName, + autoPopulateTimespan, + scanPeriod, + infoTypes, + minLikelihood, + maxFindings +) { + infoTypes = transformCLI(infoTypes); + // [START dlp_create_trigger] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // (Optional) The name of the trigger to be created. + // const triggerId = 'my-trigger'; + + // (Optional) A display name for the trigger to be created + // const displayName = 'My Trigger'; + + // (Optional) A description for the trigger to be created + // const description = "This is a sample trigger."; + + // The name of the bucket to scan. + // const bucketName = 'YOUR-BUCKET'; + + // Limit scan to new content only. + // const autoPopulateTimespan = true; + + // How often to wait between scans, in days (minimum = 1 day) + // const scanPeriod = 1; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + async function createTrigger() { + // Get reference to the bucket to be inspected + const storageItem = { + cloudStorageOptions: { + fileSet: {url: `gs://${bucketName}/*`}, + }, + timeSpanConfig: { + enableAutoPopulationOfTimespanConfig: autoPopulateTimespan, + }, + }; + + // Construct job to be triggered + const job = { + inspectConfig: { + infoTypes: infoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItem, + }; + + // Construct trigger creation request + const request = { + parent: `projects/${projectId}/locations/global`, + jobTrigger: { + inspectJob: job, + displayName: displayName, + description: description, + triggers: [ + { + schedule: { + recurrencePeriodDuration: { + seconds: scanPeriod * 60 * 60 * 24, // Trigger the scan daily + }, + }, + }, + ], + status: 'HEALTHY', + }, + triggerId: triggerId, + }; + + // Run trigger creation request + const [trigger] = await dlp.createJobTrigger(request); + console.log(`Successfully created trigger ${trigger.name}.`); + } + + createTrigger(); + // [END dlp_create_trigger] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/deid.js b/dlp/deid.js deleted file mode 100644 index 8f78a18db3..0000000000 --- a/dlp/deid.js +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -'use strict'; - -async function deidentifyWithMask( - callingProjectId, - string, - maskingCharacter, - numberToMask -) { - // [START dlp_deidentify_masking] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The string to deidentify - // const string = 'My SSN is 372819127'; - - // (Optional) The maximum number of sensitive characters to mask in a match - // If omitted from the request or set to 0, the API will mask any matching characters - // const numberToMask = 5; - - // (Optional) The character to mask matching sensitive data with - // const maskingCharacter = 'x'; - - // Construct deidentification request - const item = {value: string}; - const request = { - parent: `projects/${callingProjectId}/locations/global`, - deidentifyConfig: { - infoTypeTransformations: { - transformations: [ - { - primitiveTransformation: { - characterMaskConfig: { - maskingCharacter: maskingCharacter, - numberToMask: numberToMask, - }, - }, - }, - ], - }, - }, - item: item, - }; - - try { - // Run deidentification request - const [response] = await dlp.deidentifyContent(request); - const deidentifiedItem = response.item; - console.log(deidentifiedItem.value); - } catch (err) { - console.log(`Error in deidentifyWithMask: ${err.message || err}`); - } - - // [END dlp_deidentify_masking] -} - -async function deidentifyWithDateShift( - callingProjectId, - inputCsvFile, - outputCsvFile, - dateFields, - lowerBoundDays, - upperBoundDays, - contextFieldId, - wrappedKey, - keyName -) { - // [START dlp_deidentify_date_shift] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // Import other required libraries - const fs = require('fs'); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The path to the CSV file to deidentify - // The first row of the file must specify column names, and all other rows - // must contain valid values - // const inputCsvFile = '/path/to/input/file.csv'; - - // The path to save the date-shifted CSV file to - // const outputCsvFile = '/path/to/output/file.csv'; - - // The list of (date) fields in the CSV file to date shift - // const dateFields = [{ name: 'birth_date'}, { name: 'register_date' }]; - - // The maximum number of days to shift a date backward - // const lowerBoundDays = 1; - - // The maximum number of days to shift a date forward - // const upperBoundDays = 1; - - // (Optional) The column to determine date shift amount based on - // If this is not specified, a random shift amount will be used for every row - // If this is specified, then 'wrappedKey' and 'keyName' must also be set - // const contextFieldId = [{ name: 'user_id' }]; - - // (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key - // If this is 
specified, then 'wrappedKey' and 'contextFieldId' must also be set - // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; - - // (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates - // This key should be encrypted using the Cloud KMS key specified above - // If this is specified, then 'keyName' and 'contextFieldId' must also be set - // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' - - // Helper function for converting CSV rows to Protobuf types - const rowToProto = row => { - const values = row.split(','); - const convertedValues = values.map(value => { - if (Date.parse(value)) { - const date = new Date(value); - return { - dateValue: { - year: date.getFullYear(), - month: date.getMonth() + 1, - day: date.getDate(), - }, - }; - } else { - // Convert all non-date values to strings - return {stringValue: value.toString()}; - } - }); - return {values: convertedValues}; - }; - - // Read and parse a CSV file - const csvLines = fs - .readFileSync(inputCsvFile) - .toString() - .split('\n') - .filter(line => line.includes(',')); - const csvHeaders = csvLines[0].split(','); - const csvRows = csvLines.slice(1); - - // Construct the table object - const tableItem = { - table: { - headers: csvHeaders.map(header => { - return {name: header}; - }), - rows: csvRows.map(row => rowToProto(row)), - }, - }; - - // Construct DateShiftConfig - const dateShiftConfig = { - lowerBoundDays: lowerBoundDays, - upperBoundDays: upperBoundDays, - }; - - if (contextFieldId && keyName && wrappedKey) { - dateShiftConfig.context = {name: contextFieldId}; - dateShiftConfig.cryptoKey = { - kmsWrapped: { - wrappedKey: wrappedKey, - cryptoKeyName: keyName, - }, - }; - } else if (contextFieldId || keyName || wrappedKey) { - throw new Error( - 'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!' 
- ); - } - - // Construct deidentification request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - deidentifyConfig: { - recordTransformations: { - fieldTransformations: [ - { - fields: dateFields, - primitiveTransformation: { - dateShiftConfig: dateShiftConfig, - }, - }, - ], - }, - }, - item: tableItem, - }; - - try { - // Run deidentification request - const [response] = await dlp.deidentifyContent(request); - const tableRows = response.item.table.rows; - - // Write results to a CSV file - tableRows.forEach((row, rowIndex) => { - const rowValues = row.values.map( - value => - value.stringValue || - `${value.dateValue.month}/${value.dateValue.day}/${value.dateValue.year}` - ); - csvLines[rowIndex + 1] = rowValues.join(','); - }); - csvLines.push(''); - fs.writeFileSync(outputCsvFile, csvLines.join('\n')); - - // Print status - console.log(`Successfully saved date-shift output to ${outputCsvFile}`); - } catch (err) { - console.log(`Error in deidentifyWithDateShift: ${err.message || err}`); - } - - // [END dlp_deidentify_date_shift] -} - -async function deidentifyWithFpe( - callingProjectId, - string, - alphabet, - surrogateType, - keyName, - wrappedKey -) { - // [START dlp_deidentify_fpe] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The string to deidentify - // const string = 'My SSN is 372819127'; - - // The set of characters to replace sensitive ones with - // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet - // const alphabet = 'ALPHA_NUMERIC'; - - // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key - // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; - - // The encrypted ('wrapped') AES-256 key to use - // This key should be encrypted using the Cloud KMS key specified above - // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' - - // (Optional) The name of the surrogate custom info type to use - // Only necessary if you want to reverse the deidentification process - // Can be essentially any arbitrary string, as long as it doesn't appear - // in your dataset otherwise. 
- // const surrogateType = 'SOME_INFO_TYPE_DEID'; - - // Construct FPE config - const cryptoReplaceFfxFpeConfig = { - cryptoKey: { - kmsWrapped: { - wrappedKey: wrappedKey, - cryptoKeyName: keyName, - }, - }, - commonAlphabet: alphabet, - }; - if (surrogateType) { - cryptoReplaceFfxFpeConfig.surrogateInfoType = { - name: surrogateType, - }; - } - - // Construct deidentification request - const item = {value: string}; - const request = { - parent: `projects/${callingProjectId}/locations/global`, - deidentifyConfig: { - infoTypeTransformations: { - transformations: [ - { - primitiveTransformation: { - cryptoReplaceFfxFpeConfig: cryptoReplaceFfxFpeConfig, - }, - }, - ], - }, - }, - item: item, - }; - - try { - // Run deidentification request - const [response] = await dlp.deidentifyContent(request); - const deidentifiedItem = response.item; - console.log(deidentifiedItem.value); - } catch (err) { - console.log(`Error in deidentifyWithFpe: ${err.message || err}`); - } - - // [END dlp_deidentify_fpe] -} - -async function reidentifyWithFpe( - callingProjectId, - string, - alphabet, - surrogateType, - keyName, - wrappedKey -) { - // [START dlp_reidentify_fpe] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The string to reidentify - // const string = 'My SSN is PHONE_TOKEN(9):#########'; - - // The set of characters to replace sensitive ones with - // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet - // const alphabet = 'ALPHA_NUMERIC'; - - // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key - // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; - - // The encrypted ('wrapped') AES-256 key to use - // This key should be encrypted using the Cloud KMS key specified above - // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' - - // The name of the surrogate custom info type to use when reidentifying data - // const surrogateType = 'SOME_INFO_TYPE_DEID'; - - // Construct deidentification request - const item = {value: string}; - const request = { - parent: `projects/${callingProjectId}/locations/global`, - reidentifyConfig: { - infoTypeTransformations: { - transformations: [ - { - primitiveTransformation: { - cryptoReplaceFfxFpeConfig: { - cryptoKey: { - kmsWrapped: { - wrappedKey: wrappedKey, - cryptoKeyName: keyName, - }, - }, - commonAlphabet: alphabet, - surrogateInfoType: { - name: surrogateType, - }, - }, - }, - }, - ], - }, - }, - inspectConfig: { - customInfoTypes: [ - { - infoType: { - name: surrogateType, - }, - surrogateType: {}, - }, - ], - }, - item: item, - }; - - try { - // Run reidentification request - const [response] = await dlp.reidentifyContent(request); - const reidentifiedItem = response.item; - console.log(reidentifiedItem.value); - } catch (err) { - console.log(`Error in reidentifyWithFpe: ${err.message || err}`); - } - - // [END dlp_reidentify_fpe] -} - -async function deidentifyWithReplacement( - callingProjectId, - string, - replacement -) { - // [START dlp_deidentify_replacement] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // 
The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The string to deidentify - // const string = 'My SSN is 372819127'; - - // The string to replace sensitive information with - // const replacement = "[REDACTED]" - - // Construct deidentification request - const item = {value: string}; - const request = { - parent: `projects/${callingProjectId}/locations/global`, - deidentifyConfig: { - infoTypeTransformations: { - transformations: [ - { - primitiveTransformation: { - replaceConfig: { - newValue: { - stringValue: replacement, - }, - }, - }, - }, - ], - }, - }, - item: item, - }; - - try { - // Run deidentification request - const [response] = await dlp.deidentifyContent(request); - const deidentifiedItem = response.item; - console.log(deidentifiedItem.value); - } catch (err) { - console.log(`Error in deidentifyWithReplacement: ${err.message || err}`); - } - - // [END dlp_deidentify_replacement] -} - -const cli = require('yargs') - .demand(1) - .command( - 'deidMask ', - 'Deidentify sensitive data in a string by masking it with a character.', - { - maskingCharacter: { - type: 'string', - alias: 'm', - default: '', - }, - numberToMask: { - type: 'number', - alias: 'n', - default: 0, - }, - }, - opts => - deidentifyWithMask( - opts.callingProjectId, - opts.string, - opts.maskingCharacter, - opts.numberToMask - ) - ) - .command( - 'deidFpe ', - 'Deidentify sensitive data in a string using Format Preserving Encryption (FPE).', - { - alphabet: { - type: 'string', - alias: 'a', - default: 'ALPHA_NUMERIC', - choices: [ - 'NUMERIC', - 'HEXADECIMAL', - 'UPPER_CASE_ALPHA_NUMERIC', - 'ALPHA_NUMERIC', - ], - }, - surrogateType: { - type: 'string', - alias: 's', - default: '', - }, - }, - opts => - deidentifyWithFpe( - opts.callingProjectId, - opts.string, - opts.alphabet, - opts.surrogateType, - opts.keyName, - opts.wrappedKey - ) - ) - .command( - 'reidFpe ', - 'Reidentify sensitive data in a string using Format Preserving Encryption (FPE).', - { - alphabet: { - type: 'string', - alias: 'a', - default: 'ALPHA_NUMERIC', - choices: [ - 'NUMERIC', - 'HEXADECIMAL', - 'UPPER_CASE_ALPHA_NUMERIC', - 'ALPHA_NUMERIC', - ], - }, - }, - opts => - reidentifyWithFpe( - opts.callingProjectId, - opts.string, - opts.alphabet, - opts.surrogateType, - opts.keyName, - opts.wrappedKey - ) - ) - .command( - 'deidDateShift [dateFields...]', - 'Deidentify dates in a CSV file by pseudorandomly shifting them.', - { - contextFieldId: { - type: 'string', - alias: 'f', - default: '', - }, - wrappedKey: { - type: 'string', - alias: 'w', - default: '', - }, - keyName: { - type: 'string', - alias: 'n', - default: '', - }, - }, - opts => - deidentifyWithDateShift( - opts.callingProjectId, - opts.inputCsvFile, - opts.outputCsvFile, - opts.dateFields.map(f => { - return {name: f}; - }), - opts.lowerBoundDays, - opts.upperBoundDays, - opts.contextFieldId, - opts.wrappedKey, - opts.keyName - ).catch(console.log) - ) - .command( - 'deidReplace ', - 'Deidentify sensitive data in a string by replacing it with a given replacement string.', - {}, - opts => - deidentifyWithReplacement( - opts.callingProjectId, - opts.string, - opts.replacement - ).catch(console.log) - ) - .option('c', { - type: 'string', - alias: 'callingProjectId', - default: process.env.GCLOUD_PROJECT || '', - }) - .example('node $0 deidMask "My SSN is 372819127"') - .example( - 'node $0 deidFpe "My SSN is 372819127" projects/my-project/locations/global/keyrings/my-keyring -s SSN_TOKEN' - ) - .example( - 'node $0 
reidFpe "My SSN is SSN_TOKEN(9):#########" projects/my-project/locations/global/keyrings/my-keyring SSN_TOKEN -a NUMERIC' - ) - .example( - 'node $0 deidDateShift dates.csv dates-shifted.csv 30 30 birth_date register_date [-w -n projects/my-project/locations/global/keyrings/my-keyring]' - ) - .wrap(120) - .recommendCommands() - .epilogue('For more information, see https://cloud.google.com/dlp/docs.'); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} diff --git a/dlp/deidentifyWithDateShift.js b/dlp/deidentifyWithDateShift.js new file mode 100644 index 0000000000..e13d96425d --- /dev/null +++ b/dlp/deidentifyWithDateShift.js @@ -0,0 +1,191 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with Date Shift +// description: Deidentify dates in a CSV file by pseudorandomly shifting them. +// usage: node deidentifyWithDateShift.js my-project dates.csv dates-shifted.csv 30 30 birth_date register_date [ projects/my-project/locations/global/keyrings/my-keyring] + +function main( + projectId, + inputCsvFile, + outputCsvFile, + dateFields, + lowerBoundDays, + upperBoundDays, + contextFieldId, + wrappedKey, + keyName +) { + dateFields = transformCLI(dateFields); + // [START dlp_deidentify_date_shift] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // Import other required libraries + const fs = require('fs'); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The path to the CSV file to deidentify + // The first row of the file must specify column names, and all other rows + // must contain valid values + // const inputCsvFile = '/path/to/input/file.csv'; + + // The path to save the date-shifted CSV file to + // const outputCsvFile = '/path/to/output/file.csv'; + + // The list of (date) fields in the CSV file to date shift + // const dateFields = [{ name: 'birth_date'}, { name: 'register_date' }]; + + // The maximum number of days to shift a date backward + // const lowerBoundDays = 1; + + // The maximum number of days to shift a date forward + // const upperBoundDays = 1; + + // (Optional) The column to determine date shift amount based on + // If this is not specified, a random shift amount will be used for every row + // If this is specified, then 'wrappedKey' and 'keyName' must also be set + // const contextFieldId = [{ name: 'user_id' }]; + + // (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set + // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + + // (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates + // This key should be encrypted using the Cloud KMS key specified above + 
// If this is specified, then 'keyName' and 'contextFieldId' must also be set + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // Helper function for converting CSV rows to Protobuf types + const rowToProto = row => { + const values = row.split(','); + const convertedValues = values.map(value => { + if (Date.parse(value)) { + const date = new Date(value); + return { + dateValue: { + year: date.getFullYear(), + month: date.getMonth() + 1, + day: date.getDate(), + }, + }; + } else { + // Convert all non-date values to strings + return {stringValue: value.toString()}; + } + }); + return {values: convertedValues}; + }; + + async function deidentifyWithDateShift() { + // Read and parse a CSV file + const csvLines = fs + .readFileSync(inputCsvFile) + .toString() + .split('\n') + .filter(line => line.includes(',')); + const csvHeaders = csvLines[0].split(','); + const csvRows = csvLines.slice(1); + + // Construct the table object + const tableItem = { + table: { + headers: csvHeaders.map(header => { + return {name: header}; + }), + rows: csvRows.map(row => rowToProto(row)), + }, + }; + + // Construct DateShiftConfig + const dateShiftConfig = { + lowerBoundDays: lowerBoundDays, + upperBoundDays: upperBoundDays, + }; + + if (contextFieldId && keyName && wrappedKey) { + dateShiftConfig.context = {name: contextFieldId}; + dateShiftConfig.cryptoKey = { + kmsWrapped: { + wrappedKey: wrappedKey, + cryptoKeyName: keyName, + }, + }; + } else if (contextFieldId || keyName || wrappedKey) { + throw new Error( + 'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!' + ); + } + + // Construct deidentification request + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + recordTransformations: { + fieldTransformations: [ + { + fields: dateFields, + primitiveTransformation: { + dateShiftConfig: dateShiftConfig, + }, + }, + ], + }, + }, + item: tableItem, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const tableRows = response.item.table.rows; + + // Write results to a CSV file + tableRows.forEach((row, rowIndex) => { + const rowValues = row.values.map( + value => + value.stringValue || + `${value.dateValue.month}/${value.dateValue.day}/${value.dateValue.year}` + ); + csvLines[rowIndex + 1] = rowValues.join(','); + }); + csvLines.push(''); + fs.writeFileSync(outputCsvFile, csvLines.join('\n')); + + // Print status + console.log(`Successfully saved date-shift output to ${outputCsvFile}`); + } + + deidentifyWithDateShift(); + // [END dlp_deidentify_date_shift] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(dateFields) { + return (dateFields = dateFields.split(',').map(type => { + return {name: type}; + })); +} diff --git a/dlp/deidentifyWithFpe.js b/dlp/deidentifyWithFpe.js new file mode 100644 index 0000000000..015b24d7ad --- /dev/null +++ b/dlp/deidentifyWithFpe.js @@ -0,0 +1,101 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with FPE +// description: Deidentify sensitive data in a string using Format Preserving Encryption (FPE). +// usage: node deidentifyWithFpe.js my-project "My SSN is 372819127" projects/my-project/locations/global/keyrings/my-keyring SSN_TOKEN + +function main(projectId, string, alphabet, keyName, wrappedKey, surrogateType) { + // [START dlp_deidentify_fpe] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to deidentify + // const string = 'My SSN is 372819127'; + + // The set of characters to replace sensitive ones with + // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet + // const alphabet = 'ALPHA_NUMERIC'; + + // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + + // The encrypted ('wrapped') AES-256 key to use + // This key should be encrypted using the Cloud KMS key specified above + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // (Optional) The name of the surrogate custom info type to use + // Only necessary if you want to reverse the deidentification process + // Can be essentially any arbitrary string, as long as it doesn't appear + // in your dataset otherwise. + // const surrogateType = 'SOME_INFO_TYPE_DEID'; + + async function deidentifyWithFpe() { + // Construct FPE config + const cryptoReplaceFfxFpeConfig = { + cryptoKey: { + kmsWrapped: { + wrappedKey: wrappedKey, + cryptoKeyName: keyName, + }, + }, + commonAlphabet: alphabet, + }; + if (surrogateType) { + cryptoReplaceFfxFpeConfig.surrogateInfoType = { + name: surrogateType, + }; + } + + // Construct deidentification request + const item = {value: string}; + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + cryptoReplaceFfxFpeConfig: cryptoReplaceFfxFpeConfig, + }, + }, + ], + }, + }, + item: item, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const deidentifiedItem = response.item; + console.log(deidentifiedItem.value); + } + deidentifyWithFpe(); + // [END dlp_deidentify_fpe] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deidentifyWithMask.js b/dlp/deidentifyWithMask.js new file mode 100644 index 0000000000..6e6df7e7ef --- /dev/null +++ b/dlp/deidentifyWithMask.js @@ -0,0 +1,80 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with Mask +// description: Deidentify sensitive data in a string by masking it with a character. +// usage: node deidentifyWithMask.js my-project string maskingCharacter numberToMask + +function main(projectId, string, maskingCharacter, numberToMask) { + // [START dlp_deidentify_masking] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project-id'; + + // The string to deidentify + // const string = 'My SSN is 372819127'; + + // (Optional) The maximum number of sensitive characters to mask in a match + // If omitted from the request or set to 0, the API will mask any matching characters + // const numberToMask = 5; + + // (Optional) The character to mask matching sensitive data with + // const maskingCharacter = 'x'; + + // Construct deidentification request + const item = {value: string}; + + async function deidentifyWithMask() { + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + characterMaskConfig: { + maskingCharacter: maskingCharacter, + numberToMask: numberToMask, + }, + }, + }, + ], + }, + }, + item: item, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const deidentifiedItem = response.item; + console.log(deidentifiedItem.value); + } + + deidentifyWithMask(); + // [END dlp_deidentify_masking] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deidentifyWithReplacement.js b/dlp/deidentifyWithReplacement.js new file mode 100644 index 0000000000..c3e5f0de7d --- /dev/null +++ b/dlp/deidentifyWithReplacement.js @@ -0,0 +1,76 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Deidentify with Replacement +// description: Deidentify sensitive data in a string by replacing it with a given replacement string. 
+// usage: node deidentifyWithReplacement.js my-project string replacement + +function main(projectId, string, replacement) { + // [START dlp_deidentify_replacement] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to deidentify + // const string = 'My SSN is 372819127'; + + // The string to replace sensitive information with + // const replacement = "[REDACTED]" + + async function deidentifyWithReplacement() { + // Construct deidentification request + const item = {value: string}; + const request = { + parent: `projects/${projectId}/locations/global`, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + replaceConfig: { + newValue: { + stringValue: replacement, + }, + }, + }, + }, + ], + }, + }, + item: item, + }; + + // Run deidentification request + const [response] = await dlp.deidentifyContent(request); + const deidentifiedItem = response.item; + console.log(deidentifiedItem.value); + } + + deidentifyWithReplacement(); + // [END dlp_deidentify_replacement] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deleteInspectTemplate.js b/dlp/deleteInspectTemplate.js new file mode 100644 index 0000000000..61a8021151 --- /dev/null +++ b/dlp/deleteInspectTemplate.js @@ -0,0 +1,55 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Delete Inspect Templates +// description: Delete the DLP inspection configuration template with the specified name.
+// usage: node deleteInspectTemplate.js my-project projects/my-project/inspectTemplates/##### + +function main(projectId, templateName) { + // [START dlp_delete_inspect_template] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The name of the template to delete + // Parent project ID is automatically extracted from this parameter + // const templateName = 'projects/YOUR_PROJECT_ID/inspectTemplates/#####' + async function deleteInspectTemplate() { + // Construct template-deletion request + const request = { + name: templateName, + }; + + // Run template-deletion request + await dlp.deleteInspectTemplate(request); + console.log(`Successfully deleted template ${templateName}.`); + } + + deleteInspectTemplate(); + // [END dlp_delete_inspect_template] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deleteJob.js b/dlp/deleteJob.js new file mode 100644 index 0000000000..78202468e2 --- /dev/null +++ b/dlp/deleteJob.js @@ -0,0 +1,59 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Delete Job +// description: Delete results of a Data Loss Prevention API job. +// usage: node deleteJob.js my-project projects/YOUR_GCLOUD_PROJECT/dlpJobs/X-##### + +function main(projectId, jobName) { + // [START dlp_delete_job] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The name of the job whose results should be deleted + // Parent project ID is automatically extracted from this parameter + // const jobName = 'projects/my-project/dlpJobs/X-#####' + + function deleteJob() { + // Construct job deletion request + const request = { + name: jobName, + }; + + // Run job deletion request + dlp + .deleteDlpJob(request) + .then(() => { + console.log(`Successfully deleted job ${jobName}.`); + }) + .catch(err => { + console.log(`Error in deleteJob: ${err.message || err}`); + }); + } + + deleteJob(); + // [END dlp_delete_job] +} +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/deleteTrigger.js b/dlp/deleteTrigger.js new file mode 100644 index 0000000000..9fca52f798 --- /dev/null +++ b/dlp/deleteTrigger.js @@ -0,0 +1,54 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Delete Trigger +// description: Delete a Data Loss Prevention API job trigger. +// usage: node deleteTrigger.js my-project projects/my-project/jobTriggers/my-trigger + +function main(projectId, triggerId) { + // [START dlp_delete_trigger] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project' + + // The name of the trigger to be deleted + // Parent project ID is automatically extracted from this parameter + // const triggerId = 'projects/my-project/jobTriggers/my-trigger'; + + async function deleteTrigger() { + // Construct trigger deletion request + const request = { + name: triggerId, + }; + + // Run trigger deletion request + await dlp.deleteJobTrigger(request); + console.log(`Successfully deleted trigger ${triggerId}.`); + } + + deleteTrigger(); + // [END dlp_delete_trigger] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/inspect.js b/dlp/inspect.js deleted file mode 100644 index 89cbbaefc7..0000000000 --- a/dlp/inspect.js +++ /dev/null @@ -1,797 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -'use strict'; - -async function inspectString( - callingProjectId, - string, - minLikelihood, - maxFindings, - infoTypes, - customInfoTypes, - includeQuote -) { - // [START dlp_inspect_string] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The string to inspect - // const string = 'My name is Gary and my email is gary@example.com'; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // The customInfoTypes of information to match - // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, - // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; - - // Whether to include the matching string - // const includeQuote = true; - - // Construct item to inspect - const item = {value: string}; - - // Construct request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - inspectConfig: { - infoTypes: infoTypes, - customInfoTypes: customInfoTypes, - minLikelihood: minLikelihood, - includeQuote: includeQuote, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }, - item: item, - }; - - // Run request - try { - const [response] = await dlp.inspectContent(request); - const findings = response.result.findings; - if (findings.length > 0) { - console.log('Findings:'); - findings.forEach(finding => { - if (includeQuote) { - console.log(`\tQuote: ${finding.quote}`); - } - console.log(`\tInfo type: ${finding.infoType.name}`); - console.log(`\tLikelihood: ${finding.likelihood}`); - }); - } else { - console.log('No findings.'); - } - } catch (err) { - console.log(`Error in inspectString: ${err.message || err}`); - } - - // [END dlp_inspect_string] -} - -async function inspectFile( - callingProjectId, - filepath, - minLikelihood, - maxFindings, - infoTypes, - customInfoTypes, - includeQuote -) { - // [START dlp_inspect_file] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Import other required libraries - const fs = require('fs'); - const mime = require('mime'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The path to a local file to inspect. Can be a text, JPG, or PNG file. 
- // const filepath = 'path/to/image.png'; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // The customInfoTypes of information to match - // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, - // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; - - // Whether to include the matching string - // const includeQuote = true; - - // Construct file data to inspect - const fileTypeConstant = - ['image/jpeg', 'image/bmp', 'image/png', 'image/svg'].indexOf( - mime.getType(filepath) - ) + 1; - const fileBytes = Buffer.from(fs.readFileSync(filepath)).toString('base64'); - const item = { - byteItem: { - type: fileTypeConstant, - data: fileBytes, - }, - }; - - // Construct request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - inspectConfig: { - infoTypes: infoTypes, - customInfoTypes: customInfoTypes, - minLikelihood: minLikelihood, - includeQuote: includeQuote, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }, - item: item, - }; - - // Run request - try { - const [response] = await dlp.inspectContent(request); - const findings = response.result.findings; - if (findings.length > 0) { - console.log('Findings:'); - findings.forEach(finding => { - if (includeQuote) { - console.log(`\tQuote: ${finding.quote}`); - } - console.log(`\tInfo type: ${finding.infoType.name}`); - console.log(`\tLikelihood: ${finding.likelihood}`); - }); - } else { - console.log('No findings.'); - } - } catch (err) { - console.log(`Error in inspectFile: ${err.message || err}`); - } - // [END dlp_inspect_file] -} - -async function inspectGCSFile( - callingProjectId, - bucketName, - fileName, - topicId, - subscriptionId, - minLikelihood, - maxFindings, - infoTypes, - customInfoTypes -) { - // [START dlp_inspect_gcs] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The name of the bucket where the file resides. - // const bucketName = 'YOUR-BUCKET'; - - // The path to the file within the bucket to inspect. - // Can contain wildcards, e.g. 
"my-image.*" - // const fileName = 'my-image.png'; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // The customInfoTypes of information to match - // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, - // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // Get reference to the file to be inspected - const storageItem = { - cloudStorageOptions: { - fileSet: {url: `gs://${bucketName}/${fileName}`}, - }, - }; - - // Construct request for creating an inspect job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - inspectJob: { - inspectConfig: { - infoTypes: infoTypes, - customInfoTypes: customInfoTypes, - minLikelihood: minLikelihood, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }, - storageConfig: storageItem, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - try { - // Create a GCS File inspection job and wait for it to complete - const [topicResponse] = await pubsub.topic(topicId).get(); - // Verify the Pub/Sub topic and listen for job notifications via an - // existing subscription. 
- const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - // Get the job's ID - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - - setTimeout(() => { - console.log('Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - console.log(`Job ${job.name} status: ${job.state}`); - - const infoTypeStats = job.inspectDetails.result.infoTypeStats; - if (infoTypeStats.length > 0) { - infoTypeStats.forEach(infoTypeStat => { - console.log( - ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` - ); - }); - } else { - console.log('No findings.'); - } - } catch (err) { - console.log(`Error in inspectGCSFile: ${err.message || err}`); - } - - // [END dlp_inspect_gcs] -} - -async function inspectDatastore( - callingProjectId, - dataProjectId, - namespaceId, - kind, - topicId, - subscriptionId, - minLikelihood, - maxFindings, - infoTypes, - customInfoTypes -) { - // [START dlp_inspect_datastore] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the target Datastore is stored under - // This may or may not equal the calling project ID - // const dataProjectId = process.env.GCLOUD_PROJECT; - - // (Optional) The ID namespace of the Datastore document to inspect. - // To ignore Datastore namespaces, set this to an empty string ('') - // const namespaceId = ''; - - // The kind of the Datastore entity to inspect. 
- // const kind = 'Person'; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // The customInfoTypes of information to match - // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, - // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // Construct items to be inspected - const storageItems = { - datastoreOptions: { - partitionId: { - projectId: dataProjectId, - namespaceId: namespaceId, - }, - kind: { - name: kind, - }, - }, - }; - - // Construct request for creating an inspect job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - inspectJob: { - inspectConfig: { - infoTypes: infoTypes, - customInfoTypes: customInfoTypes, - minLikelihood: minLikelihood, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }, - storageConfig: storageItems, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - try { - // Run inspect-job creation request - const [topicResponse] = await pubsub.topic(topicId).get(); - // Verify the Pub/Sub topic and listen for job notifications via an - // existing subscription. 
- const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - // Wait for DLP job to fully complete - setTimeout(() => { - console.log('Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - console.log(`Job ${job.name} status: ${job.state}`); - - const infoTypeStats = job.inspectDetails.result.infoTypeStats; - if (infoTypeStats.length > 0) { - infoTypeStats.forEach(infoTypeStat => { - console.log( - ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` - ); - }); - } else { - console.log('No findings.'); - } - } catch (err) { - console.log(`Error in inspectDatastore: ${err.message || err}`); - } - - // [END dlp_inspect_datastore] -} - -async function inspectBigquery( - callingProjectId, - dataProjectId, - datasetId, - tableId, - topicId, - subscriptionId, - minLikelihood, - maxFindings, - infoTypes, - customInfoTypes -) { - // [START dlp_inspect_bigquery] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the table is stored under - // This may or (for public datasets) may not equal the calling project ID - // const dataProjectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // const datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 
'my_table' - // const tableId = 'my_table'; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // The customInfoTypes of information to match - // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, - // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // Construct item to be inspected - const storageItem = { - bigQueryOptions: { - tableReference: { - projectId: dataProjectId, - datasetId: datasetId, - tableId: tableId, - }, - }, - }; - - // Construct request for creating an inspect job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - inspectJob: { - inspectConfig: { - infoTypes: infoTypes, - customInfoTypes: customInfoTypes, - minLikelihood: minLikelihood, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }, - storageConfig: storageItem, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - try { - // Run inspect-job creation request - const [topicResponse] = await pubsub.topic(topicId).get(); - // Verify the Pub/Sub topic and listen for job notifications via an - // existing subscription. 
- const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - // Wait for DLP job to fully complete - setTimeout(() => { - console.log('Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - console.log(`Job ${job.name} status: ${job.state}`); - - const infoTypeStats = job.inspectDetails.result.infoTypeStats; - if (infoTypeStats.length > 0) { - infoTypeStats.forEach(infoTypeStat => { - console.log( - ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` - ); - }); - } else { - console.log('No findings.'); - } - } catch (err) { - console.log(`Error in inspectBigquery: ${err.message || err}`); - } - - // [END dlp_inspect_bigquery] -} - -const cli = require(`yargs`) // eslint-disable-line - .demand(1) - .command( - 'string ', - 'Inspect a string using the Data Loss Prevention API.', - {}, - opts => - inspectString( - opts.callingProjectId, - opts.string, - opts.minLikelihood, - opts.maxFindings, - opts.infoTypes, - opts.customDictionaries.concat(opts.customRegexes), - opts.includeQuote - ) - ) - .command( - 'file ', - 'Inspects a local text, PNG, or JPEG file using the Data Loss Prevention API.', - {}, - opts => - inspectFile( - opts.callingProjectId, - opts.filepath, - opts.minLikelihood, - opts.maxFindings, - opts.infoTypes, - opts.customDictionaries.concat(opts.customRegexes), - opts.includeQuote - ) - ) - .command( - 'gcsFile ', - 'Inspects a text file stored on Google Cloud Storage with the Data Loss Prevention API, using Pub/Sub for job notifications.', - {}, - opts => - inspectGCSFile( - opts.callingProjectId, - opts.bucketName, - opts.fileName, - opts.topicId, - opts.subscriptionId, - opts.minLikelihood, - opts.maxFindings, - opts.infoTypes, - opts.customDictionaries.concat(opts.customRegexes) - ) - ) - .command( - 'bigquery ', - 'Inspects a BigQuery table using the Data Loss Prevention API using Pub/Sub for job notifications.', - {}, - opts => { - inspectBigquery( - opts.callingProjectId, - opts.dataProjectId, - opts.datasetName, - opts.tableName, - opts.topicId, - opts.subscriptionId, - opts.minLikelihood, - opts.maxFindings, - opts.infoTypes, - opts.customDictionaries.concat(opts.customRegexes) - ); - } - ) - .command( - 'datastore ', - 'Inspect a Datastore instance using the Data Loss Prevention API using Pub/Sub for job notifications.', - { - namespaceId: { - type: 'string', - alias: 'n', - default: '', - }, - }, - opts => - inspectDatastore( - opts.callingProjectId, - opts.dataProjectId, - opts.namespaceId, - opts.kind, - opts.topicId, - opts.subscriptionId, - opts.minLikelihood, - opts.maxFindings, - opts.infoTypes, - opts.customDictionaries.concat(opts.customRegexes) - ) - ) - .option('m', { - alias: 'minLikelihood', - default: 
'LIKELIHOOD_UNSPECIFIED', - type: 'string', - choices: [ - 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', - 'UNLIKELY', - 'POSSIBLE', - 'LIKELY', - 'VERY_LIKELY', - ], - global: true, - }) - .option('c', { - type: 'string', - alias: 'callingProjectId', - default: process.env.GCLOUD_PROJECT || '', - }) - .option('p', { - type: 'string', - alias: 'dataProjectId', - default: process.env.GCLOUD_PROJECT || '', - }) - .option('f', { - alias: 'maxFindings', - default: 0, - type: 'number', - global: true, - }) - .option('q', { - alias: 'includeQuote', - default: true, - type: 'boolean', - global: true, - }) - .option('t', { - alias: 'infoTypes', - default: ['PHONE_NUMBER', 'EMAIL_ADDRESS', 'CREDIT_CARD_NUMBER'], - type: 'array', - global: true, - coerce: infoTypes => - infoTypes.map(type => { - return {name: type}; - }), - }) - .option('d', { - alias: 'customDictionaries', - default: [], - type: 'array', - global: true, - coerce: customDictionaries => - customDictionaries.map((dict, idx) => { - return { - infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, - dictionary: {wordList: {words: dict.split(',')}}, - }; - }), - }) - .option('r', { - alias: 'customRegexes', - default: [], - type: 'array', - global: true, - coerce: customRegexes => - customRegexes.map((rgx, idx) => { - return { - infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, - regex: {pattern: rgx}, - }; - }), - }) - .option('n', { - alias: 'notificationTopic', - type: 'string', - global: true, - }) - .example('node $0 string "My email address is me@somedomain.com"') - .example('node $0 file resources/test.txt') - .example('node $0 gcsFile my-bucket my-file.txt my-topic my-subscription') - .example('node $0 bigquery my-dataset my-table my-topic my-subscription') - .example('node $0 datastore my-datastore-kind my-topic my-subscription') - .wrap(120) - .recommendCommands() - .epilogue( - 'For more information, see https://cloud.google.com/dlp/docs. Optional flags are explained at https://cloud.google.com/dlp/docs/reference/rest/v2/InspectConfig' - ); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} diff --git a/dlp/inspectBigQuery.js b/dlp/inspectBigQuery.js new file mode 100644 index 0000000000..9db049d9ad --- /dev/null +++ b/dlp/inspectBigQuery.js @@ -0,0 +1,195 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Inspect Bigquery +// description: Inspects a BigQuery table using the Data Loss Prevention API using Pub/Sub for job notifications. 
+// usage: node inspectBigQuery.js my-project dataProjectId datasetId tableId topicId subscriptionId minLikelihood maxFindings infoTypes customInfoTypes + +function main( + projectId, + dataProjectId, + datasetId, + tableId, + topicId, + subscriptionId, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_bigquery] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const dataProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 'my_table' + // const tableId = 'my_table'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function inspectBigquery() { + // Construct item to be inspected + const storageItem = { + bigQueryOptions: { + tableReference: { + projectId: dataProjectId, + datasetId: datasetId, + tableId: tableId, + }, + }, + }; + + // Construct request for creating an inspect job + const request = { + parent: `projects/${projectId}/locations/global`, + inspectJob: { + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItem, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Run inspect-job creation request + const [topicResponse] = await pubsub.topic(topicId).get(); + // Verify the Pub/Sub topic and listen for job notifications via an + // existing subscription. 
+ const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + // Wait for DLP job to fully complete + setTimeout(() => { + console.log('Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + console.log(`Job ${job.name} status: ${job.state}`); + + const infoTypeStats = job.inspectDetails.result.infoTypeStats; + if (infoTypeStats.length > 0) { + infoTypeStats.forEach(infoTypeStat => { + console.log( + ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` + ); + }); + } else { + console.log('No findings.'); + } + } + + inspectBigquery(); + // [END dlp_inspect_bigquery] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectDatastore.js b/dlp/inspectDatastore.js new file mode 100644 index 0000000000..23dcc3192c --- /dev/null +++ b/dlp/inspectDatastore.js @@ -0,0 +1,198 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Inspect Datastore +// description: Inspect a Datastore instance using the Data Loss Prevention API using Pub/Sub for job notifications. 
+// usage: node inspectDatastore.js my-project dataProjectId namespaceId kind topicId subscriptionId minLikelihood maxFindings infoTypes customInfoTypes + +function main( + projectId, + dataProjectId, + namespaceId, + kind, + topicId, + subscriptionId, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_datastore] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the target Datastore is stored under + // This may or may not equal the calling project ID + // const dataProjectId = 'my-project'; + + // (Optional) The ID namespace of the Datastore document to inspect. + // To ignore Datastore namespaces, set this to an empty string ('') + // const namespaceId = ''; + + // The kind of the Datastore entity to inspect. + // const kind = 'Person'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function inspectDatastore() { + // Construct items to be inspected + const storageItems = { + datastoreOptions: { + partitionId: { + projectId: dataProjectId, + namespaceId: namespaceId, + }, + kind: { + name: kind, + }, + }, + }; + + // Construct request for creating an inspect job + const request = { + parent: `projects/${projectId}/locations/global`, + inspectJob: { + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItems, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + // Run inspect-job creation request + const [topicResponse] = await pubsub.topic(topicId).get(); + // Verify the Pub/Sub topic and listen for job notifications via an + // existing subscription. 
+ const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + // Wait for DLP job to fully complete + setTimeout(() => { + console.log('Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + console.log(`Job ${job.name} status: ${job.state}`); + + const infoTypeStats = job.inspectDetails.result.infoTypeStats; + if (infoTypeStats.length > 0) { + infoTypeStats.forEach(infoTypeStat => { + console.log( + ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` + ); + }); + } else { + console.log('No findings.'); + } + } + inspectDatastore(); + // [END dlp_inspect_datastore] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectFile.js b/dlp/inspectFile.js new file mode 100644 index 0000000000..ca4e485d8b --- /dev/null +++ b/dlp/inspectFile.js @@ -0,0 +1,143 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Inspect File +// description: Inspects a local text, PNG, or JPEG file using the Data Loss Prevention API. 
+// usage: node inspectFile.js my-project filepath minLikelihood maxFindings infoTypes customInfoTypes includeQuote + +function main( + projectId, + filepath, + minLikelihood, + maxFindings, + infoTypes, + customInfoTypes, + includeQuote +) { + [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes); + + // [START dlp_inspect_file] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Import other required libraries + const fs = require('fs'); + const mime = require('mime'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The path to a local file to inspect. Can be a text, JPG, or PNG file. + // const filepath = 'path/to/image.png'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; + + // Whether to include the matching string + // const includeQuote = true; + + async function inspectFile() { + // Construct file data to inspect + const fileTypeConstant = + ['image/jpeg', 'image/bmp', 'image/png', 'image/svg'].indexOf( + mime.getType(filepath) + ) + 1; + const fileBytes = Buffer.from(fs.readFileSync(filepath)).toString('base64'); + const item = { + byteItem: { + type: fileTypeConstant, + data: fileBytes, + }, + }; + + // Construct request + const request = { + parent: `projects/${projectId}/locations/global`, + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + includeQuote: includeQuote, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + item: item, + }; + + // Run request + const [response] = await dlp.inspectContent(request); + const findings = response.result.findings; + if (findings.length > 0) { + console.log('Findings:'); + findings.forEach(finding => { + if (includeQuote) { + console.log(`\tQuote: ${finding.quote}`); + } + console.log(`\tInfo type: ${finding.infoType.name}`); + console.log(`\tLikelihood: ${finding.likelihood}`); + }); + } else { + console.log('No findings.'); + } + } + // [END dlp_inspect_file] + inspectFile(); +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? 
customInfoTypes.split(',').map((dict, idx) => {
+          return {
+            infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())},
+            dictionary: {wordList: {words: dict.split(',')}},
+          };
+        })
+      : customInfoTypes.split(',').map((rgx, idx) => {
+          return {
+            infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())},
+            regex: {pattern: rgx},
+          };
+        });
+  }
+
+  return [infoTypes, customInfoTypes];
+}
diff --git a/dlp/inspectGCSFile.js b/dlp/inspectGCSFile.js
new file mode 100644
index 0000000000..b1ccd6dde7
--- /dev/null
+++ b/dlp/inspectGCSFile.js
@@ -0,0 +1,187 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// sample-metadata:
+// title: Inspect GCS File
+// description: Inspects a text file stored on Google Cloud Storage with the Data Loss Prevention API, using Pub/Sub for job notifications.
+// usage: node inspectGCSFile.js my-project bucketName fileName topicId subscriptionId minLikelihood maxFindings infoTypes customInfoTypes
+
+function main(
+  projectId,
+  bucketName,
+  fileName,
+  topicId,
+  subscriptionId,
+  minLikelihood,
+  maxFindings,
+  infoTypes,
+  customInfoTypes
+) {
+  [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes);
+
+  // [START dlp_inspect_gcs]
+  // Import the Google Cloud client libraries
+  const DLP = require('@google-cloud/dlp');
+  const {PubSub} = require('@google-cloud/pubsub');
+
+  // Instantiates clients
+  const dlp = new DLP.DlpServiceClient();
+  const pubsub = new PubSub();
+
+  // The project ID to run the API call under
+  // const projectId = 'my-project';
+
+  // The name of the bucket where the file resides.
+  // const bucketName = 'YOUR-BUCKET';
+
+  // The path to the file within the bucket to inspect.
+  // Can contain wildcards, e.g.
"my-image.*" + // const fileName = 'my-image.png'; + + // The minimum likelihood required before returning a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The maximum number of findings to report per request (0 = server maximum) + // const maxFindings = 0; + + // The infoTypes of information to match + // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; + + // The customInfoTypes of information to match + // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}}, + // { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}]; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function inspectGCSFile() { + // Get reference to the file to be inspected + const storageItem = { + cloudStorageOptions: { + fileSet: {url: `gs://${bucketName}/${fileName}`}, + }, + }; + + // Construct request for creating an inspect job + const request = { + parent: `projects/${projectId}/locations/global`, + inspectJob: { + inspectConfig: { + infoTypes: infoTypes, + customInfoTypes: customInfoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + }, + storageConfig: storageItem, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create a GCS File inspection job and wait for it to complete + const [topicResponse] = await pubsub.topic(topicId).get(); + // Verify the Pub/Sub topic and listen for job notifications via an + // existing subscription. 
+ const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + // Get the job's ID + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + + setTimeout(() => { + console.log('Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + console.log(`Job ${job.name} status: ${job.state}`); + + const infoTypeStats = job.inspectDetails.result.infoTypeStats; + if (infoTypeStats.length > 0) { + infoTypeStats.forEach(infoTypeStat => { + console.log( + ` Found ${infoTypeStat.count} instance(s) of infoType ${infoTypeStat.infoType.name}.` + ); + }); + } else { + console.log('No findings.'); + } + } + inspectGCSFile(); + // [END dlp_inspect_gcs] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes, customInfoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + + if (customInfoTypes) { + customInfoTypes = customInfoTypes.includes(',') + ? customInfoTypes.split(',').map((dict, idx) => { + return { + infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())}, + dictionary: {wordList: {words: dict.split(',')}}, + }; + }) + : customInfoTypes.split(',').map((rgx, idx) => { + return { + infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())}, + regex: {pattern: rgx}, + }; + }); + } + + return [infoTypes, customInfoTypes]; +} diff --git a/dlp/inspectString.js b/dlp/inspectString.js new file mode 100644 index 0000000000..24f46a5cec --- /dev/null +++ b/dlp/inspectString.js @@ -0,0 +1,134 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Inspects strings +// description: Inspect a string using the Data Loss Prevention API. 
+// usage: node inspectString.js my-project string minLikelihood maxFindings infoTypes customInfoTypes includeQuote
+
+function main(
+  projectId,
+  string,
+  minLikelihood,
+  maxFindings,
+  infoTypes,
+  customInfoTypes,
+  includeQuote
+) {
+  [infoTypes, customInfoTypes] = transformCLI(infoTypes, customInfoTypes);
+
+  // [START dlp_inspect_string]
+  // Imports the Google Cloud Data Loss Prevention library
+  const DLP = require('@google-cloud/dlp');
+
+  // Instantiates a client
+  const dlp = new DLP.DlpServiceClient();
+
+  // The project ID to run the API call under
+  // const projectId = 'my-project';
+
+  // The string to inspect
+  // const string = 'My name is Gary and my email is gary@example.com';
+
+  // The minimum likelihood required before returning a match
+  // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED';
+
+  // The maximum number of findings to report per request (0 = server maximum)
+  // const maxFindings = 0;
+
+  // The infoTypes of information to match
+  // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }];
+
+  // The customInfoTypes of information to match
+  // const customInfoTypes = [{ infoType: { name: 'DICT_TYPE' }, dictionary: { wordList: { words: ['foo', 'bar', 'baz']}}},
+  //   { infoType: { name: 'REGEX_TYPE' }, regex: '\\(\\d{3}\\) \\d{3}-\\d{4}'}];
+
+  // Whether to include the matching string
+  // const includeQuote = true;
+
+  async function inspectString() {
+    // Construct item to inspect
+    const item = {value: string};
+
+    // Construct request
+    const request = {
+      parent: `projects/${projectId}/locations/global`,
+      inspectConfig: {
+        infoTypes: infoTypes,
+        customInfoTypes: customInfoTypes,
+        minLikelihood: minLikelihood,
+        includeQuote: includeQuote,
+        limits: {
+          maxFindingsPerRequest: maxFindings,
+        },
+      },
+      item: item,
+    };
+
+    // Run request
+    const [response] = await dlp.inspectContent(request);
+    const findings = response.result.findings;
+    if (findings.length > 0) {
+      console.log('Findings:');
+      findings.forEach(finding => {
+        if (includeQuote) {
+          console.log(`\tQuote: ${finding.quote}`);
+        }
+        console.log(`\tInfo type: ${finding.infoType.name}`);
+        console.log(`\tLikelihood: ${finding.likelihood}`);
+      });
+    } else {
+      console.log('No findings.');
+    }
+  }
+  inspectString();
+  // [END dlp_inspect_string]
+}
+
+main(...process.argv.slice(2));
+process.on('unhandledRejection', err => {
+  console.error(err.message);
+  process.exitCode = 1;
+});
+
+function transformCLI(infoTypes, customInfoTypes) {
+  infoTypes = infoTypes
+    ? infoTypes.split(',').map(type => {
+        return {name: type};
+      })
+    : undefined;
+
+  if (customInfoTypes) {
+    customInfoTypes = customInfoTypes.includes(',')
+      ? customInfoTypes.split(',').map((dict, idx) => {
+          return {
+            infoType: {name: 'CUSTOM_DICT_'.concat(idx.toString())},
+            dictionary: {wordList: {words: dict.split(',')}},
+          };
+        })
+      : customInfoTypes.split(',').map((rgx, idx) => {
+          return {
+            infoType: {name: 'CUSTOM_REGEX_'.concat(idx.toString())},
+            regex: {pattern: rgx},
+          };
+        });
+  }
+
+  return [infoTypes, customInfoTypes];
+}
diff --git a/dlp/jobs.js b/dlp/jobs.js
deleted file mode 100644
index fadeb1ecca..0000000000
--- a/dlp/jobs.js
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2017 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -'use strict'; - -// sample-metadata: -// title: Job Management -async function listJobs(callingProjectId, filter, jobType) { - // [START dlp_list_jobs] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The filter expression to use - // For more information and filter syntax, see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list - // const filter = `state=DONE`; - - // The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB') - // const jobType = 'INSPECT_JOB'; - - // Construct request for listing DLP scan jobs - const request = { - parent: `projects/${callingProjectId}/locations/global`, - filter: filter, - type: jobType, - }; - - try { - // Run job-listing request - const [jobs] = await dlp.listDlpJobs(request); - jobs.forEach(job => { - console.log(`Job ${job.name} status: ${job.state}`); - }); - } catch (err) { - console.log(`Error in listJobs: ${err.message || err}`); - } - - // [END dlp_list_jobs] -} - -function deleteJob(jobName) { - // [START dlp_delete_job] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The name of the job whose results should be deleted - // Parent project ID is automatically extracted from this parameter - // const jobName = 'projects/my-project/dlpJobs/X-#####' - - // Construct job deletion request - const request = { - name: jobName, - }; - - // Run job deletion request - dlp - .deleteDlpJob(request) - .then(() => { - console.log(`Successfully deleted job ${jobName}.`); - }) - .catch(err => { - console.log(`Error in deleteJob: ${err.message || err}`); - }); - // [END dlp_delete_job] -} - -const cli = require(`yargs`) // eslint-disable-line - .demand(1) - .command( - 'list ', - 'List Data Loss Prevention API jobs corresponding to a given filter.', - { - jobType: { - type: 'string', - alias: 't', - default: 'INSPECT', - }, - }, - opts => listJobs(opts.callingProject, opts.filter, opts.jobType) - ) - .command( - 'delete ', - 'Delete results of a Data Loss Prevention API job.', - {}, - opts => deleteJob(opts.jobName) - ) - .option('c', { - type: 'string', - alias: 'callingProject', - default: process.env.GCLOUD_PROJECT || '', - }) - .example('node $0 list "state=DONE" -t RISK_ANALYSIS_JOB') - .example('node $0 delete projects/YOUR_GCLOUD_PROJECT/dlpJobs/X-#####') - .wrap(120) - .recommendCommands() - .epilogue('For more information, see https://cloud.google.com/dlp/docs.'); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} diff --git a/dlp/kAnonymityAnalysis.js b/dlp/kAnonymityAnalysis.js new file mode 100644 index 0000000000..b604c04991 --- /dev/null +++ b/dlp/kAnonymityAnalysis.js @@ -0,0 +1,165 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you 
may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: kAnonymity Analysis +// description: Computes the k-anonymity of a column set in a Google BigQuery table +// usage: node kAnonymityAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId quasiIds + +function main( + projectId, + tableProjectId, + datasetId, + tableId, + topicId, + subscriptionId, + quasiIds +) { + quasiIds = transformCLI(quasiIds); + + // [START dlp_k_anonymity] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const tableProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 'my_table' + // const tableId = 'my_table'; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + // A set of columns that form a composite key ('quasi-identifiers') + // const quasiIds = [{ name: 'age' }, { name: 'city' }]; + async function kAnonymityAnalysis() { + const sourceTable = { + projectId: tableProjectId, + datasetId: datasetId, + tableId: tableId, + }; + // Construct request for creating a risk analysis job + + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + kAnonymityConfig: { + quasiIds: quasiIds, + }, + }, + sourceTable: sourceTable, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create helper function for unpacking values + const getValue = obj => obj[Object.keys(obj)[0]]; + + // Run risk analysis job + const [topicResponse] = await pubsub.topic(topicId).get(); + const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + 
subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + const histogramBuckets = + job.riskDetails.kAnonymityResult.equivalenceClassHistogramBuckets; + + histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { + console.log(`Bucket ${histogramBucketIdx}:`); + console.log( + ` Bucket size range: [${histogramBucket.equivalenceClassSizeLowerBound}, ${histogramBucket.equivalenceClassSizeUpperBound}]` + ); + + histogramBucket.bucketValues.forEach(valueBucket => { + const quasiIdValues = valueBucket.quasiIdsValues + .map(getValue) + .join(', '); + console.log(` Quasi-ID values: {${quasiIdValues}}`); + console.log(` Class size: ${valueBucket.equivalenceClassSize}`); + }); + }); + } + kAnonymityAnalysis(); + // [END dlp_k_anonymity] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(quasiIds) { + quasiIds = quasiIds + ? quasiIds.split(',').map((name, idx) => { + return { + name: name, + infoType: { + name: idx, + }, + }; + }) + : undefined; + return quasiIds; +} diff --git a/dlp/kMapEstimationAnalysis.js b/dlp/kMapEstimationAnalysis.js new file mode 100644 index 0000000000..204350d27d --- /dev/null +++ b/dlp/kMapEstimationAnalysis.js @@ -0,0 +1,179 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: kMap Estimation Analysis +// description: Computes the k-map risk estimation of a column set in a Google BigQuery table. +// usage: node kMapEstimationAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId regionCode quasiIds + +function main( + projectId, + tableProjectId, + datasetId, + tableId, + topicId, + subscriptionId, + regionCode, + quasiIds, + infoTypes +) { + quasiIds = transformCLI(quasiIds, infoTypes); + + // [START dlp_k_map] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const tableProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 
'my_table' + // const tableId = 'my_table'; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + // The ISO 3166-1 region code that the data is representative of + // Can be omitted if using a region-specific infoType (such as US_ZIP_5) + // const regionCode = 'USA'; + + // A set of columns that form a composite key ('quasi-identifiers'), and + // optionally their reidentification distributions + // const quasiIds = [{ field: { name: 'age' }, infoType: { name: 'AGE' }}]; + async function kMapEstimationAnalysis() { + const sourceTable = { + projectId: tableProjectId, + datasetId: datasetId, + tableId: tableId, + }; + + // Construct request for creating a risk analysis job + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + kMapEstimationConfig: { + quasiIds: quasiIds, + regionCode: regionCode, + }, + }, + sourceTable: sourceTable, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + // Create helper function for unpacking values + const getValue = obj => obj[Object.keys(obj)[0]]; + + // Run risk analysis job + const [topicResponse] = await pubsub.topic(topicId).get(); + const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + + const histogramBuckets = + job.riskDetails.kMapEstimationResult.kMapEstimationHistogram; + + histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { + console.log(`Bucket ${histogramBucketIdx}:`); + console.log( + ` Anonymity range: [${histogramBucket.minAnonymity}, ${histogramBucket.maxAnonymity}]` + ); + console.log(` Size: ${histogramBucket.bucketSize}`); + histogramBucket.bucketValues.forEach(valueBucket => { + const values = valueBucket.quasiIdsValues.map(value => getValue(value)); + console.log(` Values: ${values.join(' ')}`); + console.log( + ` Estimated k-map anonymity: ${valueBucket.estimatedAnonymity}` + ); + }); + }); + } + + kMapEstimationAnalysis(); + // [END dlp_k_map] +} +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(quasiIds, infoTypes) { + infoTypes = infoTypes ? infoTypes.split(',') : null; + + quasiIds = quasiIds + ? 
quasiIds.split(',').map((name, index) => { + return { + field: { + name: name, + }, + infoType: { + name: infoTypes[index], + }, + }; + }) + : undefined; + + return quasiIds; +} diff --git a/dlp/lDiversityAnalysis.js b/dlp/lDiversityAnalysis.js new file mode 100644 index 0000000000..7ca245d2d6 --- /dev/null +++ b/dlp/lDiversityAnalysis.js @@ -0,0 +1,180 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: l Diversity Analysis +// description: Computes the l-diversity of a column set in a Google BigQuery table. +// usage: node lDiversityAnalysis.js my-project tableProjectId datasetId tableId topicId subscriptionId sensitiveAttribute quasiIds + +function main( + projectId, + tableProjectId, + datasetId, + tableId, + topicId, + subscriptionId, + sensitiveAttribute, + quasiIds +) { + quasiIds = transformCLI(quasiIds); + // [START dlp_l_diversity] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const tableProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 'my_table' + // const tableId = 'my_table'; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + // The column to measure l-diversity relative to, e.g. 
'firstName' + // const sensitiveAttribute = 'name'; + + // A set of columns that form a composite key ('quasi-identifiers') + // const quasiIds = [{ name: 'age' }, { name: 'city' }]; + + async function lDiversityAnalysis() { + const sourceTable = { + projectId: tableProjectId, + datasetId: datasetId, + tableId: tableId, + }; + + // Construct request for creating a risk analysis job + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + lDiversityConfig: { + quasiIds: quasiIds, + sensitiveAttribute: { + name: sensitiveAttribute, + }, + }, + }, + sourceTable: sourceTable, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create helper function for unpacking values + const getValue = obj => obj[Object.keys(obj)[0]]; + + // Run risk analysis job + const [topicResponse] = await pubsub.topic(topicId).get(); + const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + const histogramBuckets = + job.riskDetails.lDiversityResult.sensitiveValueFrequencyHistogramBuckets; + + histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { + console.log(`Bucket ${histogramBucketIdx}:`); + + console.log( + `Bucket size range: [${histogramBucket.sensitiveValueFrequencyLowerBound}, ${histogramBucket.sensitiveValueFrequencyUpperBound}]` + ); + histogramBucket.bucketValues.forEach(valueBucket => { + const quasiIdValues = valueBucket.quasiIdsValues + .map(getValue) + .join(', '); + console.log(` Quasi-ID values: {${quasiIdValues}}`); + console.log(` Class size: ${valueBucket.equivalenceClassSize}`); + valueBucket.topSensitiveValues.forEach(valueObj => { + console.log( + ` Sensitive value ${getValue(valueObj.value)} occurs ${ + valueObj.count + } time(s).` + ); + }); + }); + }); + } + + lDiversityAnalysis(); + // [END dlp_l_diversity] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(quasiIds) { + quasiIds = quasiIds + ? quasiIds.split(',').map((name, idx) => { + return { + name: name, + infoType: { + name: idx, + }, + }; + }) + : undefined; + return quasiIds; +} diff --git a/dlp/listInspectTemplates.js b/dlp/listInspectTemplates.js new file mode 100644 index 0000000000..73e41aaebc --- /dev/null +++ b/dlp/listInspectTemplates.js @@ -0,0 +1,74 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: List Inspect Templates +// description: List DLP inspection configuration templates. +// usage: node listInspectTemplates.js my-project + +function main(projectId) { + // [START dlp_list_inspect_templates] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // Helper function to pretty-print dates + const formatDate = date => { + const msSinceEpoch = parseInt(date.seconds, 10) * 1000; + return new Date(msSinceEpoch).toLocaleString('en-US'); + }; + + async function listInspectTemplates() { + // Construct template-listing request + const request = { + parent: `projects/${projectId}/locations/global`, + }; + + // Run template-listing request + const [templates] = await dlp.listInspectTemplates(request); + + templates.forEach(template => { + console.log(`Template ${template.name}`); + if (template.displayName) { + console.log(` Display name: ${template.displayName}`); + } + + console.log(` Created: ${formatDate(template.createTime)}`); + console.log(` Updated: ${formatDate(template.updateTime)}`); + + const inspectConfig = template.inspectConfig; + const infoTypes = inspectConfig.infoTypes.map(x => x.name); + console.log(' InfoTypes:', infoTypes.join(' ')); + console.log(' Minimum likelihood:', inspectConfig.minLikelihood); + console.log(' Include quotes:', inspectConfig.includeQuote); + + const limits = inspectConfig.limits; + console.log(' Max findings per request:', limits.maxFindingsPerRequest); + }); + } + + listInspectTemplates(); + // [END dlp_list_inspect_templates] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/listJobs.js b/dlp/listJobs.js new file mode 100644 index 0000000000..41469ed254 --- /dev/null +++ b/dlp/listJobs.js @@ -0,0 +1,62 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: List jobs +// description: List Data Loss Prevention API jobs corresponding to a given filter.
+// usage: node listJobs.js my-project filter jobType + +function main(projectId, filter, jobType) { + // [START dlp_list_jobs] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The filter expression to use + // For more information and filter syntax, see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list + // const filter = `state=DONE`; + + // The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB') + // const jobType = 'INSPECT_JOB'; + async function listJobs() { + // Construct request for listing DLP scan jobs + const request = { + parent: `projects/${projectId}/locations/global`, + filter: filter, + type: jobType, + }; + + // Run job-listing request + const [jobs] = await dlp.listDlpJobs(request); + jobs.forEach(job => { + console.log(`Job ${job.name} status: ${job.state}`); + }); + } + + listJobs(); + // [END dlp_list_jobs] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/listTriggers.js b/dlp/listTriggers.js new file mode 100644 index 0000000000..05ab29e176 --- /dev/null +++ b/dlp/listTriggers.js @@ -0,0 +1,71 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: List Triggers +// description: List Data Loss Prevention API job triggers. 
+// usage: node listTriggers.js my-project + +function main(projectId) { + // [START dlp_list_triggers] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project' + + async function listTriggers() { + // Construct trigger listing request + const request = { + parent: `projects/${projectId}/locations/global`, + }; + + // Helper function to pretty-print dates + const formatDate = date => { + const msSinceEpoch = parseInt(date.seconds, 10) * 1000; + return new Date(msSinceEpoch).toLocaleString('en-US'); + }; + + // Run trigger listing request + const [triggers] = await dlp.listJobTriggers(request); + triggers.forEach(trigger => { + // Log trigger details + console.log(`Trigger ${trigger.name}:`); + console.log(` Created: ${formatDate(trigger.createTime)}`); + console.log(` Updated: ${formatDate(trigger.updateTime)}`); + if (trigger.displayName) { + console.log(` Display Name: ${trigger.displayName}`); + } + if (trigger.description) { + console.log(` Description: ${trigger.description}`); + } + console.log(` Status: ${trigger.status}`); + console.log(` Error count: ${trigger.errors.length}`); + }); + } + + listTriggers(); + // [END dlp_list_triggers] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/metadata.js b/dlp/metadata.js index a9cad8c12f..02cf404666 100644 --- a/dlp/metadata.js +++ b/dlp/metadata.js @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,8 +13,12 @@ // limitations under the License. 'use strict'; +// sample-metadata: +// title: Metadata +// description: List the types of sensitive information the DLP API supports +// usage: node metadata.js my-project languageCode filter -async function listInfoTypes(languageCode, filter) { +function main(projectId, languageCode, filter) { // [START dlp_list_info_types] // Imports the Google Cloud Data Loss Prevention library const DLP = require('@google-cloud/dlp'); @@ -22,44 +26,35 @@ async function listInfoTypes(languageCode, filter) { // Instantiates a client const dlp = new DLP.DlpServiceClient(); + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The BCP-47 language code to use, e.g.
'en-US' // const languageCode = 'en-US'; // The filter to use // const filter = 'supported_by=INSPECT' - const [response] = await dlp.listInfoTypes({ - languageCode: languageCode, - filter: filter, - }); - const infoTypes = response.infoTypes; - console.log('Info types:'); - infoTypes.forEach(infoType => { - console.log(`\t${infoType.name} (${infoType.displayName})`); - }); - + async function listInfoTypes() { + const [response] = await dlp.listInfoTypes({ + languageCode: languageCode, + filter: filter, + }); + const infoTypes = response.infoTypes; + console.log('Info types:'); + infoTypes.forEach(infoType => { + console.log(`\t${infoType.name} (${infoType.displayName})`); + }); + } + + listInfoTypes(); // [END dlp_list_info_types] } -const cli = require('yargs') - .demand(1) - .command( - 'infoTypes [filter]', - 'List the types of sensitive information the DLP API supports.', - {}, - opts => listInfoTypes(opts.languageCode, opts.filter) - ) - .option('l', { - alias: 'languageCode', - default: 'en-US', - type: 'string', - global: true, - }) - .example('node $0 infoTypes "supported_by=INSPECT"') - .wrap(120) - .recommendCommands() - .epilogue('For more information, see https://cloud.google.com/dlp/docs'); +module.exports.main = main; -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/numericalRiskAnalysis.js b/dlp/numericalRiskAnalysis.js new file mode 100644 index 0000000000..2e404c7ea0 --- /dev/null +++ b/dlp/numericalRiskAnalysis.js @@ -0,0 +1,161 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Numerical Risk Analysis +// description: Computes risk metrics of a column of numbers in a Google BigQuery table. +// usage: node numericalRiskAnalysis.js my-project tableProjectId datasetId tableId columnName topicId subscriptionId + +function main( + projectId, + tableProjectId, + datasetId, + tableId, + columnName, + topicId, + subscriptionId +) { + // [START dlp_numerical_stats] + // Import the Google Cloud client libraries + const DLP = require('@google-cloud/dlp'); + const {PubSub} = require('@google-cloud/pubsub'); + + // Instantiates clients + const dlp = new DLP.DlpServiceClient(); + const pubsub = new PubSub(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The project ID the table is stored under + // This may or (for public datasets) may not equal the calling project ID + // const tableProjectId = 'my-project'; + + // The ID of the dataset to inspect, e.g. 'my_dataset' + // const datasetId = 'my_dataset'; + + // The ID of the table to inspect, e.g. 'my_table' + // const tableId = 'my_table'; + + // The name of the column to compute risk metrics for, e.g. 
'age' + // Note that this column must be a numeric data type + // const columnName = 'firstName'; + + // The name of the Pub/Sub topic to notify once the job completes + // TODO(developer): create a Pub/Sub topic to use for this + // const topicId = 'MY-PUBSUB-TOPIC' + + // The name of the Pub/Sub subscription to use when listening for job + // completion notifications + // TODO(developer): create a Pub/Sub subscription to use for this + // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' + + async function numericalRiskAnalysis() { + const sourceTable = { + projectId: tableProjectId, + datasetId: datasetId, + tableId: tableId, + }; + + // Construct request for creating a risk analysis job + const request = { + parent: `projects/${projectId}/locations/global`, + riskJob: { + privacyMetric: { + numericalStatsConfig: { + field: { + name: columnName, + }, + }, + }, + sourceTable: sourceTable, + actions: [ + { + pubSub: { + topic: `projects/${projectId}/topics/${topicId}`, + }, + }, + ], + }, + }; + + // Create helper function for unpacking values + const getValue = obj => obj[Object.keys(obj)[0]]; + + // Run risk analysis job + const [topicResponse] = await pubsub.topic(topicId).get(); + const subscription = await topicResponse.subscription(subscriptionId); + const [jobsResponse] = await dlp.createDlpJob(request); + const jobName = jobsResponse.name; + // Watch the Pub/Sub topic until the DLP job finishes + await new Promise((resolve, reject) => { + const messageHandler = message => { + if (message.attributes && message.attributes.DlpJobName === jobName) { + message.ack(); + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + resolve(jobName); + } else { + message.nack(); + } + }; + + const errorHandler = err => { + subscription.removeListener('message', messageHandler); + subscription.removeListener('error', errorHandler); + reject(err); + }; + + subscription.on('message', messageHandler); + subscription.on('error', errorHandler); + }); + setTimeout(() => { + console.log(' Waiting for DLP job to fully complete'); + }, 500); + const [job] = await dlp.getDlpJob({name: jobName}); + const results = job.riskDetails.numericalStatsResult; + + console.log( + `Value Range: [${getValue(results.minValue)}, ${getValue( + results.maxValue + )}]` + ); + + // Print unique quantile values + let tempValue = null; + results.quantileValues.forEach((result, percent) => { + const value = getValue(result); + + // Only print new values + if ( + tempValue !== value && + !(tempValue && tempValue.equals && tempValue.equals(value)) + ) { + console.log(`Value at ${percent}% quantile: ${value}`); + tempValue = value; + } + }); + } + + numericalRiskAnalysis(); + // [END dlp_numerical_stats] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/quickstart.js b/dlp/quickstart.js index 637de7d565..0d60c0e433 100644 --- a/dlp/quickstart.js +++ b/dlp/quickstart.js @@ -1,4 +1,4 @@ -// Copyright 2017 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -14,10 +14,15 @@ 'use strict'; -// Imports the Google Cloud Data Loss Prevention library -const DLP = require('@google-cloud/dlp'); +// sample-metadata: +// title: Quickstart +// description: Inspects and assesses a string. 
+// usage: node quickstart.js my-project + +function main(projectId) { + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); -async function quickStart() { // [START dlp_quickstart] // Instantiates a client @@ -27,54 +32,60 @@ async function quickStart() { const string = 'Robert Frost'; // The project ID to run the API call under - const projectId = process.env.GCLOUD_PROJECT; + // const projectId = 'my-project'; - // The minimum likelihood required before returning a match - const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + async function quickStart() { + // The minimum likelihood required before returning a match + const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - // The maximum number of findings to report (0 = server maximum) - const maxFindings = 0; + // The maximum number of findings to report (0 = server maximum) + const maxFindings = 0; - // The infoTypes of information to match - const infoTypes = [{name: 'PERSON_NAME'}, {name: 'US_STATE'}]; + // The infoTypes of information to match + const infoTypes = [{name: 'PERSON_NAME'}, {name: 'US_STATE'}]; - // Whether to include the matching string - const includeQuote = true; + // Whether to include the matching string + const includeQuote = true; - // Construct item to inspect - const item = {value: string}; + // Construct item to inspect + const item = {value: string}; - // Construct request - const request = { - parent: `projects/${projectId}/locations/global`, - inspectConfig: { - infoTypes: infoTypes, - minLikelihood: minLikelihood, - limits: { - maxFindingsPerRequest: maxFindings, + // Construct request + const request = { + parent: `projects/${projectId}/locations/global`, + inspectConfig: { + infoTypes: infoTypes, + minLikelihood: minLikelihood, + limits: { + maxFindingsPerRequest: maxFindings, + }, + includeQuote: includeQuote, }, - includeQuote: includeQuote, - }, - item: item, - }; + item: item, + }; - // Run request - const [response] = await dlp.inspectContent(request); - const findings = response.result.findings; - if (findings.length > 0) { - console.log('Findings:'); - findings.forEach(finding => { - if (includeQuote) { - console.log(`\tQuote: ${finding.quote}`); - } - console.log(`\tInfo type: ${finding.infoType.name}`); - console.log(`\tLikelihood: ${finding.likelihood}`); - }); - } else { - console.log('No findings.'); + // Run request + const [response] = await dlp.inspectContent(request); + const findings = response.result.findings; + if (findings.length > 0) { + console.log('Findings:'); + findings.forEach(finding => { + if (includeQuote) { + console.log(`\tQuote: ${finding.quote}`); + } + console.log(`\tInfo type: ${finding.infoType.name}`); + console.log(`\tLikelihood: ${finding.likelihood}`); + }); + } else { + console.log('No findings.'); + } } + quickStart(); // [END dlp_quickstart] } -quickStart().catch(err => { - console.error(`Error in inspectString: ${err.message || err}`); + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; }); diff --git a/dlp/redact.js b/dlp/redact.js deleted file mode 100644 index 67afeea301..0000000000 --- a/dlp/redact.js +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -'use strict'; - -async function redactText(callingProjectId, string, minLikelihood, infoTypes) { - // [START dlp_redact_text] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // Construct transformation config which replaces sensitive info with its info type. - // E.g., "Her email is xxx@example.com" => "Her email is [EMAIL_ADDRESS]" - const replaceWithInfoTypeTransformation = { - primitiveTransformation: { - replaceWithInfoTypeConfig: {}, - }, - }; - - // Construct redaction request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - item: { - value: string, - }, - deidentifyConfig: { - infoTypeTransformations: { - transformations: [replaceWithInfoTypeTransformation], - }, - }, - inspectConfig: { - minLikelihood: minLikelihood, - infoTypes: infoTypes, - }, - }; - - // Run string redaction - try { - const [response] = await dlp.deidentifyContent(request); - const resultString = response.item.value; - console.log(`Redacted text: ${resultString}`); - } catch (err) { - console.log(`Error in deidentifyContent: ${err.message || err}`); - } - - // [END dlp_redact_text] -} - -async function redactImage( - callingProjectId, - filepath, - minLikelihood, - infoTypes, - outputPath -) { - // [START dlp_redact_image] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Imports required Node.js libraries - const mime = require('mime'); - const fs = require('fs'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The path to a local file to inspect. Can be a JPG or PNG image file. - // const filepath = 'path/to/image.png'; - - // The minimum likelihood required before redacting a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The infoTypes of information to redact - // const infoTypes = [{ name: 'EMAIL_ADDRESS' }, { name: 'PHONE_NUMBER' }]; - - // The local path to save the resulting image to. 
- // const outputPath = 'result.png'; - - const imageRedactionConfigs = infoTypes.map(infoType => { - return {infoType: infoType}; - }); - - // Load image - const fileTypeConstant = - ['image/jpeg', 'image/bmp', 'image/png', 'image/svg'].indexOf( - mime.getType(filepath) - ) + 1; - const fileBytes = Buffer.from(fs.readFileSync(filepath)).toString('base64'); - - // Construct image redaction request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - byteItem: { - type: fileTypeConstant, - data: fileBytes, - }, - inspectConfig: { - minLikelihood: minLikelihood, - infoTypes: infoTypes, - }, - imageRedactionConfigs: imageRedactionConfigs, - }; - - // Run image redaction request - try { - const [response] = await dlp.redactImage(request); - const image = response.redactedImage; - fs.writeFileSync(outputPath, image); - console.log(`Saved image redaction results to path: ${outputPath}`); - } catch (err) { - console.log(`Error in redactImage: ${err.message || err}`); - } - - // [END dlp_redact_image] -} - -const cli = require('yargs') - .demand(1) - .command( - 'string ', - 'Redact a string using the Data Loss Prevention API.', - {}, - opts => - redactText( - opts.callingProject, - opts.string, - opts.minLikelihood, - opts.infoTypes - ) - ) - .command( - 'image ', - 'Redact sensitive data from an image using the Data Loss Prevention API.', - {}, - opts => - redactImage( - opts.callingProject, - opts.filepath, - opts.minLikelihood, - opts.infoTypes, - opts.outputPath - ) - ) - .option('m', { - alias: 'minLikelihood', - default: 'LIKELIHOOD_UNSPECIFIED', - type: 'string', - choices: [ - 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', - 'UNLIKELY', - 'POSSIBLE', - 'LIKELY', - 'VERY_LIKELY', - ], - global: true, - }) - .option('t', { - alias: 'infoTypes', - required: true, - type: 'array', - global: true, - coerce: infoTypes => - infoTypes.map(type => { - return {name: type}; - }), - }) - .option('c', { - alias: 'callingProject', - default: process.env.GCLOUD_PROJECT || '', - type: 'string', - global: true, - }) - .example('node $0 image resources/test.png result.png -t MALE_NAME') - .wrap(120) - .recommendCommands() - .epilogue( - 'For more information, see https://cloud.google.com/dlp/docs. Optional flags are explained at https://cloud.google.com/dlp/docs/reference/rest/v2/projects.image/redact#ImageRedactionConfig' - ); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} diff --git a/dlp/redactImage.js b/dlp/redactImage.js new file mode 100644 index 0000000000..da893c2e85 --- /dev/null +++ b/dlp/redactImage.js @@ -0,0 +1,96 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Redact Image +// description: Redact sensitive data from an image using the Data Loss Prevention API. 
+// usage: node redactImage.js my-project filepath minLikelihood infoTypes outputPath + +function main(projectId, filepath, minLikelihood, infoTypes, outputPath) { + infoTypes = transformCLI(infoTypes); + // [START dlp_redact_image] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Imports required Node.js libraries + const mime = require('mime'); + const fs = require('fs'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The path to a local file to inspect. Can be a JPG or PNG image file. + // const filepath = 'path/to/image.png'; + + // The minimum likelihood required before redacting a match + // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; + + // The infoTypes of information to redact + // const infoTypes = [{ name: 'EMAIL_ADDRESS' }, { name: 'PHONE_NUMBER' }]; + + // The local path to save the resulting image to. + // const outputPath = 'result.png'; + async function redactImage() { + const imageRedactionConfigs = infoTypes.map(infoType => { + return {infoType: infoType}; + }); + + // Load image + const fileTypeConstant = + ['image/jpeg', 'image/bmp', 'image/png', 'image/svg'].indexOf( + mime.getType(filepath) + ) + 1; + const fileBytes = Buffer.from(fs.readFileSync(filepath)).toString('base64'); + + // Construct image redaction request + const request = { + parent: `projects/${projectId}/locations/global`, + byteItem: { + type: fileTypeConstant, + data: fileBytes, + }, + inspectConfig: { + minLikelihood: minLikelihood, + infoTypes: infoTypes, + }, + imageRedactionConfigs: imageRedactionConfigs, + }; + + // Run image redaction request + const [response] = await dlp.redactImage(request); + const image = response.redactedImage; + fs.writeFileSync(outputPath, image); + console.log(`Saved image redaction results to path: ${outputPath}`); + } + redactImage(); + // [END dlp_redact_image] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/redactText.js b/dlp/redactText.js new file mode 100644 index 0000000000..305dcd44b6 --- /dev/null +++ b/dlp/redactText.js @@ -0,0 +1,80 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// sample-metadata: +// title: Redact Text +// description: Redact sensitive data from text using the Data Loss Prevention API. 
+// usage: node redactText.js my-project string minLikelihood infoTypes + +function main(projectId, string, minLikelihood, infoTypes) { + infoTypes = transformCLI(infoTypes); + // [START dlp_redact_text] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // Construct transformation config which replaces sensitive info with its info type. + // E.g., "Her email is xxx@example.com" => "Her email is [EMAIL_ADDRESS]" + const replaceWithInfoTypeTransformation = { + primitiveTransformation: { + replaceWithInfoTypeConfig: {}, + }, + }; + + async function redactText() { + // Construct redaction request + const request = { + parent: `projects/${projectId}/locations/global`, + item: { + value: string, + }, + deidentifyConfig: { + infoTypeTransformations: { + transformations: [replaceWithInfoTypeTransformation], + }, + }, + inspectConfig: { + minLikelihood: minLikelihood, + infoTypes: infoTypes, + }, + }; + + // Run string redaction + const [response] = await dlp.deidentifyContent(request); + const resultString = response.item.value; + console.log(`Redacted text: ${resultString}`); + } + redactText(); + // [END dlp_redact_text] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); + +function transformCLI(infoTypes) { + infoTypes = infoTypes + ? infoTypes.split(',').map(type => { + return {name: type}; + }) + : undefined; + return infoTypes; +} diff --git a/dlp/reidentifyWithFpe.js b/dlp/reidentifyWithFpe.js new file mode 100644 index 0000000000..0fe8e14133 --- /dev/null +++ b/dlp/reidentifyWithFpe.js @@ -0,0 +1,103 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +// sample-metadata: +// title: Reidentify with FPE +// description: Reidentify sensitive data in a string using Format Preserving Encryption (FPE). 
+// usage: node reidentifyWithFpe.js my-project string alphabet surrogateType keyName wrappedKey + +function main(projectId, string, alphabet, surrogateType, keyName, wrappedKey) { + // [START dlp_reidentify_fpe] + // Imports the Google Cloud Data Loss Prevention library + const DLP = require('@google-cloud/dlp'); + + // Instantiates a client + const dlp = new DLP.DlpServiceClient(); + + // The project ID to run the API call under + // const projectId = 'my-project'; + + // The string to reidentify + // const string = 'My SSN is PHONE_TOKEN(9):#########'; + + // The set of characters to replace sensitive ones with + // For more information, see https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#ffxcommonnativealphabet + // const alphabet = 'ALPHA_NUMERIC'; + + // The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key + // const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'; + + // The encrypted ('wrapped') AES-256 key to use + // This key should be encrypted using the Cloud KMS key specified above + // const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY' + + // The name of the surrogate custom info type to use when reidentifying data + // const surrogateType = 'SOME_INFO_TYPE_DEID'; + + async function reidentifyWithFpe() { + // Construct deidentification request + const item = {value: string}; + const request = { + parent: `projects/${projectId}/locations/global`, + reidentifyConfig: { + infoTypeTransformations: { + transformations: [ + { + primitiveTransformation: { + cryptoReplaceFfxFpeConfig: { + cryptoKey: { + kmsWrapped: { + wrappedKey: wrappedKey, + cryptoKeyName: keyName, + }, + }, + commonAlphabet: alphabet, + surrogateInfoType: { + name: surrogateType, + }, + }, + }, + }, + ], + }, + }, + inspectConfig: { + customInfoTypes: [ + { + infoType: { + name: surrogateType, + }, + surrogateType: {}, + }, + ], + }, + item: item, + }; + + // Run reidentification request + const [response] = await dlp.reidentifyContent(request); + const reidentifiedItem = response.item; + console.log(reidentifiedItem.value); + } + reidentifyWithFpe(); + // [END dlp_reidentify_fpe] +} + +main(...process.argv.slice(2)); +process.on('unhandledRejection', err => { + console.error(err.message); + process.exitCode = 1; +}); diff --git a/dlp/risk.js b/dlp/risk.js deleted file mode 100644 index 74c66f63d5..0000000000 --- a/dlp/risk.js +++ /dev/null @@ -1,843 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -'use strict'; - -// sample-metadata: -// title: Risk Analysis -async function numericalRiskAnalysis( - callingProjectId, - tableProjectId, - datasetId, - tableId, - columnName, - topicId, - subscriptionId -) { - // [START dlp_numerical_stats] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the table is stored under - // This may or (for public datasets) may not equal the calling project ID - // const tableProjectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // const datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 'my_table' - // const tableId = 'my_table'; - - // The name of the column to compute risk metrics for, e.g. 'age' - // Note that this column must be a numeric data type - // const columnName = 'firstName'; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - const sourceTable = { - projectId: tableProjectId, - datasetId: datasetId, - tableId: tableId, - }; - - // Construct request for creating a risk analysis job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - riskJob: { - privacyMetric: { - numericalStatsConfig: { - field: { - name: columnName, - }, - }, - }, - sourceTable: sourceTable, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - // Create helper function for unpacking values - const getValue = obj => obj[Object.keys(obj)[0]]; - - try { - // Run risk analysis job - const [topicResponse] = await pubsub.topic(topicId).get(); - const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - setTimeout(() => { - console.log(' Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - const results = job.riskDetails.numericalStatsResult; - - console.log( - `Value Range: [${getValue(results.minValue)}, ${getValue( - results.maxValue - )}]` - ); - - // Print unique quantile values - let tempValue = null; - results.quantileValues.forEach((result, percent) => { - const value = getValue(result); - - // Only print new values - if ( - tempValue !== value && - 
!(tempValue && tempValue.equals && tempValue.equals(value)) - ) { - console.log(`Value at ${percent}% quantile: ${value}`); - tempValue = value; - } - }); - } catch (err) { - console.log(`Error in numericalRiskAnalysis: ${err.message || err}`); - } - - // [END dlp_numerical_stats] -} - -async function categoricalRiskAnalysis( - callingProjectId, - tableProjectId, - datasetId, - tableId, - columnName, - topicId, - subscriptionId -) { - // [START dlp_categorical_stats] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the table is stored under - // This may or (for public datasets) may not equal the calling project ID - // const tableProjectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // const datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 'my_table' - // const tableId = 'my_table'; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // The name of the column to compute risk metrics for, e.g. 'firstName' - // const columnName = 'firstName'; - - const sourceTable = { - projectId: tableProjectId, - datasetId: datasetId, - tableId: tableId, - }; - - // Construct request for creating a risk analysis job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - riskJob: { - privacyMetric: { - categoricalStatsConfig: { - field: { - name: columnName, - }, - }, - }, - sourceTable: sourceTable, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - // Create helper function for unpacking values - const getValue = obj => obj[Object.keys(obj)[0]]; - - try { - // Run risk analysis job - const [topicResponse] = await pubsub.topic(topicId).get(); - const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - setTimeout(() => { - console.log(' Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - const histogramBuckets = - job.riskDetails.categoricalStatsResult.valueFrequencyHistogramBuckets; - histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { - console.log(`Bucket 
${histogramBucketIdx}:`); - - // Print bucket stats - console.log( - ` Most common value occurs ${histogramBucket.valueFrequencyUpperBound} time(s)` - ); - console.log( - ` Least common value occurs ${histogramBucket.valueFrequencyLowerBound} time(s)` - ); - - // Print bucket values - console.log(`${histogramBucket.bucketSize} unique values total.`); - histogramBucket.bucketValues.forEach(valueBucket => { - console.log( - ` Value ${getValue(valueBucket.value)} occurs ${ - valueBucket.count - } time(s).` - ); - }); - }); - } catch (err) { - console.log(`Error in categoricalRiskAnalysis: ${err.message || err}`); - } - - // [END dlp_categorical_stats] -} - -async function kAnonymityAnalysis( - callingProjectId, - tableProjectId, - datasetId, - tableId, - topicId, - subscriptionId, - quasiIds -) { - // [START dlp_k_anonymity] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the table is stored under - // This may or (for public datasets) may not equal the calling project ID - // const tableProjectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // const datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 'my_table' - // const tableId = 'my_table'; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // A set of columns that form a composite key ('quasi-identifiers') - // const quasiIds = [{ name: 'age' }, { name: 'city' }]; - - const sourceTable = { - projectId: tableProjectId, - datasetId: datasetId, - tableId: tableId, - }; - - // Construct request for creating a risk analysis job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - riskJob: { - privacyMetric: { - kAnonymityConfig: { - quasiIds: quasiIds, - }, - }, - sourceTable: sourceTable, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - // Create helper function for unpacking values - const getValue = obj => obj[Object.keys(obj)[0]]; - - try { - // Run risk analysis job - const [topicResponse] = await pubsub.topic(topicId).get(); - const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', 
errorHandler); - }); - setTimeout(() => { - console.log(' Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - const histogramBuckets = - job.riskDetails.kAnonymityResult.equivalenceClassHistogramBuckets; - - histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { - console.log(`Bucket ${histogramBucketIdx}:`); - console.log( - ` Bucket size range: [${histogramBucket.equivalenceClassSizeLowerBound}, ${histogramBucket.equivalenceClassSizeUpperBound}]` - ); - - histogramBucket.bucketValues.forEach(valueBucket => { - const quasiIdValues = valueBucket.quasiIdsValues - .map(getValue) - .join(', '); - console.log(` Quasi-ID values: {${quasiIdValues}}`); - console.log(` Class size: ${valueBucket.equivalenceClassSize}`); - }); - }); - } catch (err) { - console.log(`Error in kAnonymityAnalysis: ${err.message || err}`); - } - - // [END dlp_k_anonymity] -} - -async function lDiversityAnalysis( - callingProjectId, - tableProjectId, - datasetId, - tableId, - topicId, - subscriptionId, - sensitiveAttribute, - quasiIds -) { - // [START dlp_l_diversity] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the table is stored under - // This may or (for public datasets) may not equal the calling project ID - // const tableProjectId = process.env.GCLOUD_PROJECT; - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // const datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 'my_table' - // const tableId = 'my_table'; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // The column to measure l-diversity relative to, e.g. 
'firstName' - // const sensitiveAttribute = 'name'; - - // A set of columns that form a composite key ('quasi-identifiers') - // const quasiIds = [{ name: 'age' }, { name: 'city' }]; - - const sourceTable = { - projectId: tableProjectId, - datasetId: datasetId, - tableId: tableId, - }; - - // Construct request for creating a risk analysis job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - riskJob: { - privacyMetric: { - lDiversityConfig: { - quasiIds: quasiIds, - sensitiveAttribute: { - name: sensitiveAttribute, - }, - }, - }, - sourceTable: sourceTable, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - // Create helper function for unpacking values - const getValue = obj => obj[Object.keys(obj)[0]]; - - try { - // Run risk analysis job - const [topicResponse] = await pubsub.topic(topicId).get(); - const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - setTimeout(() => { - console.log(' Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - const histogramBuckets = - job.riskDetails.lDiversityResult.sensitiveValueFrequencyHistogramBuckets; - - histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { - console.log(`Bucket ${histogramBucketIdx}:`); - - console.log( - `Bucket size range: [${histogramBucket.sensitiveValueFrequencyLowerBound}, ${histogramBucket.sensitiveValueFrequencyUpperBound}]` - ); - histogramBucket.bucketValues.forEach(valueBucket => { - const quasiIdValues = valueBucket.quasiIdsValues - .map(getValue) - .join(', '); - console.log(` Quasi-ID values: {${quasiIdValues}}`); - console.log(` Class size: ${valueBucket.equivalenceClassSize}`); - valueBucket.topSensitiveValues.forEach(valueObj => { - console.log( - ` Sensitive value ${getValue(valueObj.value)} occurs ${ - valueObj.count - } time(s).` - ); - }); - }); - }); - } catch (err) { - console.log(`Error in lDiversityAnalysis: ${err.message || err}`); - } - - // [END dlp_l_diversity] -} - -async function kMapEstimationAnalysis( - callingProjectId, - tableProjectId, - datasetId, - tableId, - topicId, - subscriptionId, - regionCode, - quasiIds -) { - // [START dlp_k_map] - // Import the Google Cloud client libraries - const DLP = require('@google-cloud/dlp'); - const {PubSub} = require('@google-cloud/pubsub'); - - // Instantiates clients - const dlp = new DLP.DlpServiceClient(); - const pubsub = new PubSub(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The project ID the table is stored under - // This may or (for public datasets) may not equal the calling project ID - // const tableProjectId = process.env.GCLOUD_PROJECT; - - // The ID of 
the dataset to inspect, e.g. 'my_dataset' - // const datasetId = 'my_dataset'; - - // The ID of the table to inspect, e.g. 'my_table' - // const tableId = 'my_table'; - - // The name of the Pub/Sub topic to notify once the job completes - // TODO(developer): create a Pub/Sub topic to use for this - // const topicId = 'MY-PUBSUB-TOPIC' - - // The name of the Pub/Sub subscription to use when listening for job - // completion notifications - // TODO(developer): create a Pub/Sub subscription to use for this - // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' - - // The ISO 3166-1 region code that the data is representative of - // Can be omitted if using a region-specific infoType (such as US_ZIP_5) - // const regionCode = 'USA'; - - // A set of columns that form a composite key ('quasi-identifiers'), and - // optionally their reidentification distributions - // const quasiIds = [{ field: { name: 'age' }, infoType: { name: 'AGE' }}]; - - const sourceTable = { - projectId: tableProjectId, - datasetId: datasetId, - tableId: tableId, - }; - - // Construct request for creating a risk analysis job - const request = { - parent: `projects/${callingProjectId}/locations/global`, - riskJob: { - privacyMetric: { - kMapEstimationConfig: { - quasiIds: quasiIds, - regionCode: regionCode, - }, - }, - sourceTable: sourceTable, - actions: [ - { - pubSub: { - topic: `projects/${callingProjectId}/topics/${topicId}`, - }, - }, - ], - }, - }; - - // Create helper function for unpacking values - const getValue = obj => obj[Object.keys(obj)[0]]; - - try { - // Run risk analysis job - const [topicResponse] = await pubsub.topic(topicId).get(); - const subscription = await topicResponse.subscription(subscriptionId); - const [jobsResponse] = await dlp.createDlpJob(request); - const jobName = jobsResponse.name; - // Watch the Pub/Sub topic until the DLP job finishes - await new Promise((resolve, reject) => { - const messageHandler = message => { - if (message.attributes && message.attributes.DlpJobName === jobName) { - message.ack(); - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - resolve(jobName); - } else { - message.nack(); - } - }; - - const errorHandler = err => { - subscription.removeListener('message', messageHandler); - subscription.removeListener('error', errorHandler); - reject(err); - }; - - subscription.on('message', messageHandler); - subscription.on('error', errorHandler); - }); - setTimeout(() => { - console.log(' Waiting for DLP job to fully complete'); - }, 500); - const [job] = await dlp.getDlpJob({name: jobName}); - const histogramBuckets = - job.riskDetails.kMapEstimationResult.kMapEstimationHistogram; - - histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { - console.log(`Bucket ${histogramBucketIdx}:`); - console.log( - ` Anonymity range: [${histogramBucket.minAnonymity}, ${histogramBucket.maxAnonymity}]` - ); - console.log(` Size: ${histogramBucket.bucketSize}`); - histogramBucket.bucketValues.forEach(valueBucket => { - const values = valueBucket.quasiIdsValues.map(value => getValue(value)); - console.log(` Values: ${values.join(' ')}`); - console.log( - ` Estimated k-map anonymity: ${valueBucket.estimatedAnonymity}` - ); - }); - }); - } catch (err) { - console.log(`Error in kMapEstimationAnalysis: ${err.message || err}`); - } - - // [END dlp_k_map] -} - -const cli = require(`yargs`) // eslint-disable-line - .demand(1) - .command( - 'numerical ', - 'Computes risk metrics of a column of numbers in a Google BigQuery 
table.', - {}, - opts => - numericalRiskAnalysis( - opts.callingProjectId, - opts.tableProjectId, - opts.datasetId, - opts.tableId, - opts.columnName, - opts.topicId, - opts.subscriptionId - ) - ) - .command( - 'categorical ', - 'Computes risk metrics of a column of data in a Google BigQuery table.', - {}, - opts => - categoricalRiskAnalysis( - opts.callingProjectId, - opts.tableProjectId, - opts.datasetId, - opts.tableId, - opts.columnName, - opts.topicId, - opts.subscriptionId - ) - ) - .command( - 'kAnonymity [quasiIdColumnNames..]', - 'Computes the k-anonymity of a column set in a Google BigQuery table.', - {}, - opts => - kAnonymityAnalysis( - opts.callingProjectId, - opts.tableProjectId, - opts.datasetId, - opts.tableId, - opts.topicId, - opts.subscriptionId, - opts.quasiIdColumnNames.map(f => { - return {name: f}; - }) - ) - ) - .command( - 'lDiversity [quasiIdColumnNames..]', - 'Computes the l-diversity of a column set in a Google BigQuery table.', - {}, - opts => - lDiversityAnalysis( - opts.callingProjectId, - opts.tableProjectId, - opts.datasetId, - opts.tableId, - opts.topicId, - opts.subscriptionId, - opts.sensitiveAttribute, - opts.quasiIdColumnNames.map(f => { - return {name: f}; - }) - ) - ) - .command( - 'kMap [quasiIdColumnNames..]', - 'Computes the k-map risk estimation of a column set in a Google BigQuery table.', - { - infoTypes: { - alias: 't', - type: 'array', - global: true, - default: [], - }, - regionCode: { - alias: 'r', - type: 'string', - global: true, - default: 'US', - }, - }, - opts => { - // Validate infoType count (required for CLI parsing, not the API itself) - if (opts.infoTypes.length !== opts.quasiIdColumnNames.length) { - console.error( - 'Number of infoTypes and number of quasi-identifiers must be equal!' - ); - process.exitCode = 1; - } else { - return kMapEstimationAnalysis( - opts.callingProjectId, - opts.tableProjectId, - opts.datasetId, - opts.tableId, - opts.topicId, - opts.subscriptionId, - opts.regionCode, - opts.quasiIdColumnNames.map((name, idx) => { - return { - field: { - name: name, - }, - infoType: { - name: opts.infoTypes[idx], - }, - }; - }) - ); - } - } - ) - .option('c', { - type: 'string', - alias: 'callingProjectId', - default: process.env.GCLOUD_PROJECT || '', - global: true, - }) - .option('p', { - type: 'string', - alias: 'tableProjectId', - default: process.env.GCLOUD_PROJECT || '', - global: true, - }) - .example( - 'node $0 numerical nhtsa_traffic_fatalities accident_2015 state_number my-topic my-subscription -p bigquery-public-data' - ) - .example( - 'node $0 categorical nhtsa_traffic_fatalities accident_2015 state_name my-topic my-subscription -p bigquery-public-data' - ) - .example( - 'node $0 kAnonymity nhtsa_traffic_fatalities accident_2015 my-topic my-subscription state_number county -p bigquery-public-data' - ) - .example( - 'node $0 lDiversity nhtsa_traffic_fatalities accident_2015 my-topic my-subscription city state_number county -p bigquery-public-data' - ) - .example( - 'node risk kMap san_francisco bikeshare_trips my-topic my-subscription zip_code -t US_ZIP_5 -p bigquery-public-data' - ) - .wrap(120) - .recommendCommands() - .epilogue('For more information, see https://cloud.google.com/dlp/docs.'); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} diff --git a/dlp/system-test/deid.test.js b/dlp/system-test/deid.test.js index c0ece85b51..e581141dc7 100644 --- a/dlp/system-test/deid.test.js +++ b/dlp/system-test/deid.test.js @@ -16,59 +16,86 @@ const path = require('path'); 
const {assert} = require('chai'); -const {describe, it} = require('mocha'); +const {describe, it, before} = require('mocha'); const fs = require('fs'); const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); -const cmd = 'node deid.js'; const harmfulString = 'My SSN is 372819127'; const harmlessString = 'My favorite color is blue'; const surrogateType = 'SSN_TOKEN'; const csvFile = 'resources/dates.csv'; const tempOutputFile = path.join(__dirname, 'temp.result.csv'); const dateShiftAmount = 30; -const dateFields = 'birth_date register_date'; +const dateFields = 'birth_date,register_date'; +const client = new DLP.DlpServiceClient(); describe('deid', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); // deidentify_masking it('should mask sensitive data in a string', () => { - const output = execSync(`${cmd} deidMask "${harmfulString}" -m x -n 5`); + const output = execSync( + `node deidentifyWithMask.js ${projectId} "${harmfulString}" x 5` + ); assert.include(output, 'My SSN is xxxxx9127'); }); it('should ignore insensitive data when masking a string', () => { - const output = execSync(`${cmd} deidMask "${harmlessString}"`); + const output = execSync( + `node deidentifyWithMask.js ${projectId} "${harmlessString}"` + ); assert.include(output, harmlessString); }); it('should handle masking errors', () => { - const output = execSync(`${cmd} deidMask "${harmfulString}" -n -1`); - assert.include(output, 'Error in deidentifyWithMask'); + let output; + try { + output = cp.execSync( + `node deidentifyWithMask.js ${projectId} "${harmfulString}" 'a' '-1'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // deidentify_fpe it('should handle FPE encryption errors', () => { - const output = execSync( - `${cmd} deidFpe "${harmfulString}" BAD_KEY_NAME BAD_KEY_NAME` - ); - assert.match(output, /Error in deidentifyWithFpe/); + let output; + try { + output = execSync( + `node deidentifyWithFpe.js ${projectId} "${harmfulString}" '[0-9A-Za-z]' 'BAD_KEY_NAME' 'BAD_KEY_NAME'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'invalid encoding'); }); // reidentify_fpe it('should handle FPE decryption errors', () => { - const output = execSync( - `${cmd} reidFpe "${harmfulString}" ${surrogateType} BAD_KEY_NAME BAD_KEY_NAME -a NUMERIC` - ); - assert.match(output, /Error in reidentifyWithFpe/); + let output; + try { + output = execSync( + `node reidentifyWithFpe.js ${projectId} "${harmfulString}" '[0-9A-Za-z]' ${surrogateType} 'BAD_KEY_NAME' 'BAD_KEY_NAME NUMERIC'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'invalid encoding'); }); // deidentify_date_shift it('should date-shift a CSV file', () => { const outputCsvFile = 'dates.actual.csv'; const output = execSync( - `${cmd} deidDateShift "${csvFile}" "${outputCsvFile}" ${dateShiftAmount} ${dateShiftAmount} ${dateFields}` + `node deidentifyWithDateShift.js ${projectId} "${csvFile}" "${outputCsvFile}" ${dateFields} ${dateShiftAmount} ${dateShiftAmount}` ); assert.include( output, @@ -81,9 +108,14 @@ describe('deid', () => { }); it('should handle date-shift errors', () => { - const output = execSync( - `${cmd} deidDateShift "${csvFile}" "${tempOutputFile}" ${dateShiftAmount} ${dateShiftAmount}` - ); - assert.match(output, /Error in deidentifyWithDateShift/); + let output; + try { + output = execSync( + `node 
deidentifyWithDateShift.js ${projectId} "${csvFile}" "${tempOutputFile}" ${dateShiftAmount} ${dateShiftAmount}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); }); diff --git a/dlp/system-test/inspect.test.js b/dlp/system-test/inspect.test.js index 55f1bf6d90..608169e0a0 100644 --- a/dlp/system-test/inspect.test.js +++ b/dlp/system-test/inspect.test.js @@ -20,15 +20,20 @@ const cp = require('child_process'); const {PubSub} = require('@google-cloud/pubsub'); const pubsub = new PubSub(); const uuid = require('uuid'); +const DLP = require('@google-cloud/dlp'); -const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); - -const cmd = 'node inspect.js'; const bucket = 'nodejs-docs-samples-dlp'; const dataProject = 'nodejs-docs-samples'; +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const client = new DLP.DlpServiceClient(); describe('inspect', () => { - // Create new custom topic/subscription + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); let topic, subscription; const topicName = `dlp-inspect-topic-${uuid.v4()}`; const subscriptionName = `dlp-inspect-subscription-${uuid.v4()}`; @@ -46,77 +51,95 @@ describe('inspect', () => { // inspect_string it('should inspect a string', () => { const output = execSync( - `${cmd} string "I'm Gary and my email is gary@example.com"` + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com"` ); assert.match(output, /Info type: EMAIL_ADDRESS/); }); it('should inspect a string with custom dictionary', () => { const output = execSync( - `${cmd} string "I'm Gary and my email is gary@example.com" -d "Gary,email"` + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com" 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "Gary,email"` ); assert.match(output, /Info type: CUSTOM_DICT_0/); }); it('should inspect a string with custom regex', () => { const output = execSync( - `${cmd} string "I'm Gary and my email is gary@example.com" -r "gary@example\\.com"` + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com" 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "gary@example\\.com"` ); assert.match(output, /Info type: CUSTOM_REGEX_0/); }); it('should handle a string with no sensitive data', () => { - const output = execSync(`${cmd} string "foo"`); + const output = execSync(`node inspectString.js ${projectId} string "foo"`); assert.include(output, 'No findings.'); }); it('should report string inspection handling errors', () => { - const output = execSync( - `${cmd} string "I'm Gary and my email is gary@example.com" -t BAD_TYPE` - ); - assert.match(output, /Error in inspectString/); + let output; + try { + output = execSync( + `node inspectString.js ${projectId} "I'm Gary and my email is gary@example.com" 'LIKELIHOOD_UNSPECIFIED' '0' BAD_TYPE` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'BAD_TYPE'); }); // inspect_file it('should inspect a local text file', () => { - const output = execSync(`${cmd} file resources/test.txt`); + const output = execSync( + `node inspectFile.js ${projectId} resources/test.txt` + ); assert.match(output, /Info type: PHONE_NUMBER/); assert.match(output, /Info type: EMAIL_ADDRESS/); }); it('should inspect a local text file with custom dictionary', () => { const output = execSync( - `${cmd} file resources/test.txt -d "gary@somedomain.com"` + `node inspectFile.js ${projectId} resources/test.txt 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' 
"Gary,email"` ); assert.match(output, /Info type: CUSTOM_DICT_0/); }); it('should inspect a local text file with custom regex', () => { const output = execSync( - `${cmd} file resources/test.txt -r "\\(\\d{3}\\) \\d{3}-\\d{4}"` + `node inspectFile.js ${projectId} resources/test.txt 'LIKELIHOOD_UNSPECIFIED' '0' 'PHONE_NUMBER' "\\(\\d{3}\\) \\d{3}-\\d{4}"` ); assert.match(output, /Info type: CUSTOM_REGEX_0/); }); it('should inspect a local image file', () => { - const output = execSync(`${cmd} file resources/test.png`); + const output = execSync( + `node inspectFile.js ${projectId} resources/test.png` + ); assert.match(output, /Info type: EMAIL_ADDRESS/); }); it('should handle a local file with no sensitive data', () => { - const output = execSync(`${cmd} file resources/harmless.txt`); + const output = execSync( + `node inspectFile.js ${projectId} resources/harmless.txt` + ); assert.match(output, /No findings/); }); it('should report local file handling errors', () => { - const output = execSync(`${cmd} file resources/harmless.txt -t BAD_TYPE`); - assert.match(output, /Error in inspectFile/); + let output; + try { + output = execSync( + `node inspectFile.js ${projectId} resources/harmless.txt 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // inspect_gcs_file_promise it.skip('should inspect a GCS text file', () => { const output = execSync( - `${cmd} gcsFile ${bucket} test.txt ${topicName} ${subscriptionName}` + `node inspectGCSFile.js ${projectId} ${bucket} test.txt ${topicName} ${subscriptionName}` ); assert.match(output, /Found \d instance\(s\) of infoType PHONE_NUMBER/); assert.match(output, /Found \d instance\(s\) of infoType EMAIL_ADDRESS/); @@ -124,7 +147,7 @@ describe('inspect', () => { it.skip('should inspect multiple GCS text files', () => { const output = execSync( - `${cmd} gcsFile ${bucket} "*.txt" ${topicName} ${subscriptionName}` + `node inspectGCSFile.js ${projectId} ${bucket} "*.txt" ${topicName} ${subscriptionName}` ); assert.match(output, /Found \d instance\(s\) of infoType PHONE_NUMBER/); assert.match(output, /Found \d instance\(s\) of infoType EMAIL_ADDRESS/); @@ -132,70 +155,85 @@ describe('inspect', () => { it.skip('should handle a GCS file with no sensitive data', () => { const output = execSync( - `${cmd} gcsFile ${bucket} harmless.txt ${topicName} ${subscriptionName}` + `node inspectGCSFile.js ${projectId} ${bucket} harmless.txt ${topicName} ${subscriptionName}` ); assert.match(output, /No findings/); }); it('should report GCS file handling errors', () => { - const output = execSync( - `${cmd} gcsFile ${bucket} harmless.txt ${topicName} ${subscriptionName} -t BAD_TYPE` - ); - assert.match(output, /Error in inspectGCSFile/); + let output; + try { + output = execSync( + `node inspectGCSFile.js ${projectId} ${bucket} harmless.txt ${topicName} ${subscriptionName} 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // inspect_datastore it.skip('should inspect Datastore', () => { const output = execSync( - `${cmd} datastore Person ${topicName} ${subscriptionName} --namespaceId DLP -p ${dataProject}` + `node inspectDatastore.js ${projectId} Person ${topicName} ${subscriptionName} --namespaceId DLP -p ${dataProject}` ); assert.match(output, /Found \d instance\(s\) of infoType EMAIL_ADDRESS/); }); it.skip('should handle Datastore with no sensitive data', () => { const output = execSync( - 
`${cmd} datastore Harmless ${topicName} ${subscriptionName} --namespaceId DLP -p ${dataProject}` + `node inspectDatastore.js ${projectId} Harmless ${topicName} ${subscriptionName} --namespaceId DLP -p ${dataProject}` ); assert.match(output, /No findings/); }); it('should report Datastore errors', () => { - const output = execSync( - `${cmd} datastore Harmless ${topicName} ${subscriptionName} --namespaceId DLP -t BAD_TYPE -p ${dataProject}` - ); - assert.match(output, /Error in inspectDatastore/); + let output; + try { + output = execSync( + `node inspectDatastore.js ${projectId} ${projectId} 'DLP' 'Person' ${topicName} ${subscriptionName} 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // inspect_bigquery it.skip('should inspect a Bigquery table', () => { const output = execSync( - `${cmd} bigquery integration_tests_dlp harmful ${topicName} ${subscriptionName} -p ${dataProject}` + `node inspectBigQuery.js ${projectId} integration_tests_dlp harmful ${topicName} ${subscriptionName} -p ${dataProject}` ); assert.match(output, /Found \d instance\(s\) of infoType PHONE_NUMBER/); }); it.skip('should handle a Bigquery table with no sensitive data', () => { const output = execSync( - `${cmd} bigquery integration_tests_dlp harmless ${topicName} ${subscriptionName} -p ${dataProject}` + `node inspectBigQuery.js ${projectId} integration_tests_dlp harmless ${topicName} ${subscriptionName} -p ${dataProject}` ); assert.match(output, /No findings/); }); it('should report Bigquery table handling errors', () => { - const output = execSync( - `${cmd} bigquery integration_tests_dlp harmless ${topicName} ${subscriptionName} -t BAD_TYPE -p ${dataProject}` - ); - assert.match(output, /Error in inspectBigquery/); + let output; + try { + output = execSync( + `node inspectBigQuery.js ${projectId} ${dataProject} integration_tests_dlp harmless ${topicName} ${subscriptionName} 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // CLI options // This test is potentially flaky, possibly because of model changes. it('should have a minLikelihood option', () => { const outputA = execSync( - `${cmd} string "My phone number is (123) 456-7890." -m VERY_LIKELY` + `node inspectString.js ${projectId} "My phone number is (123) 456-7890." VERY_LIKELY` ); const outputB = execSync( - `${cmd} string "My phone number is (123) 456-7890." -m UNLIKELY` + `node inspectString.js ${projectId} "My phone number is (123) 456-7890." UNLIKELY` ); assert.ok(outputA); assert.notMatch(outputA, /PHONE_NUMBER/); @@ -204,10 +242,10 @@ describe('inspect', () => { it('should have a maxFindings option', () => { const outputA = execSync( - `${cmd} string "My email is gary@example.com and my phone number is (223) 456-7890." -f 1` + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890." LIKELIHOOD_UNSPECIFIED 2` ); const outputB = execSync( - `${cmd} string "My email is gary@example.com and my phone number is (223) 456-7890." -f 2` + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890." LIKELIHOOD_UNSPECIFIED 3` ); assert.notStrictEqual( outputA.includes('PHONE_NUMBER'), @@ -219,22 +257,22 @@ describe('inspect', () => { it('should have an option to include quotes', () => { const outputA = execSync( - `${cmd} string "My phone number is (223) 456-7890." 
-q false` + `node inspectString.js ${projectId} "My phone number is (223) 456-7890." '' '' '' '' false` ); const outputB = execSync( - `${cmd} string "My phone number is (223) 456-7890."` + `node inspectString.js ${projectId} "My phone number is (223) 456-7890." '' '' '' '' ` ); assert.ok(outputA); - assert.notMatch(outputA, /\(223\) 456-7890/); - assert.match(outputB, /\(223\) 456-7890/); + assert.notMatch(outputB, /\(223\) 456-7890/); + assert.match(outputA, /\(223\) 456-7890/); }); it('should have an option to filter results by infoType', () => { const outputA = execSync( - `${cmd} string "My email is gary@example.com and my phone number is (223) 456-7890."` + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890."` ); const outputB = execSync( - `${cmd} string "My email is gary@example.com and my phone number is (223) 456-7890." -t PHONE_NUMBER` + `node inspectString.js ${projectId} "My email is gary@example.com and my phone number is (223) 456-7890." LIKELIHOOD_UNSPECIFIED 0 PHONE_NUMBER` ); assert.match(outputA, /EMAIL_ADDRESS/); assert.match(outputA, /PHONE_NUMBER/); diff --git a/dlp/system-test/jobs.test.js b/dlp/system-test/jobs.test.js index e49989f2c7..c8458a2f5b 100644 --- a/dlp/system-test/jobs.test.js +++ b/dlp/system-test/jobs.test.js @@ -21,16 +21,20 @@ const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); -const cmd = 'node jobs.js'; const badJobName = 'projects/not-a-project/dlpJobs/i-123456789'; -const testCallingProjectId = process.env.GCLOUD_PROJECT; const testTableProjectId = 'bigquery-public-data'; const testDatasetId = 'san_francisco'; const testTableId = 'bikeshare_trips'; const testColumnName = 'zip_code'; -describe('jobs', () => { +const client = new DLP.DlpServiceClient(); +describe('test', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); // Helper function for creating test jobs const createTestJob = async () => { // Initialize client library @@ -39,7 +43,7 @@ describe('jobs', () => { // Construct job request const request = { - parent: `projects/${testCallingProjectId}/locations/global`, + parent: `projects/${projectId}/locations/global`, riskJob: { privacyMetric: { categoricalStatsConfig: { @@ -72,7 +76,7 @@ describe('jobs', () => { async function deleteStaleJobs() { const dlp = new DLP.DlpServiceClient(); const request = { - parent: `projects/${testCallingProjectId}/locations/global`, + parent: `projects/${projectId}/locations/global`, filter: 'state=DONE', type: 'RISK_ANALYSIS_JOB', }; @@ -90,7 +94,7 @@ describe('jobs', () => { // dlp_list_jobs it('should list jobs', () => { - const output = execSync(`${cmd} list 'state=DONE'`); + const output = execSync(`node listJobs.js ${projectId} 'state=DONE'`); assert.match( output, /Job projects\/(\w|-)+\/locations\/global\/dlpJobs\/\w-\d+ status: DONE/ @@ -98,7 +102,9 @@ describe('jobs', () => { }); it('should list jobs of a given type', () => { - const output = execSync(`${cmd} list 'state=DONE' -t RISK_ANALYSIS_JOB`); + const output = execSync( + `node listJobs.js ${projectId} 'state=DONE' RISK_ANALYSIS_JOB` + ); assert.match( output, /Job projects\/(\w|-)+\/locations\/global\/dlpJobs\/r-\d+ status: DONE/ @@ -106,18 +112,29 @@ describe('jobs', () => { }); it('should handle job listing errors', () => { - const output = execSync(`${cmd} list 'state=NOPE'`); - assert.match(output, /Error in listJobs/); + let output; + try { + output = execSync(`node listJobs.js 
${projectId} 'state=NOPE'`); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // dlp_delete_job it('should delete job', () => { - const output = execSync(`${cmd} delete ${testJobName}`); + const output = execSync(`node deleteJob.js ${projectId} ${testJobName}`); assert.include(output, `Successfully deleted job ${testJobName}.`); }); it('should handle job deletion errors', () => { - const output = execSync(`${cmd} delete ${badJobName}`); + let output; + try { + output = execSync(`node deleteJob.js ${projectId} ${badJobName}`); + } catch (err) { + output = err.message; + } + console.log(output); assert.match(output, /Error in deleteJob/); }); }); diff --git a/dlp/system-test/metadata.test.js b/dlp/system-test/metadata.test.js index ea84a2ff04..c8ec161ea3 100644 --- a/dlp/system-test/metadata.test.js +++ b/dlp/system-test/metadata.test.js @@ -15,21 +15,28 @@ 'use strict'; const {assert} = require('chai'); -const {describe, it} = require('mocha'); +const {describe, it, before} = require('mocha'); const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); -const cmd = 'node metadata.js'; - +const client = new DLP.DlpServiceClient(); describe('metadata', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); it('should list info types', () => { - const output = execSync(`${cmd} infoTypes`); + const output = execSync(`node metadata.js ${projectId} infoTypes`); assert.match(output, /US_DRIVERS_LICENSE_NUMBER/); }); it('should filter listed info types', () => { - const output = execSync(`${cmd} infoTypes "supported_by=RISK_ANALYSIS"`); + const output = execSync( + `node metadata.js ${projectId} infoTypes "supported_by=RISK_ANALYSIS"` + ); assert.notMatch(output, /US_DRIVERS_LICENSE_NUMBER/); }); }); diff --git a/dlp/system-test/quickstart.test.js b/dlp/system-test/quickstart.test.js index 291b65d1a0..2e000674b7 100644 --- a/dlp/system-test/quickstart.test.js +++ b/dlp/system-test/quickstart.test.js @@ -15,14 +15,21 @@ 'use strict'; const {assert} = require('chai'); -const {describe, it} = require('mocha'); +const {describe, it, before} = require('mocha'); const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); +const client = new DLP.DlpServiceClient(); describe('quickstart', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); it('should run', () => { - const output = execSync('node quickstart.js'); + const output = execSync(`node quickstart.js ${projectId}`); assert.match(output, /Info type: PERSON_NAME/); }); }); diff --git a/dlp/system-test/redact.test.js b/dlp/system-test/redact.test.js index 525087ccbd..5590a26ed8 100644 --- a/dlp/system-test/redact.test.js +++ b/dlp/system-test/redact.test.js @@ -15,18 +15,20 @@ 'use strict'; const {assert} = require('chai'); -const {describe, it} = require('mocha'); +const {describe, it, before} = require('mocha'); const fs = require('fs'); const cp = require('child_process'); const {PNG} = require('pngjs'); const pixelmatch = require('pixelmatch'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); -const cmd = 'node redact.js'; const testImage = 'resources/test.png'; const testResourcePath = 'system-test/resources'; +const client = new DLP.DlpServiceClient(); + async function readImage(filePath) { 
return new Promise((resolve, reject) => { fs.createReadStream(filePath) @@ -52,26 +54,30 @@ async function getImageDiffPercentage(image1Path, image2Path) { ); return diffPixels / (diff.width * diff.height); } - describe('redact', () => { + let projectId; + + before(async () => { + projectId = await client.getProjectId(); + }); // redact_text it('should redact a single sensitive data type from a string', () => { const output = execSync( - `${cmd} string "My email is jenny@example.com" -t EMAIL_ADDRESS` + `node redactText.js ${projectId} "My email is jenny@example.com" -t EMAIL_ADDRESS` ); assert.match(output, /My email is \[EMAIL_ADDRESS\]/); }); it('should redact multiple sensitive data types from a string', () => { const output = execSync( - `${cmd} string "I am 29 years old and my email is jenny@example.com" -t EMAIL_ADDRESS AGE` + `node redactText.js ${projectId} "I am 29 years old and my email is jenny@example.com" LIKELIHOOD_UNSPECIFIED 'EMAIL_ADDRESS,AGE'` ); assert.match(output, /I am \[AGE\] and my email is \[EMAIL_ADDRESS\]/); }); it('should handle string with no sensitive data', () => { const output = execSync( - `${cmd} string "No sensitive data to redact here" -t EMAIL_ADDRESS AGE` + `node redactText.js ${projectId} "No sensitive data to redact here" LIKELIHOOD_UNSPECIFIED 'EMAIL_ADDRESS,AGE'` ); assert.match(output, /No sensitive data to redact here/); }); @@ -80,7 +86,7 @@ describe('redact', () => { it('should redact a single sensitive data type from an image', async () => { const testName = 'redact-single-type'; const output = execSync( - `${cmd} image ${testImage} ${testName}.actual.png -t PHONE_NUMBER` + `node redactImage.js ${projectId} ${testImage} 'LIKELIHOOD_UNSPECIFIED' 'PHONE_NUMBER' ${testName}.actual.png` ); assert.match(output, /Saved image redaction results to path/); const difference = await getImageDiffPercentage( @@ -93,7 +99,7 @@ describe('redact', () => { it('should redact multiple sensitive data types from an image', async () => { const testName = 'redact-multiple-types'; const output = execSync( - `${cmd} image ${testImage} ${testName}.actual.png -t PHONE_NUMBER EMAIL_ADDRESS` + `node redactImage.js ${projectId} ${testImage} LIKELIHOOD_UNSPECIFIED 'PHONE_NUMBER,EMAIL_ADDRESS' ${testName}.actual.png` ); assert.match(output, /Saved image redaction results to path/); const difference = await getImageDiffPercentage( @@ -104,14 +110,26 @@ describe('redact', () => { }); it('should report info type errors', () => { - const output = execSync( - `${cmd} string "My email is jenny@example.com" -t NONEXISTENT` - ); - assert.match(output, /Error in deidentifyContent/); + let output; + try { + output = execSync( + `node redactText.js ${projectId} "My email is jenny@example.com" LIKELIHOOD_UNSPECIFIED 'NONEXISTENT'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); it('should report image redaction handling errors', () => { - const output = execSync(`${cmd} image ${testImage} output.png -t BAD_TYPE`); - assert.match(output, /Error in redactImage/); + let output; + try { + output = execSync( + `node redactImage.js ${projectId} ${testImage} output.png BAD_TYPE` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); }); diff --git a/dlp/system-test/risk.test.js b/dlp/system-test/risk.test.js index 45641c93d3..b62d937e07 100644 --- a/dlp/system-test/risk.test.js +++ b/dlp/system-test/risk.test.js @@ -19,6 +19,7 @@ const {describe, it, before, after} = require('mocha'); const uuid 
= require('uuid'); const {PubSub} = require('@google-cloud/pubsub'); const cp = require('child_process'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => { return cp.execSync(cmd, { @@ -27,12 +28,11 @@ const execSync = cmd => { }); }; -const cmd = 'node risk.js'; const dataset = 'integration_tests_dlp'; const uniqueField = 'Name'; const numericField = 'Age'; -const testProjectId = process.env.GCLOUD_PROJECT; const pubsub = new PubSub(); +const client = new DLP.DlpServiceClient(); /* * The tests in this file rely on a table in BigQuery entitled @@ -44,11 +44,14 @@ const pubsub = new PubSub(); * Insert into this table a few rows of Age/Name pairs. */ describe('risk', () => { + let projectId; // Create new custom topic/subscription - let topic, subscription; - const topicName = `dlp-risk-topic-${uuid.v4()}-${Date.now()}`; - const subscriptionName = `dlp-risk-subscription-${uuid.v4()}-${Date.now()}`; + let topic, subscription, topicName, subscriptionName; + before(async () => { + topicName = `dlp-risk-topic-${uuid.v4()}-${Date.now()}`; + subscriptionName = `dlp-risk-subscription-${uuid.v4()}-${Date.now()}`; + projectId = await client.getProjectId(); [topic] = await pubsub.createTopic(topicName); [subscription] = await topic.createSubscription(subscriptionName); await deleteOldTopics(); @@ -84,61 +87,77 @@ describe('risk', () => { // numericalRiskAnalysis it('should perform numerical risk analysis', () => { const output = execSync( - `${cmd} numerical ${dataset} harmful ${numericField} ${topicName} ${subscriptionName} -p ${testProjectId}` + `node numericalRiskAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${numericField} ${topicName} ${subscriptionName}` ); assert.match(output, /Value at 0% quantile:/); assert.match(output, /Value at \d+% quantile:/); }); it('should handle numerical risk analysis errors', () => { - const output = execSync( - `${cmd} numerical ${dataset} nonexistent ${numericField} ${topicName} ${subscriptionName} -p ${testProjectId}` - ); - assert.match(output, /Error in numericalRiskAnalysis/); + let output; + try { + output = execSync( + `node numericalRiskAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${numericField} ${topicName} ${subscriptionName}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'NOT_FOUND'); }); // categoricalRiskAnalysis it('should perform categorical risk analysis on a string field', () => { const output = execSync( - `${cmd} categorical ${dataset} harmful ${uniqueField} ${topicName} ${subscriptionName} -p ${testProjectId}` + `node categoricalRiskAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${uniqueField} ${topicName} ${subscriptionName}` ); assert.match(output, /Most common value occurs \d time\(s\)/); }); it('should perform categorical risk analysis on a number field', () => { const output = execSync( - `${cmd} categorical ${dataset} harmful ${numericField} ${topicName} ${subscriptionName} -p ${testProjectId}` + `node categoricalRiskAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${numericField} ${topicName} ${subscriptionName}` ); assert.match(output, /Most common value occurs \d time\(s\)/); }); it('should handle categorical risk analysis errors', () => { - const output = execSync( - `${cmd} categorical ${dataset} nonexistent ${uniqueField} ${topicName} ${subscriptionName} -p ${testProjectId}` - ); - assert.match(output, /Error in categoricalRiskAnalysis/); + let output; + try { + output = execSync( + `node categoricalRiskAnalysis.js ${projectId} ${projectId} 
${dataset} nonexistent ${uniqueField} ${topicName} ${subscriptionName}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); }); // kAnonymityAnalysis it('should perform k-anonymity analysis on a single field', () => { const output = execSync( - `${cmd} kAnonymity ${dataset} harmful ${topicName} ${subscriptionName} ${numericField} -p ${testProjectId}` + `node kAnonymityAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} ${numericField}` ); - assert.match(output, /Quasi-ID values:/); - assert.match(output, /Class size: \d/); + console.log(output); + assert.include(output, 'Quasi-ID values:'); + assert.include(output, 'Class size:'); }); it('should handle k-anonymity analysis errors', () => { - const output = execSync( - `${cmd} kAnonymity ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField} -p ${testProjectId}` - ); - assert.match(output, /Error in kAnonymityAnalysis/); + let output; + try { + output = execSync( + `node kAnonymityAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); }); // kMapAnalysis it('should perform k-map analysis on a single field', () => { const output = execSync( - `${cmd} kMap ${dataset} harmful ${topicName} ${subscriptionName} ${numericField} -t AGE -p ${testProjectId}` + `node kMapEstimationAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} 'US' ${numericField} AGE` ); assert.match(output, /Anonymity range: \[\d+, \d+\]/); assert.match(output, /Size: \d/); @@ -146,24 +165,29 @@ describe('risk', () => { }); it('should handle k-map analysis errors', () => { - const output = execSync( - `${cmd} kMap ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField} -t AGE -p ${testProjectId}` - ); - assert.match(output, /Error in kMapEstimationAnalysis/); + let output; + try { + output = execSync( + `node kMapEstimationAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField} AGE` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); }); it('should check that numbers of quasi-ids and info types are equal', () => { assert.throws(() => { execSync( - `${cmd} kMap ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField} -t AGE GENDER -p ${testProjectId}` + `node kMapEstimationAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} 'US' 'Age,Gender' AGE` ); - }, /Number of infoTypes and number of quasi-identifiers must be equal!/); + }, /3 INVALID_ARGUMENT: InfoType name cannot be empty of a TaggedField/); }); // lDiversityAnalysis it('should perform l-diversity analysis on a single field', () => { const output = execSync( - `${cmd} lDiversity ${dataset} harmful ${uniqueField} ${topicName} ${subscriptionName} ${numericField} -p ${testProjectId}` + `node lDiversityAnalysis.js ${projectId} ${projectId} ${dataset} harmful ${topicName} ${subscriptionName} ${uniqueField} ${numericField}` ); assert.match(output, /Quasi-ID values:/); assert.match(output, /Class size: \d/); @@ -171,9 +195,14 @@ describe('risk', () => { }); it('should handle l-diversity analysis errors', () => { - const output = execSync( - `${cmd} lDiversity ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField} -p ${testProjectId}` - ); - assert.match(output, /Error in lDiversityAnalysis/); + 
let output; + try { + output = execSync( + `node lDiversityAnalysis.js ${projectId} ${projectId} ${dataset} nonexistent ${topicName} ${subscriptionName} ${numericField}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); }); }); diff --git a/dlp/system-test/temp.result.csv b/dlp/system-test/temp.result.csv new file mode 100644 index 0000000000..2329cb63ce --- /dev/null +++ b/dlp/system-test/temp.result.csv @@ -0,0 +1,5 @@ +name,birth_date,register_date,credit_card +Ann,1/31/1980,8/20/1996,4532908762519852 +James,4/5/1988,5/9/2001,4301261899725540 +Dan,9/13/1945,12/15/2011,4620761856015295 +Laura,12/3/1992,2/3/2017,4564981067258901 diff --git a/dlp/system-test/templates.test.js b/dlp/system-test/templates.test.js index 0b774d6d3d..16b330d9d6 100644 --- a/dlp/system-test/templates.test.js +++ b/dlp/system-test/templates.test.js @@ -15,16 +15,19 @@ 'use strict'; const {assert} = require('chai'); -const {describe, it} = require('mocha'); +const {describe, it, before} = require('mocha'); const cp = require('child_process'); const uuid = require('uuid'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); -const cmd = 'node templates.js'; const templateName = ''; +const client = new DLP.DlpServiceClient(); describe('templates', () => { + let projectId; + let fullTemplateName; const INFO_TYPE = 'PERSON_NAME'; const MIN_LIKELIHOOD = 'VERY_LIKELY'; const MAX_FINDINGS = 5; @@ -32,42 +35,54 @@ describe('templates', () => { const DISPLAY_NAME = `My Template ${uuid.v4()}`; const TEMPLATE_NAME = `my-template-${uuid.v4()}`; - const fullTemplateName = `projects/${process.env.GCLOUD_PROJECT}/locations/global/inspectTemplates/${TEMPLATE_NAME}`; + before(async () => { + projectId = await client.getProjectId(); + fullTemplateName = `projects/${projectId}/locations/global/inspectTemplates/${TEMPLATE_NAME}`; + }); // create_inspect_template it('should create template', () => { const output = execSync( - `${cmd} create -m ${MIN_LIKELIHOOD} -t ${INFO_TYPE} -f ${MAX_FINDINGS} -q ${INCLUDE_QUOTE} -d "${DISPLAY_NAME}" -i "${TEMPLATE_NAME}"` + `node createInspectTemplate.js ${projectId} "${TEMPLATE_NAME}" "${DISPLAY_NAME}" ${INFO_TYPE} ${INCLUDE_QUOTE} ${MIN_LIKELIHOOD} ${MAX_FINDINGS}` ); + console.log(output); assert.include(output, `Successfully created template ${fullTemplateName}`); }); it('should handle template creation errors', () => { - const output = execSync(`${cmd} create -i invalid_template#id`); - assert.match(output, /Error in createInspectTemplate/); + let output; + try { + output = execSync( + `node createInspectTemplate.js ${projectId} invalid_template#id` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); // list_inspect_templates it('should list templates', () => { - const output = execSync(`${cmd} list`); + const output = execSync(`node listInspectTemplates.js ${projectId}`); assert.include(output, `Template ${templateName}`); assert.match(output, /Created: \d{1,2}\/\d{1,2}\/\d{4}/); assert.match(output, /Updated: \d{1,2}\/\d{1,2}\/\d{4}/); }); it('should pass creation settings to template', () => { - const output = execSync(`${cmd} list`); - assert.include(output, `Template ${fullTemplateName}`); - assert.include(output, `Display name: ${DISPLAY_NAME}`); - assert.include(output, `InfoTypes: ${INFO_TYPE}`); - assert.include(output, `Minimum likelihood: ${MIN_LIKELIHOOD}`); - assert.include(output, `Include quotes: ${INCLUDE_QUOTE}`); - assert.include(output, 
`Max findings per request: ${MAX_FINDINGS}`); + const output = execSync(`node listInspectTemplates.js ${projectId}`); + assert.include(output, fullTemplateName); + assert.include(output, DISPLAY_NAME); + assert.include(output, INFO_TYPE); + assert.include(output, MIN_LIKELIHOOD); + assert.include(output, MAX_FINDINGS); }); // delete_inspect_template it('should delete template', () => { - const output = execSync(`${cmd} delete ${fullTemplateName}`); + const output = execSync( + `node deleteInspectTemplate.js ${projectId} ${fullTemplateName}` + ); assert.include( output, `Successfully deleted template ${fullTemplateName}.` @@ -75,7 +90,14 @@ describe('templates', () => { }); it('should handle template deletion errors', () => { - const output = execSync(`${cmd} delete BAD_TEMPLATE`); - assert.match(output, /Error in deleteInspectTemplate/); + let output; + try { + output = execSync( + `node deleteInspectTemplate.js ${projectId} BAD_TEMPLATE` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'INVALID_ARGUMENT'); }); }); diff --git a/dlp/system-test/triggers.test.js b/dlp/system-test/triggers.test.js index 7271623a03..907b03b9dd 100644 --- a/dlp/system-test/triggers.test.js +++ b/dlp/system-test/triggers.test.js @@ -15,17 +15,19 @@ 'use strict'; const {assert} = require('chai'); -const {describe, it} = require('mocha'); +const {describe, it, before} = require('mocha'); const cp = require('child_process'); const uuid = require('uuid'); +const DLP = require('@google-cloud/dlp'); const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); +const client = new DLP.DlpServiceClient(); + describe('triggers', () => { - const projectId = process.env.GCLOUD_PROJECT; - const cmd = 'node triggers.js'; + let projectId; + let fullTriggerName; const triggerName = `my-trigger-${uuid.v4()}`; - const fullTriggerName = `projects/${projectId}/locations/global/jobTriggers/${triggerName}`; const triggerDisplayName = `My Trigger Display Name: ${uuid.v4()}`; const triggerDescription = `My Trigger Description: ${uuid.v4()}`; const infoType = 'PERSON_NAME'; @@ -33,16 +35,20 @@ describe('triggers', () => { const maxFindings = 5; const bucketName = process.env.BUCKET_NAME; + before(async () => { + projectId = await client.getProjectId(); + fullTriggerName = `projects/${projectId}/locations/global/jobTriggers/${triggerName}`; + }); + it('should create a trigger', () => { const output = execSync( - `${cmd} create ${bucketName} 1 -n ${triggerName} --autoPopulateTimespan \ - -m ${minLikelihood} -t ${infoType} -f ${maxFindings} -d "${triggerDisplayName}" -s "${triggerDescription}"` + `node createTrigger.js ${projectId} ${triggerName} "${triggerDisplayName}" "${triggerDescription}" ${bucketName} true '1' ${infoType} ${minLikelihood} ${maxFindings}` ); assert.include(output, `Successfully created trigger ${fullTriggerName}`); }); it('should list triggers', () => { - const output = execSync(`${cmd} list`); + const output = execSync(`node listTriggers.js ${projectId}`); assert.include(output, `Trigger ${fullTriggerName}`); assert.include(output, `Display Name: ${triggerDisplayName}`); assert.include(output, `Description: ${triggerDescription}`); @@ -53,19 +59,33 @@ describe('triggers', () => { }); it('should delete a trigger', () => { - const output = execSync(`${cmd} delete ${fullTriggerName}`); + const output = execSync( + `node deleteTrigger.js ${projectId} ${fullTriggerName}` + ); assert.include(output, `Successfully deleted trigger ${fullTriggerName}.`); }); it('should handle trigger creation 
errors', () => { - const output = execSync( - `${cmd} create ${bucketName} 1 -n "@@@@@" -m ${minLikelihood} -t ${infoType} -f ${maxFindings}` - ); - assert.match(output, /Error in createTrigger/); + let output; + try { + output = execSync( + `node createTrigger.js ${projectId} 'name' "${triggerDisplayName}" ${bucketName} true 1 "@@@@@" ${minLikelihood} ${maxFindings}` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); }); it('should handle trigger deletion errors', () => { - const output = execSync(`${cmd} delete bad-trigger-path`); - assert.match(output, /Error in deleteTrigger/); + let output; + try { + output = execSync( + `node deleteTrigger.js ${projectId} 'bad-trigger-path'` + ); + } catch (err) { + output = err.message; + } + assert.include(output, 'fail'); }); }); diff --git a/dlp/templates.js b/dlp/templates.js deleted file mode 100644 index 31e5ceccdb..0000000000 --- a/dlp/templates.js +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -'use strict'; - -// sample-metadata: -// title: Inspect Templates -async function createInspectTemplate( - callingProjectId, - templateId, - displayName, - infoTypes, - includeQuote, - minLikelihood, - maxFindings -) { - // [START dlp_create_inspect_template] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // Whether to include the matching string - // const includeQuote = true; - - // (Optional) The name of the template to be created. 
- // const templateId = 'my-template'; - - // (Optional) The human-readable name to give the template - // const displayName = 'My template'; - - // Construct the inspection configuration for the template - const inspectConfig = { - infoTypes: infoTypes, - minLikelihood: minLikelihood, - includeQuote: includeQuote, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }; - - // Construct template-creation request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - inspectTemplate: { - inspectConfig: inspectConfig, - displayName: displayName, - }, - templateId: templateId, - }; - - try { - const [response] = await dlp.createInspectTemplate(request); - const templateName = response.name; - console.log(`Successfully created template ${templateName}.`); - } catch (err) { - console.log(`Error in createInspectTemplate: ${err.message || err}`); - } - - // [END dlp_create_inspect_template] -} - -async function listInspectTemplates(callingProjectId) { - // [START dlp_list_inspect_templates] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // Helper function to pretty-print dates - const formatDate = date => { - const msSinceEpoch = parseInt(date.seconds, 10) * 1000; - return new Date(msSinceEpoch).toLocaleString('en-US'); - }; - - // Construct template-listing request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - }; - - try { - // Run template-deletion request - const [templates] = await dlp.listInspectTemplates(request); - - templates.forEach(template => { - console.log(`Template ${template.name}`); - if (template.displayName) { - console.log(` Display name: ${template.displayName}`); - } - - console.log(` Created: ${formatDate(template.createTime)}`); - console.log(` Updated: ${formatDate(template.updateTime)}`); - - const inspectConfig = template.inspectConfig; - const infoTypes = inspectConfig.infoTypes.map(x => x.name); - console.log(' InfoTypes:', infoTypes.join(' ')); - console.log(' Minimum likelihood:', inspectConfig.minLikelihood); - console.log(' Include quotes:', inspectConfig.includeQuote); - - const limits = inspectConfig.limits; - console.log(' Max findings per request:', limits.maxFindingsPerRequest); - }); - } catch (err) { - console.log(`Error in listInspectTemplates: ${err.message || err}`); - } - - // [END dlp_list_inspect_templates] -} - -async function deleteInspectTemplate(templateName) { - // [START dlp_delete_inspect_template] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The name of the template to delete - // Parent project ID is automatically extracted from this parameter - // const templateName = 'projects/YOUR_PROJECT_ID/inspectTemplates/#####' - - // Construct template-deletion request - const request = { - name: templateName, - }; - - try { - // Run template-deletion request - await dlp.deleteInspectTemplate(request); - console.log(`Successfully deleted template ${templateName}.`); - } catch (err) { - console.log(`Error in deleteInspectTemplate: ${err.message || err}`); - } - - // [END dlp_delete_inspect_template] -} - -const cli = require(`yargs`) // eslint-disable-line - .demand(1) - .command( - 'create', - 'Create a new DLP inspection 
configuration template.', - { - minLikelihood: { - alias: 'm', - default: 'LIKELIHOOD_UNSPECIFIED', - type: 'string', - choices: [ - 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', - 'UNLIKELY', - 'POSSIBLE', - 'LIKELY', - 'VERY_LIKELY', - ], - global: true, - }, - infoTypes: { - alias: 't', - default: ['PHONE_NUMBER', 'EMAIL_ADDRESS', 'CREDIT_CARD_NUMBER'], - type: 'array', - global: true, - coerce: infoTypes => - infoTypes.map(type => { - return {name: type}; - }), - }, - includeQuote: { - alias: 'q', - default: true, - type: 'boolean', - global: true, - }, - maxFindings: { - alias: 'f', - default: 0, - type: 'number', - global: true, - }, - templateId: { - alias: 'i', - default: '', - type: 'string', - global: true, - }, - displayName: { - alias: 'd', - default: '', - type: 'string', - global: true, - }, - }, - opts => - createInspectTemplate( - opts.callingProjectId, - opts.templateId, - opts.displayName, - opts.infoTypes, - opts.includeQuote, - opts.minLikelihood, - opts.maxFindings - ) - ) - .command('list', 'List DLP inspection configuration templates.', {}, opts => - listInspectTemplates(opts.callingProjectId) - ) - .command( - 'delete <templateName>', - 'Delete the DLP inspection configuration template with the specified name.', - {}, - opts => deleteInspectTemplate(opts.templateName) - ) - .option('c', { - type: 'string', - alias: 'callingProjectId', - default: process.env.GCLOUD_PROJECT || '', - global: true, - }) - .option('p', { - type: 'string', - alias: 'tableProjectId', - default: process.env.GCLOUD_PROJECT || '', - global: true, - }) - .example( - 'node $0 create -m VERY_LIKELY -t PERSON_NAME -f 5 -q false -i my-template-id' - ) - .example('node $0 list') - .example('node $0 delete projects/my-project/inspectTemplates/#####') - .wrap(120) - .recommendCommands() - .epilogue('For more information, see https://cloud.google.com/dlp/docs.'); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -} diff --git a/dlp/triggers.js b/dlp/triggers.js deleted file mode 100644 index a1f70d1c9b..0000000000 --- a/dlp/triggers.js +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright 2017 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -'use strict'; - -// sample-metadata: -// title: Job Triggers -async function createTrigger( - callingProjectId, - triggerId, - displayName, - description, - bucketName, - autoPopulateTimespan, - scanPeriod, - infoTypes, - minLikelihood, - maxFindings -) { - // [START dlp_create_trigger] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // (Optional) The name of the trigger to be created.
- // const triggerId = 'my-trigger'; - - // (Optional) A display name for the trigger to be created - // const displayName = 'My Trigger'; - - // (Optional) A description for the trigger to be created - // const description = "This is a sample trigger."; - - // The name of the bucket to scan. - // const bucketName = 'YOUR-BUCKET'; - - // Limit scan to new content only. - // const autoPopulateTimespan = true; - - // How often to wait between scans, in days (minimum = 1 day) - // const scanPeriod = 1; - - // The infoTypes of information to match - // const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; - - // The minimum likelihood required before returning a match - // const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; - - // The maximum number of findings to report per request (0 = server maximum) - // const maxFindings = 0; - - // Get reference to the bucket to be inspected - const storageItem = { - cloudStorageOptions: { - fileSet: {url: `gs://${bucketName}/*`}, - }, - timeSpanConfig: { - enableAutoPopulationOfTimespanConfig: autoPopulateTimespan, - }, - }; - - // Construct job to be triggered - const job = { - inspectConfig: { - infoTypes: infoTypes, - minLikelihood: minLikelihood, - limits: { - maxFindingsPerRequest: maxFindings, - }, - }, - storageConfig: storageItem, - }; - - // Construct trigger creation request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - jobTrigger: { - inspectJob: job, - displayName: displayName, - description: description, - triggers: [ - { - schedule: { - recurrencePeriodDuration: { - seconds: scanPeriod * 60 * 60 * 24, // Trigger the scan daily - }, - }, - }, - ], - status: 'HEALTHY', - }, - triggerId: triggerId, - }; - - try { - // Run trigger creation request - const [trigger] = await dlp.createJobTrigger(request); - console.log(`Successfully created trigger ${trigger.name}.`); - } catch (err) { - console.log(`Error in createTrigger: ${err.message || err}`); - } - - // [END dlp_create_trigger] -} - -async function listTriggers(callingProjectId) { - // [START dlp_list_triggers] - // Imports the Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The project ID to run the API call under - // const callingProjectId = process.env.GCLOUD_PROJECT; - - // Construct trigger listing request - const request = { - parent: `projects/${callingProjectId}/locations/global`, - }; - - // Helper function to pretty-print dates - const formatDate = date => { - const msSinceEpoch = parseInt(date.seconds, 10) * 1000; - return new Date(msSinceEpoch).toLocaleString('en-US'); - }; - - try { - // Run trigger listing request - const [triggers] = await dlp.listJobTriggers(request); - triggers.forEach(trigger => { - // Log trigger details - console.log(`Trigger ${trigger.name}:`); - console.log(` Created: ${formatDate(trigger.createTime)}`); - console.log(` Updated: ${formatDate(trigger.updateTime)}`); - if (trigger.displayName) { - console.log(` Display Name: ${trigger.displayName}`); - } - if (trigger.description) { - console.log(` Description: ${trigger.description}`); - } - console.log(` Status: ${trigger.status}`); - console.log(` Error count: ${trigger.errors.length}`); - }); - } catch (err) { - console.log(`Error in listTriggers: ${err.message || err}`); - } - // [END dlp_list_trigger] -} - -async function deleteTrigger(triggerId) { - // [START dlp_delete_trigger] - // Imports the 
Google Cloud Data Loss Prevention library - const DLP = require('@google-cloud/dlp'); - - // Instantiates a client - const dlp = new DLP.DlpServiceClient(); - - // The name of the trigger to be deleted - // Parent project ID is automatically extracted from this parameter - // const triggerId = 'projects/my-project/triggers/my-trigger'; - - // Construct trigger deletion request - const request = { - name: triggerId, - }; - try { - // Run trigger deletion request - await dlp.deleteJobTrigger(request); - console.log(`Successfully deleted trigger ${triggerId}.`); - } catch (err) { - console.log(`Error in deleteTrigger: ${err.message || err}`); - } - - // [END dlp_delete_trigger] -} - -const cli = require(`yargs`) // eslint-disable-line - .demand(1) - .command( - 'create <bucketName> <scanPeriod>', - 'Create a Data Loss Prevention API job trigger.', - { - infoTypes: { - alias: 't', - default: ['PHONE_NUMBER', 'EMAIL_ADDRESS', 'CREDIT_CARD_NUMBER'], - type: 'array', - global: true, - coerce: infoTypes => - infoTypes.map(type => { - return {name: type}; - }), - }, - triggerId: { - alias: 'n', - default: '', - type: 'string', - }, - displayName: { - alias: 'd', - default: '', - type: 'string', - }, - description: { - alias: 's', - default: '', - type: 'string', - }, - autoPopulateTimespan: { - default: false, - type: 'boolean', - }, - minLikelihood: { - alias: 'm', - default: 'LIKELIHOOD_UNSPECIFIED', - type: 'string', - choices: [ - 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', - 'UNLIKELY', - 'POSSIBLE', - 'LIKELY', - 'VERY_LIKELY', - ], - global: true, - }, - maxFindings: { - alias: 'f', - default: 0, - type: 'number', - global: true, - }, - }, - opts => - createTrigger( - opts.callingProjectId, - opts.triggerId, - opts.displayName, - opts.description, - opts.bucketName, - opts.autoPopulateTimespan, - opts.scanPeriod, - opts.infoTypes, - opts.minLikelihood, - opts.maxFindings - ) - ) - .command('list', 'List Data Loss Prevention API job triggers.', {}, opts => - listTriggers(opts.callingProjectId) - ) - .command( - 'delete <triggerId>', - 'Delete a Data Loss Prevention API job trigger.', - {}, - opts => deleteTrigger(opts.triggerId) - ) - .option('c', { - type: 'string', - alias: 'callingProjectId', - default: process.env.GCLOUD_PROJECT || '', - }) - .example('node $0 create my-bucket 1') - .example('node $0 list') - .example('node $0 delete projects/my-project/jobTriggers/my-trigger') - .wrap(120) - .recommendCommands() - .epilogue('For more information, see https://cloud.google.com/dlp/docs.'); - -if (module === require.main) { - cli.help().strict().argv; // eslint-disable-line -}
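A note on the pattern the rewritten system tests share: the standalone samples now exit non-zero when the API call fails, so instead of grepping stdout for "Error in ..." strings the tests catch the exception thrown by execSync and assert on the gRPC status text carried in err.message, and they resolve the project ID from the DLP client rather than from GCLOUD_PROJECT. A minimal sketch of that pattern, with a hypothetical tryExecSync helper standing in for the inline try/catch repeated throughout the diff:

const cp = require('child_process');
const {assert} = require('chai');
const {describe, it, before} = require('mocha');
const DLP = require('@google-cloud/dlp');

const client = new DLP.DlpServiceClient();

// Hypothetical helper: returns stdout on success, or the thrown error's
// message (which includes the child's stderr) when the sample exits non-zero.
const tryExecSync = cmd => {
  try {
    return cp.execSync(cmd, {encoding: 'utf-8'});
  } catch (err) {
    return err.message;
  }
};

describe('sample error handling (sketch)', () => {
  let projectId;

  before(async () => {
    // Resolve the project from the client instead of an environment variable,
    // as the refactored tests do.
    projectId = await client.getProjectId();
  });

  it('surfaces the gRPC status for a bad infoType', () => {
    const output = tryExecSync(
      `node inspectString.js ${projectId} "some text" 'LIKELIHOOD_UNSPECIFIED' '0' 'BAD_TYPE'`
    );
    assert.include(output, 'INVALID_ARGUMENT');
  });
});

The assertion works because execSync folds the child process's stderr into err.message, which is where the sample's INVALID_ARGUMENT status ends up.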
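On the kMap change: the deleted risk.js CLI checked client-side that the quasi-identifier and infoType lists were the same length before zipping them into tagged fields, while the updated test now lets the service reject a mismatch with INVALID_ARGUMENT ("InfoType name cannot be empty of a TaggedField"). A small sketch of that pairing step, using illustrative column and infoType names:

// Pair each quasi-identifier column with the infoType that describes it; the
// kMap privacy metric expects one tagged field per quasi-identifier.
const quasiIdColumnNames = ['Age', 'Gender']; // illustrative
const infoTypes = ['AGE', 'GENDER']; // illustrative

const quasiIds = quasiIdColumnNames.map((name, idx) => ({
  field: {name},
  infoType: {name: infoTypes[idx]},
}));

// If infoTypes is shorter than quasiIdColumnNames, the corresponding infoType
// name is undefined, which is what now triggers the server-side error the
// test asserts on.
console.log(JSON.stringify(quasiIds, null, 2));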
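risk.test.js above relies on a pre-existing BigQuery fixture (dataset integration_tests_dlp, table harmful) holding a few Age/Name rows. A minimal seeding sketch with @google-cloud/bigquery, assuming the dataset already exists in the project; the Name:STRING, Age:INTEGER schema and the sample rows are illustrative, only the dataset, table, and column names come from the tests:

const {BigQuery} = require('@google-cloud/bigquery');

async function seedRiskFixture() {
  const bigquery = new BigQuery();
  // Assumes the integration_tests_dlp dataset already exists.
  const dataset = bigquery.dataset('integration_tests_dlp');
  const [table] = await dataset.createTable('harmful', {
    schema: 'Name:STRING, Age:INTEGER', // assumed schema
  });
  // A few Age/Name pairs, as the comment in risk.test.js asks for.
  await table.insert([
    {Name: 'Ann', Age: 35},
    {Name: 'James', Age: 35},
    {Name: 'Dan', Age: 70},
  ]);
  console.log(`Seeded ${table.id} with sample rows.`);
}

seedRiskFixture().catch(err => {
  console.error(err);
  process.exitCode = 1;
});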