-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: refactor all DLP samples and tests (#520)
* chore: refactor all DLP samples and tests
- Loading branch information
Showing
44 changed files
with
3,487 additions
and
3,360 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
// Copyright 2020 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
'use strict'; | ||
|
||
// sample-metadata: | ||
// title: Categorical Risk Analysis | ||
// description: Computes risk metrics of a column of data in a Google BigQuery table. | ||
// usage: node categoricalRiskAnalysis.js my-project nhtsa_traffic_fatalities accident_2015 state_name my-topic my-subscription bigquery-public-data | ||
|
||
function main( | ||
projectId, | ||
tableProjectId, | ||
datasetId, | ||
tableId, | ||
columnName, | ||
topicId, | ||
subscriptionId | ||
) { | ||
// [START dlp_categorical_stats] | ||
// Import the Google Cloud client libraries | ||
const DLP = require('@google-cloud/dlp'); | ||
const {PubSub} = require('@google-cloud/pubsub'); | ||
|
||
// Instantiates clients | ||
const dlp = new DLP.DlpServiceClient(); | ||
const pubsub = new PubSub(); | ||
|
||
// The project ID to run the API call under | ||
// const projectId = 'my-project'; | ||
|
||
// The project ID the table is stored under | ||
// This may or (for public datasets) may not equal the calling project ID | ||
// const tableProjectId = 'my-project'; | ||
|
||
// The ID of the dataset to inspect, e.g. 'my_dataset' | ||
// const datasetId = 'my_dataset'; | ||
|
||
// The ID of the table to inspect, e.g. 'my_table' | ||
// const tableId = 'my_table'; | ||
|
||
// The name of the Pub/Sub topic to notify once the job completes | ||
// TODO(developer): create a Pub/Sub topic to use for this | ||
// const topicId = 'MY-PUBSUB-TOPIC' | ||
|
||
// The name of the Pub/Sub subscription to use when listening for job | ||
// completion notifications | ||
// TODO(developer): create a Pub/Sub subscription to use for this | ||
// const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' | ||
|
||
// The name of the column to compute risk metrics for, e.g. 'firstName' | ||
// const columnName = 'firstName'; | ||
async function categoricalRiskAnalysis() { | ||
const sourceTable = { | ||
projectId: tableProjectId, | ||
datasetId: datasetId, | ||
tableId: tableId, | ||
}; | ||
|
||
// Construct request for creating a risk analysis job | ||
const request = { | ||
parent: `projects/${projectId}/locations/global`, | ||
riskJob: { | ||
privacyMetric: { | ||
categoricalStatsConfig: { | ||
field: { | ||
name: columnName, | ||
}, | ||
}, | ||
}, | ||
sourceTable: sourceTable, | ||
actions: [ | ||
{ | ||
pubSub: { | ||
topic: `projects/${projectId}/topics/${topicId}`, | ||
}, | ||
}, | ||
], | ||
}, | ||
}; | ||
|
||
// Create helper function for unpacking values | ||
const getValue = obj => obj[Object.keys(obj)[0]]; | ||
|
||
// Run risk analysis job | ||
const [topicResponse] = await pubsub.topic(topicId).get(); | ||
const subscription = await topicResponse.subscription(subscriptionId); | ||
const [jobsResponse] = await dlp.createDlpJob(request); | ||
const jobName = jobsResponse.name; | ||
// Watch the Pub/Sub topic until the DLP job finishes | ||
await new Promise((resolve, reject) => { | ||
const messageHandler = message => { | ||
if (message.attributes && message.attributes.DlpJobName === jobName) { | ||
message.ack(); | ||
subscription.removeListener('message', messageHandler); | ||
subscription.removeListener('error', errorHandler); | ||
resolve(jobName); | ||
} else { | ||
message.nack(); | ||
} | ||
}; | ||
|
||
const errorHandler = err => { | ||
subscription.removeListener('message', messageHandler); | ||
subscription.removeListener('error', errorHandler); | ||
reject(err); | ||
}; | ||
|
||
subscription.on('message', messageHandler); | ||
subscription.on('error', errorHandler); | ||
}); | ||
setTimeout(() => { | ||
console.log(' Waiting for DLP job to fully complete'); | ||
}, 500); | ||
const [job] = await dlp.getDlpJob({name: jobName}); | ||
const histogramBuckets = | ||
job.riskDetails.categoricalStatsResult.valueFrequencyHistogramBuckets; | ||
histogramBuckets.forEach((histogramBucket, histogramBucketIdx) => { | ||
console.log(`Bucket ${histogramBucketIdx}:`); | ||
|
||
// Print bucket stats | ||
console.log( | ||
` Most common value occurs ${histogramBucket.valueFrequencyUpperBound} time(s)` | ||
); | ||
console.log( | ||
` Least common value occurs ${histogramBucket.valueFrequencyLowerBound} time(s)` | ||
); | ||
|
||
// Print bucket values | ||
console.log(`${histogramBucket.bucketSize} unique values total.`); | ||
histogramBucket.bucketValues.forEach(valueBucket => { | ||
console.log( | ||
` Value ${getValue(valueBucket.value)} occurs ${ | ||
valueBucket.count | ||
} time(s).` | ||
); | ||
}); | ||
}); | ||
} | ||
|
||
categoricalRiskAnalysis(); | ||
// [END dlp_categorical_stats] | ||
} | ||
|
||
main(...process.argv.slice(2)); | ||
process.on('unhandledRejection', err => { | ||
console.error(err.message); | ||
process.exitCode = 1; | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
// Copyright 2020 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
'use strict'; | ||
|
||
// sample-metadata: | ||
// title: Inspect Templates | ||
// description: Create a new DLP inspection configuration template. | ||
// usage: node createInspectTemplate.js my-project VERY_LIKELY PERSON_NAME 5 false my-template-id | ||
|
||
function main( | ||
projectId, | ||
templateId, | ||
displayName, | ||
infoTypes, | ||
includeQuote, | ||
minLikelihood, | ||
maxFindings | ||
) { | ||
infoTypes = transformCLI(infoTypes); | ||
// [START dlp_create_inspect_template] | ||
// Imports the Google Cloud Data Loss Prevention library | ||
const DLP = require('@google-cloud/dlp'); | ||
|
||
// Instantiates a client | ||
const dlp = new DLP.DlpServiceClient(); | ||
|
||
// The project ID to run the API call under | ||
// const projectId = 'my-project'; | ||
|
||
// The minimum likelihood required before returning a match | ||
// const minLikelihood = 'LIKELIHOOD_UNSPECIFIED'; | ||
|
||
// The maximum number of findings to report per request (0 = server maximum) | ||
// const maxFindings = 0; | ||
|
||
// The infoTypes of information to match | ||
// const infoTypes = [{ name: 'PHONE_NUMBER' }, { name: 'EMAIL_ADDRESS' }, { name: 'CREDIT_CARD_NUMBER' }]; | ||
|
||
// Whether to include the matching string | ||
// const includeQuote = true; | ||
|
||
// (Optional) The name of the template to be created. | ||
// const templateId = 'my-template'; | ||
|
||
// (Optional) The human-readable name to give the template | ||
// const displayName = 'My template'; | ||
|
||
async function createInspectTemplate() { | ||
// Construct the inspection configuration for the template | ||
const inspectConfig = { | ||
infoTypes: infoTypes, | ||
minLikelihood: minLikelihood, | ||
includeQuote: includeQuote, | ||
limits: { | ||
maxFindingsPerRequest: maxFindings, | ||
}, | ||
}; | ||
|
||
// Construct template-creation request | ||
const request = { | ||
parent: `projects/${projectId}/locations/global`, | ||
inspectTemplate: { | ||
inspectConfig: inspectConfig, | ||
displayName: displayName, | ||
}, | ||
templateId: templateId, | ||
}; | ||
|
||
const [response] = await dlp.createInspectTemplate(request); | ||
const templateName = response.name; | ||
console.log(`Successfully created template ${templateName}.`); | ||
} | ||
createInspectTemplate(); | ||
// [END dlp_create_inspect_template] | ||
} | ||
|
||
main(...process.argv.slice(2)); | ||
process.on('unhandledRejection', err => { | ||
console.error(err.message); | ||
process.exitCode = 1; | ||
}); | ||
|
||
function transformCLI(infoTypes) { | ||
infoTypes = infoTypes | ||
? infoTypes.split(',').map(type => { | ||
return {name: type}; | ||
}) | ||
: undefined; | ||
return infoTypes; | ||
} |
Oops, something went wrong.