Skip to content

Commit

Permalink
DLP: Added sample for de-identification with an exception list (#3101)
Browse files Browse the repository at this point in the history
* Added sample for de-identification with an exception list
Added test cases for the same

* Updated 'de-identify' to be consistent everywhere
  • Loading branch information
dinesh-crest authored Apr 17, 2023
1 parent 3b29113 commit 9a7af78
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 0 deletions.
132 changes: 132 additions & 0 deletions dlp/deIdentifyWithExceptionList.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

// sample-metadata:
// title: De-identify with Exception List
// description: De-identify sensitive data in a string with exceptions
// usage: node deIdentifyWithExceptionList.js my-project string words infotypes

/**
 * De-identifies sensitive data in a string, leaving untouched any finding
 * that fully matches a word on an exception list.
 *
 * @param {string} projectId Project ID used to build the request parent.
 * @param {string} textToInspect The text to de-identify.
 * @param {string} words Comma-separated words to exclude from the findings.
 * @param {string} infoTypes Comma-separated infoType names to match.
 */
function main(projectId, textToInspect, words, infoTypes) {
  // Convert the raw command-line strings into the shapes the sample expects.
  // A missing `words` argument becomes an empty list so that it does not raise
  // a synchronous TypeError before the request is even built.
  words = words ? words.split(',') : [];
  infoTypes = transformCLI(infoTypes);
  // [START dlp_deidentify_exception_list]
  // Imports the Google Cloud Data Loss Prevention library
  const DLP = require('@google-cloud/dlp');

  // Initialize client that will be used to send requests. This client only needs to be created
  // once, and can be reused for multiple requests. After completing all of your requests, call
  // the "close" method on the client to safely clean up any remaining background resources.
  const dlp = new DLP.DlpServiceClient();

  // TODO(developer): Replace these variables before running the sample.
  // const projectId = "your-project-id";

  // The string to de-identify
  // const textToInspect = 'jack@example.org accessed customer record of user5@example.com';

  // Words to exclude during inspection
  // const words = ['jack@example.org', 'jill@example.org'];

  // The infoTypes of information to match
  // See https://cloud.google.com/dlp/docs/concepts-infotypes for more information
  // about supported infoTypes.
  // const infoTypes = [{ name: 'EMAIL_ADDRESS' }];

  async function deIdentifyWithExceptionList() {
    // Construct item to inspect
    const item = {value: textToInspect};

    // Construct the custom dictionary detector associated with the word list.
    const wordListDict = {
      wordList: {
        words,
      },
    };

    // Construct a rule set that excludes a finding when its text fully
    // matches a word from the exception list.
    const ruleSet = [
      {
        infoTypes,
        rules: [
          {
            exclusionRule: {
              matchingType:
                DLP.protos.google.privacy.dlp.v2.MatchingType
                  .MATCHING_TYPE_FULL_MATCH,
              dictionary: wordListDict,
            },
          },
        ],
      },
    ];

    // Combine configurations to construct inspect config.
    const inspectConfig = {
      infoTypes,
      ruleSet,
    };

    // Define the type of de-identification as replacement with the infoType name.
    // The empty infoTypes list applies the transformation to every finding for
    // the infoTypes requested in the inspect config.
    const transformation = {
      infoTypes: [],
      primitiveTransformation: {
        replaceWithInfoTypeConfig: {},
      },
    };

    // Construct the configuration for the de-identification request and list all desired transformations.
    const deidentifyConfig = {
      infoTypeTransformations: {
        transformations: [transformation],
      },
    };

    // Combine configurations into a request for the service.
    const request = {
      parent: `projects/${projectId}/locations/global`,
      item,
      inspectConfig,
      deidentifyConfig,
    };

    // Send the request and receive response from the service.
    const [response] = await dlp.deidentifyContent(request);

    // Print the results
    console.log(
      `Text after replace with infotype config: ${response.item.value}`
    );
  }

  // The returned promise is intentionally not awaited here; a rejection is
  // reported by the process-level 'unhandledRejection' handler of this script.
  deIdentifyWithExceptionList();
  // [END dlp_deidentify_exception_list]
}

// Run the sample with the command-line arguments that follow the script path.
const cliArgs = process.argv.slice(2);
main(...cliArgs);

// Report any promise rejection that nothing handled and mark the run as failed.
process.on('unhandledRejection', error => {
  console.error(error.message);
  process.exitCode = 1;
});

/**
 * Converts a comma-separated infoType string from the command line into the
 * list of `{name}` objects used in the request.
 *
 * Whitespace around each name is trimmed and empty entries (for example the
 * one produced by a trailing comma) are dropped.
 *
 * @param {string} [infoTypes] Comma-separated infoType names,
 *     e.g. 'EMAIL_ADDRESS,PHONE_NUMBER'.
 * @returns {Array<{name: string}>|undefined} One object per name, or
 *     undefined when no value (or an empty string) was given.
 */
function transformCLI(infoTypes) {
  if (!infoTypes) {
    return undefined;
  }
  return infoTypes
    .split(',')
    .map(type => type.trim())
    .filter(type => type.length > 0)
    .map(name => ({name}));
}
36 changes: 36 additions & 0 deletions dlp/system-test/deid.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,40 @@ describe('deid', () => {
}
assert.include(output, 'INVALID_ARGUMENT');
});

// dlp_deidentify_exception_list
it('should exclude the words during inspection', () => {
  // The first address is on the exception list, so only the second address
  // is expected to be replaced with its infoType name.
  const textToInspect =
    'jack@example.org accessed customer record of user5@example.com';
  const words = 'jack@example.org,jill@example.org';
  const infoTypes = 'EMAIL_ADDRESS';
  let output;
  try {
    output = execSync(
      `node deIdentifyWithExceptionList.js ${projectId} "${textToInspect}" "${words}" "${infoTypes}"`
    );
  } catch (err) {
    // Keep the failure text so the assertion below reports what went wrong.
    output = err.message;
  }
  assert.include(
    output,
    'jack@example.org accessed customer record of [EMAIL_ADDRESS]'
  );
});

it('should handle deidentification errors', () => {
  // Valid text and word list, but an invalid project ID: the command is
  // expected to fail and the captured output to contain INVALID_ARGUMENT.
  const textToInspect =
    'jack@example.org accessed customer record of user5@example.com';
  const words = 'jack@example.org,jill@example.org';
  const infoTypes = 'EMAIL_ADDRESS';
  let output;
  try {
    output = execSync(
      `node deIdentifyWithExceptionList.js 'BAD_PROJECT_ID' "${textToInspect}" "${words}" "${infoTypes}"`
    );
  } catch (err) {
    output = err.message;
  }
  assert.include(output, 'INVALID_ARGUMENT');
});
});

0 comments on commit 9a7af78

Please sign in to comment.