# ---- dlp/custom_infotype.py ----

# [START inspect_with_person_name_w_custom_hotword]
def inspect_with_person_name_w_custom_hotword(
    project,
    content_string,
    custom_hotword="patient"
):
    """Inspect a string for PERSON_NAME findings boosted by a custom hotword.

    Uses the Data Loss Prevention API to increase the likelihood of
    PERSON_NAME matches when the caller-specified hotword is present. Only
    VERY_LIKELY findings are reported, because the minimum likelihood
    threshold is set to VERY_LIKELY.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.
        custom_hotword: The custom hotword used for likelihood boosting. It
            is sent to the API as the hotword rule's regex pattern, so regex
            metacharacters in it are not treated literally.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library.
    # NOTE(review): `dlp_v2` is reached below through this import of
    # `google.cloud.dlp` -- confirm this matches the installed client library.
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct a rule set with the caller-provided hotword, with a likelihood
    # boost to VERY_LIKELY when the hotword is present within the 50-character
    # window preceding the PII finding.
    hotword_rule = {
        "hotword_regex": {"pattern": custom_hotword},
        "likelihood_adjustment": {"fixed_likelihood": "VERY_LIKELY"},
        "proximity": {"window_before": 50},
    }

    rule_set = [
        {
            "info_types": [{"name": "PERSON_NAME"}],
            "rules": [{"hotword_rule": hotword_rule}],
        }
    ]

    # Construct the configuration dictionary with the hotword rule set.
    # `include_quote` is requested because the results loop below prints the
    # quote of each finding.
    inspect_config = {
        "rule_set": rule_set,
        "min_likelihood": "VERY_LIKELY",
        "include_quote": True,
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Print out the results.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # Best-effort: a finding without a usable quote is still reported.
        quote = getattr(finding, "quote", None)
        if quote:
            print(f"Quote: {quote}")
        print(f"Info type: {finding.info_type.name}")
        print(f"Likelihood: {finding.likelihood}")

# [END inspect_with_person_name_w_custom_hotword]


# ---- dlp/custom_infotype_test.py ----

def test_inspect_with_person_name_w_custom_hotword(capsys):
    """Check that the hotword sample reports a PERSON_NAME finding."""
    custom_infotype.inspect_with_person_name_w_custom_hotword(
        GCLOUD_PROJECT, "patient's name is John Doe.", "patient")

    out, _ = capsys.readouterr()
    assert "Info type: PERSON_NAME" in out
    # The sample prints the raw likelihood value; 5 presumably corresponds to
    # VERY_LIKELY -- confirm against the client library's Likelihood enum.
    assert "Likelihood: 5" in out