Skip to content
This repository has been archived by the owner on Dec 10, 2023. It is now read-only.

Commit

Permalink
Add custom infoType snippets to DLP samples [(#3991)](GoogleCloudPlat…
Browse files Browse the repository at this point in the history
  • Loading branch information
sethmoo authored Jun 9, 2020
1 parent ce78cdc commit f10bfd0
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 0 deletions.
85 changes: 85 additions & 0 deletions samples/snippets/custom_infotype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Custom infoType snippets.
This file contains sample code that uses the Data Loss Prevention API to create
custom infoType detectors to refine scan results.
"""


# [START dlp_omit_name_if_also_email]
def omit_name_if_also_email(
    project,
    content_string,
):
    """Matches PERSON_NAME and EMAIL_ADDRESS, but not both.

    Uses the Data Loss Prevention API to omit matches on PERSON_NAME if the
    EMAIL_ADDRESS detector also matches.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        A list with the infoType name of each finding in the API response,
        in the order the findings appear in the response.
    """

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client. The client class is referenced through the module
    # imported above rather than through a sibling module that is only
    # reachable as a side effect of that import.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Construct a list of infoTypes for DLP to locate in `content_string`. See
    # https://cloud.google.com/dlp/docs/concepts-infotypes for more information
    # about supported infoTypes.
    info_types_to_locate = [{"name": "PERSON_NAME"}, {"name": "EMAIL_ADDRESS"}]

    # Construct the configuration dictionary that will only match on PERSON_NAME
    # if the EMAIL_ADDRESS doesn't also match. This configuration helps reduce
    # the total number of findings when there is a large overlap between
    # different infoTypes.
    inspect_config = {
        "info_types": info_types_to_locate,
        "rule_set": [
            {
                # The rules below apply only to PERSON_NAME findings.
                "info_types": [{"name": "PERSON_NAME"}],
                "rules": [
                    {
                        "exclusion_rule": {
                            "exclude_info_types": {
                                "info_types": [{"name": "EMAIL_ADDRESS"}],
                            },
                            "matching_type": "MATCHING_TYPE_PARTIAL_MATCH",
                        },
                    },
                ],
            },
        ],
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API. The request parts are passed by keyword so they cannot be
    # silently swapped.
    response = dlp.inspect_content(
        parent, inspect_config=inspect_config, item=item
    )

    return [f.info_type.name for f in response.result.findings]


# [END dlp_omit_name_if_also_email]
28 changes: 28 additions & 0 deletions samples/snippets/custom_infotype_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import custom_infotype

GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")


def test_omit_name_if_also_email(capsys):
    """Check that an email-only string yields a single EMAIL_ADDRESS finding."""
    sample_text = "alice@example.com"
    found_names = custom_infotype.omit_name_if_also_email(
        GCLOUD_PROJECT,
        sample_text,
    )

    # Exactly one finding is expected: the EMAIL_ADDRESS match, with the
    # overlapping PERSON_NAME match left out.
    assert len(found_names) == 1
    assert found_names[0] == "EMAIL_ADDRESS"

0 comments on commit f10bfd0

Please sign in to comment.