fix: correct dataset name, use env var for project #2621

Merged
merged 33 commits on Feb 10, 2020
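The fix in this PR hinges on two small patterns that recur throughout the diffs below: the project ID is read from the `GCLOUD_PROJECT` environment variable instead of being hard-coded, and that value is used to build the DLP parent resource path. A minimal sketch of the pattern, assuming the pre-2.0 `google-cloud-dlp` client these samples use:

```python
import os

import google.cloud.dlp

# Project ID comes from the environment, not a hard-coded string.
project = os.getenv("GCLOUD_PROJECT")

# The samples build the parent resource path from that project.
dlp = google.cloud.dlp.DlpServiceClient()
parent = dlp.project_path(project)
```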
Commits
f377d8a
fix: correct dataset name, use env var for project
leahecole Dec 16, 2019
bf3dc08
Merge branch 'master' into fix_dlp_tests
leahecole Dec 17, 2019
131dfc1
Merge branch 'master' into fix_dlp_tests
kurtisvg Dec 18, 2019
c1735af
Merge branch 'master' of github.com:GoogleCloudPlatform/python-docs-s…
leahecole Jan 2, 2020
86fb1b9
Merge branch 'master' of github.com:GoogleCloudPlatform/python-docs-s…
leahecole Jan 3, 2020
352785e
Add uuids to tests
leahecole Jan 3, 2020
503e628
add uuids and fixtures for bq
leahecole Jan 4, 2020
09465fc
Merge branch 'master' into fix_dlp_tests
leahecole Jan 4, 2020
d3a108a
Merge branch 'master' into fix_dlp_tests
leahecole Jan 6, 2020
e842941
Add logic to delete job
leahecole Jan 6, 2020
d40c636
Merge branch 'master' into fix_dlp_tests
leahecole Jan 6, 2020
e8e3019
ran black
leahecole Jan 6, 2020
b216953
Merge branch 'fix_dlp_tests' of github.com:GoogleCloudPlatform/python…
leahecole Jan 6, 2020
bc72e88
Run black with line length
leahecole Jan 6, 2020
ac8b39e
Add utf encoding for python 2 tests
leahecole Jan 7, 2020
0df064c
Merge branch 'master' into fix_dlp_tests
leahecole Jan 7, 2020
5559087
Merge branch 'master' of github.com:GoogleCloudPlatform/python-docs-s…
leahecole Jan 10, 2020
861ebc7
Merge branch 'master' into fix_dlp_tests
leahecole Jan 14, 2020
0337c44
Merge branch 'master' of github.com:GoogleCloudPlatform/python-docs-s…
leahecole Jan 16, 2020
396b534
Merge branch 'master' into fix_dlp_tests
leahecole Jan 22, 2020
c6e813e
Merge branch 'fix_dlp_tests' of github.com:GoogleCloudPlatform/python…
leahecole Feb 3, 2020
ef0a54c
Add skips for now
leahecole Feb 4, 2020
e38e348
Merge branch 'master' into fix_dlp_tests
leahecole Feb 4, 2020
cd374af
Ran black
leahecole Feb 4, 2020
61b3522
Remove skips, adjust job tests
leahecole Feb 5, 2020
93cc1a8
fix lint and skips
leahecole Feb 5, 2020
d5c26c3
Merge branch 'master' into fix_dlp_tests
leahecole Feb 5, 2020
c7db451
Merge branch 'master' into fix_dlp_tests
leahecole Feb 5, 2020
9264d58
Merge branch 'master' into fix_dlp_tests
leahecole Feb 6, 2020
7aa8607
Cleanup commented things
leahecole Feb 6, 2020
7797ce0
Merge branch 'master' into fix_dlp_tests
leahecole Feb 6, 2020
b7770ba
Merge branch 'master' into fix_dlp_tests
leahecole Feb 7, 2020
8418d57
Merge branch 'master' into fix_dlp_tests
leahecole Feb 10, 2020
dlp/deid.py (38 additions, 13 deletions)
@@ -46,7 +46,9 @@ def deidentify_with_mask(
parent = dlp.project_path(project)

# Construct inspect configuration dictionary
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
inspect_config = {
"info_types": [{"name": info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
@@ -131,17 +133,24 @@ def deidentify_with_fpe(
# Construct FPE configuration dictionary
crypto_replace_ffx_fpe_config = {
"crypto_key": {
"kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
"kms_wrapped": {
"wrapped_key": wrapped_key,
"crypto_key_name": key_name,
}
},
"common_alphabet": alphabet,
}

# Add surrogate type
if surrogate_type:
crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type}
crypto_replace_ffx_fpe_config["surrogate_info_type"] = {
"name": surrogate_type
}

# Construct inspect configuration dictionary
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
inspect_config = {
"info_types": [{"name": info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
@@ -176,7 +185,12 @@ def deidentify_with_fpe(

# [START dlp_reidentify_fpe]
def reidentify_with_fpe(
project, string, alphabet=None, surrogate_type=None, key_name=None, wrapped_key=None
project,
string,
alphabet=None,
surrogate_type=None,
key_name=None,
wrapped_key=None,
):
"""Uses the Data Loss Prevention API to reidentify sensitive data in a
string that was encrypted by Format Preserving Encryption (FPE).
@@ -333,7 +347,11 @@ def map_data(value):
try:
date = datetime.strptime(value, "%m/%d/%Y")
return {
"date_value": {"year": date.year, "month": date.month, "day": date.day}
"date_value": {
"year": date.year,
"month": date.month,
"day": date.day,
}
}
except ValueError:
return {"string_value": value}
@@ -426,7 +444,8 @@ def write_data(data):

mask_parser = subparsers.add_parser(
"deid_mask",
help="Deidentify sensitive data in a string by masking it with a " "character.",
help="Deidentify sensitive data in a string by masking it with a "
"character.",
)
mask_parser.add_argument(
"--info_types",
@@ -438,7 +457,8 @@ def write_data(data):
default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
)
mask_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
mask_parser.add_argument("item", help="The string to deidentify.")
mask_parser.add_argument(
@@ -471,11 +491,13 @@ def write_data(data):
default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
)
fpe_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
fpe_parser.add_argument(
"item",
help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
help="The string to deidentify. "
"Example: string = 'My SSN is 372819127'",
)
fpe_parser.add_argument(
"key_name",
@@ -513,11 +535,13 @@ def write_data(data):
"Encryption (FPE).",
)
reid_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
reid_parser.add_argument(
"item",
help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
help="The string to deidentify. "
"Example: string = 'My SSN is 372819127'",
)
reid_parser.add_argument(
"surrogate_type",
@@ -553,7 +577,8 @@ def write_data(data):
help="Deidentify dates in a CSV file by pseudorandomly shifting them.",
)
date_shift_parser.add_argument(
"project", help="The Google Cloud project id to use as a parent resource."
"project",
help="The Google Cloud project id to use as a parent resource.",
)
date_shift_parser.add_argument(
"input_csv_file",
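For reference, the masking helper changed above is exercised by the tests roughly as follows; the example string and mask count here are illustrative, not values fixed by the sample:

```python
import os

import deid  # dlp/deid.py from this repository

project = os.getenv("GCLOUD_PROJECT")
text = "My SSN is 372819127"  # illustrative input

# Mask up to 7 characters of any detected US_SOCIAL_SECURITY_NUMBER.
deid.deidentify_with_mask(
    project, text, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7
)
```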
dlp/deid_test.py (4 additions, 1 deletion)
@@ -78,7 +78,10 @@ def test_deidentify_with_mask_masking_character_specified(capsys):

def test_deidentify_with_mask_masking_number_specified(capsys):
deid.deidentify_with_mask(
GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7
GCLOUD_PROJECT,
HARMFUL_STRING,
["US_SOCIAL_SECURITY_NUMBER"],
number_to_mask=7,
)

out, _ = capsys.readouterr()
dlp/inspect_content.py (12 additions, 5 deletions)
@@ -220,7 +220,9 @@ def inspect_table(
headers = [{"name": val} for val in data["header"]]
rows = []
for row in data["rows"]:
rows.append({"values": [{"string_value": cell_val} for cell_val in row]})
rows.append(
{"values": [{"string_value": cell_val} for cell_val in row]}
)

table = {}
table["headers"] = headers
@@ -978,7 +980,9 @@ def callback(message):
)

parser_file = subparsers.add_parser("file", help="Inspect a local file.")
parser_file.add_argument("filename", help="The path to the file to inspect.")
parser_file.add_argument(
"filename", help="The path to the file to inspect."
)
parser_file.add_argument(
"--project",
help="The Google Cloud project id to use as a parent resource.",
@@ -1121,10 +1125,12 @@ def callback(message):
"datastore", help="Inspect files on Google Datastore."
)
parser_datastore.add_argument(
"datastore_project", help="The Google Cloud project id of the target Datastore."
"datastore_project",
help="The Google Cloud project id of the target Datastore.",
)
parser_datastore.add_argument(
"kind", help='The kind of the Datastore entity to inspect, e.g. "Person".'
"kind",
help='The kind of the Datastore entity to inspect, e.g. "Person".',
)
parser_datastore.add_argument(
"topic_id",
@@ -1200,7 +1206,8 @@ def callback(message):
"bigquery", help="Inspect files on Google BigQuery."
)
parser_bigquery.add_argument(
"bigquery_project", help="The Google Cloud project id of the target table."
"bigquery_project",
help="The Google Cloud project id of the target table.",
)
parser_bigquery.add_argument(
"dataset_id", help="The ID of the target BigQuery dataset."
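The inspect_table hunk above assembles the API's table payload from plain Python lists. A short sketch of that construction, with sample data assumed for illustration:

```python
# Sample data assumed for illustration.
data = {
    "header": ["name", "email"],
    "rows": [["Gary Smith", "gary@example.com"]],
}

# Build the table structure the DLP API expects.
headers = [{"name": val} for val in data["header"]]
rows = []
for row in data["rows"]:
    rows.append(
        {"values": [{"string_value": cell_val} for cell_val in row]}
    )

table = {}
table["headers"] = headers
table["rows"] = rows
```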
dlp/inspect_content_test.py (26 additions, 11 deletions)
@@ -13,6 +13,7 @@
# limitations under the License.

import os
import uuid

from gcp_devrel.testing import eventually_consistent
from gcp_devrel.testing.flaky import flaky
@@ -26,16 +27,18 @@
import pytest
import inspect_content

UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]

GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test"
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING
RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources")
RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"]
TOPIC_ID = "dlp-test"
SUBSCRIPTION_ID = "dlp-test-subscription"
TOPIC_ID = "dlp-test" + UNIQUE_STRING
SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING
DATASTORE_KIND = "DLP test kind"
BIGQUERY_DATASET_ID = "dlp_test_dataset"
BIGQUERY_TABLE_ID = "dlp_test_table"
DATASTORE_NAME = "DLP test object" + UNIQUE_STRING
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING


@pytest.fixture(scope="module")
@@ -91,7 +94,9 @@ def subscription_id(topic_id):
# Subscribes to a topic.
subscriber = google.cloud.pubsub.SubscriberClient()
topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
subscription_path = subscriber.subscription_path(
GCLOUD_PROJECT, SUBSCRIPTION_ID
)
try:
subscriber.create_subscription(subscription_path, topic_path)
except google.api_core.exceptions.AlreadyExists:
@@ -108,7 +113,7 @@ def datastore_project():
datastore_client = google.cloud.datastore.Client()

kind = DATASTORE_KIND
name = "DLP test object"
name = DATASTORE_NAME
key = datastore_client.key(kind, name)
item = google.cloud.datastore.Entity(key=key)
item["payload"] = "My name is Gary Smith and my email is gary@example.com"
@@ -159,7 +164,10 @@ def test_inspect_string(capsys):
test_string = "My name is Gary Smith and my email is gary@example.com"

inspect_content.inspect_string(
GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
GCLOUD_PROJECT,
test_string,
["FIRST_NAME", "EMAIL_ADDRESS"],
include_quote=True,
)

out, _ = capsys.readouterr()
@@ -211,7 +219,10 @@ def test_inspect_string_no_results(capsys):
test_string = "Nothing to see here"

inspect_content.inspect_string(
GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
GCLOUD_PROJECT,
test_string,
["FIRST_NAME", "EMAIL_ADDRESS"],
include_quote=True,
)

out, _ = capsys.readouterr()
@@ -320,7 +331,9 @@ def test_inspect_gcs_file_with_custom_info_types(


@flaky
def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
def test_inspect_gcs_file_no_results(
bucket, topic_id, subscription_id, capsys
):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
@@ -367,7 +380,9 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):


@flaky
def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
def test_inspect_datastore(
datastore_project, topic_id, subscription_id, capsys
):
@eventually_consistent.call
def _():
inspect_content.inspect_datastore(
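The test changes above suffix every shared resource name with a short UUID and move setup into module-scoped fixtures. One possible shape for the BigQuery fixture that the commit messages mention ("add uuids and fixtures for bq"), offered as a sketch only since the fixture body is not part of the excerpt shown here:

```python
import os
import uuid

import google.api_core.exceptions
import google.cloud.bigquery
import pytest

UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING


@pytest.fixture(scope="module")
def bigquery_project():
    # Create a uniquely named dataset for this run and tear it down afterwards.
    client = google.cloud.bigquery.Client()
    dataset_ref = client.dataset(BIGQUERY_DATASET_ID)
    try:
        dataset = client.get_dataset(dataset_ref)
    except google.api_core.exceptions.NotFound:
        dataset = client.create_dataset(google.cloud.bigquery.Dataset(dataset_ref))

    yield GCLOUD_PROJECT

    client.delete_dataset(dataset, delete_contents=True)
```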
dlp/jobs.py (14 additions, 5 deletions)
@@ -65,7 +65,8 @@ def list_dlp_jobs(project, filter_string=None, job_type=None):

# Job type dictionary
job_type_to_int = {
"DLP_JOB_TYPE_UNSPECIFIED": google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
"DLP_JOB_TYPE_UNSPECIFIED":
google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
"INSPECT_JOB": google.cloud.dlp.enums.DlpJobType.INSPECT_JOB,
"RISK_ANALYSIS_JOB": google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB,
}
@@ -122,7 +123,8 @@ def delete_dlp_job(project, job_name):

list_parser = subparsers.add_parser(
"list",
help="List Data Loss Prevention API jobs corresponding to a given " "filter.",
help="List Data Loss Prevention API jobs corresponding to a given "
"filter.",
)
list_parser.add_argument(
"project", help="The project id to use as a parent resource."
@@ -135,7 +137,11 @@ def delete_dlp_job(project, job_name):
list_parser.add_argument(
"-t",
"--type",
choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB"],
choices=[
"DLP_JOB_TYPE_UNSPECIFIED",
"INSPECT_JOB",
"RISK_ANALYSIS_JOB",
],
help='The type of job. API defaults to "INSPECT"',
)

@@ -147,12 +153,15 @@ def delete_dlp_job(project, job_name):
)
delete_parser.add_argument(
"job_name",
help="The name of the DlpJob resource to be deleted. " "Example: X-#####",
help="The name of the DlpJob resource to be deleted. "
"Example: X-#####",
)

args = parser.parse_args()

if args.content == "list":
list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type)
list_dlp_jobs(
args.project, filter_string=args.filter, job_type=args.type
)
elif args.content == "delete":
delete_dlp_job(args.project, args.job_name)
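Finally, the jobs.py helpers changed above can also be driven directly from Python; a quick usage sketch, with the job name shown here purely as a placeholder:

```python
import os

import jobs  # dlp/jobs.py from this repository

project = os.getenv("GCLOUD_PROJECT")

# List inspect jobs in the project, then delete one by its resource name.
jobs.list_dlp_jobs(project, job_type="INSPECT_JOB")
jobs.delete_dlp_job(project, "i-1234567890")  # placeholder job name
```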