From c68e7e760602234cf08a09ad2d2be0d66fa73732 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Tue, 19 Dec 2017 11:00:15 -0800 Subject: [PATCH 01/11] temporary commit --- dlp/inspect_file.py | 92 +++++++++++++++++++++++++++++++++++++++++ dlp/inspect_gcs_file.py | 92 +++++++++++++++++++++++++++++++++++++++++ dlp/inspect_string.py | 82 ++++++++++++++++++++++++++++++++++++ 3 files changed, 266 insertions(+) create mode 100644 dlp/inspect_file.py create mode 100644 dlp/inspect_gcs_file.py create mode 100644 dlp/inspect_string.py diff --git a/dlp/inspect_file.py b/dlp/inspect_file.py new file mode 100644 index 000000000000..c132c8861057 --- /dev/null +++ b/dlp/inspect_file.py @@ -0,0 +1,92 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + + +# [START inspect_file] +def inspect_file(filename, info_types=None, min_likelihood=None, + max_findings=None, include_quote=True, mime_type=None): + """Uses the Data Loss Prevention API to analyze a file for protected data. + Args: + filename: The path to the file to inspect. + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). 
Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + max_findings: The maximum number of findings to report; 0 = no maximum. + include_quote: Boolean for whether to display a quote of the detected + information in the results. + mime_type: The MIME type of the file. If not specified, the type is + inferred via the Python standard library's mimetypes module. + Returns: + None; the response from the API is printed to the terminal. + """ + + import mimetypes + + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + 'include_quote': include_quote, + } + + # If mime_type is not specified, guess it from the filename. + if mime_type is None: + mime_guess = mimetypes.MimeTypes().guess_type(filename) + mime_type = mime_guess[0] or 'application/octet-stream' + + # Construct the items list by reading the file as a binary string. + with open(filename, mode='rb') as f: + items = [{'type': mime_type, 'data': f.read()}] + + # Call the API. + response = dlp.inspect_content(inspect_config, items) + + # Print out the results. 
+ if response.results[0].findings: + for finding in response.results[0].findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') +# [END inspect_file] + + +if __name__ == '__main__': + inspect_file("/usr/local/google/home/gorcester/Downloads/wQOVLom8Gsa.png", ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) diff --git a/dlp/inspect_gcs_file.py b/dlp/inspect_gcs_file.py new file mode 100644 index 000000000000..c4e6ad266cf7 --- /dev/null +++ b/dlp/inspect_gcs_file.py @@ -0,0 +1,92 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + + +# [START inspect_gcs_file] +def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, + max_findings=None): + """Uses the Data Loss Prevention API to analyze a string for protected data. + Args: + bucket: The name of the GCS bucket containing the file, as a string. + filename: The name of the file in the bucket, including the path, as a + string; e.g. 'images/myfile.png'. + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). 
Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + max_findings: The maximum number of findings to report; 0 = no maximum. + Returns: + None; the response from the API is printed to the terminal. + """ + + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + } + + # Construct a cloud_storage_options dictionary with the file's URL. + url = 'gs://{}/{}'.format(bucket, filename) + storage_config = {'cloud_storage_options': + {'file_set': + {'url': url} + } + } + + operation = dlp.create_inspect_operation(inspect_config, storage_config, + None) + + # Get the operation result name, which can be used to look up the full + # results. This call blocks until the operation is complete; to avoid + # blocking, use operation.add_done_callback(fn) instead. 
+ operation_result = operation.result() + + response = dlp.list_inspect_findings(operation_result.name) + + # TODO DO NOT SUBMIT: haven't successfully gotten results object so not sure this is correct + if response.result.findings: + for finding in response.result.findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') +# [END inspect_gcs_file] + +if __name__ == '__main__': + inspect_gcs_file('andrewsg-test', 'wQOVLom8Gsa.png', ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) diff --git a/dlp/inspect_string.py b/dlp/inspect_string.py new file mode 100644 index 000000000000..015a0e76ce78 --- /dev/null +++ b/dlp/inspect_string.py @@ -0,0 +1,82 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + + +# [START inspect_string] +def inspect_string(item, info_types=None, min_likelihood=None, + max_findings=None, include_quote=True): + """Uses the Data Loss Prevention API to analyze a string for protected data. + Args: + item: The string to inspect. + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). 
Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + max_findings: The maximum number of findings to report; 0 = no maximum. + include_quote: Boolean for whether to display a quote of the detected + information in the results. + Returns: + None; the response from the API is printed to the terminal. + """ + + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + 'include_quote': include_quote, + } + + # Construct the items list (in this case, only one item, in string form). + items = [{'type': 'text/plain', 'value': item}] + + # Call the API. + response = dlp.inspect_content(inspect_config, items) + + # Print out the results. 
+ if response.results[0].findings: + for finding in response.results[0].findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') +# [END inspect_string] + + +if __name__ == '__main__': + inspect_string("I'm Gary and my email is gary@example.com", ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) From ec56479ac316cd8b1bdb3fb4704c16aa49f0a71d Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 3 Jan 2018 14:19:01 -0800 Subject: [PATCH 02/11] Add dlp tests --- dlp/inspect_file_test.py | 62 ++++++++++++++++++++++++ dlp/inspect_gcs_file.py | 3 +- dlp/inspect_gcs_test.py | 94 +++++++++++++++++++++++++++++++++++++ dlp/inspect_string.py | 1 + dlp/inspect_string_test.py | 46 ++++++++++++++++++ dlp/requirements.txt | 3 ++ dlp/resources/accounts.txt | 1 + dlp/resources/harmless.txt | 1 + dlp/resources/test.png | Bin 0 -> 21438 bytes dlp/resources/test.txt | 1 + 10 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 dlp/inspect_file_test.py create mode 100644 dlp/inspect_gcs_test.py create mode 100644 dlp/inspect_string_test.py create mode 100644 dlp/requirements.txt create mode 100644 dlp/resources/accounts.txt create mode 100644 dlp/resources/harmless.txt create mode 100644 dlp/resources/test.png create mode 100644 dlp/resources/test.txt diff --git a/dlp/inspect_file_test.py b/dlp/inspect_file_test.py new file mode 100644 index 000000000000..f885bc251e0d --- /dev/null +++ b/dlp/inspect_file_test.py @@ -0,0 +1,62 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import inspect_file + + +def test_inspect_file(capsys): + test_filepath = os.path.join( + os.path.dirname(__file__), 'resources/test.txt') + + inspect_file.inspect_file( + test_filepath, include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + + +def test_inspect_file_with_info_types(capsys): + test_filepath = os.path.join( + os.path.dirname(__file__), 'resources/test.txt') + + inspect_file.inspect_file( + test_filepath, ['PHONE_NUMBER'], include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: PHONE_NUMBER' in out + assert 'Info type: EMAIL_ADDRESS' not in out + + +def test_inspect_file_no_results(capsys): + test_filepath = os.path.join( + os.path.dirname(__file__), 'resources/harmless.txt') + + inspect_file.inspect_file( + test_filepath, include_quote=True) + + out, _ = capsys.readouterr() + assert 'No findings' in out + + +def test_inspect_image_file(capsys): + test_filepath = os.path.join( + os.path.dirname(__file__), 'resources/test.png') + + inspect_file.inspect_file( + test_filepath, include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: PHONE_NUMBER' in out diff --git a/dlp/inspect_gcs_file.py b/dlp/inspect_gcs_file.py index c4e6ad266cf7..94a854bdc871 100644 --- a/dlp/inspect_gcs_file.py +++ b/dlp/inspect_gcs_file.py @@ -89,4 +89,5 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, # [END inspect_gcs_file] if __name__ == '__main__': - inspect_gcs_file('andrewsg-test', 'wQOVLom8Gsa.png', ["EMAIL_ADDRESS", "US_MALE_NAME", 
"US_FEMALE_NAME"]) +# inspect_gcs_file('andrewsg-test', 'wQOVLom8Gsa.png', ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) + inspect_gcs_file('nodejs-docs-samples-dlp', 'test.txt', ["EMAIL_ADDRESS", "PHONE_NUMBER"]) \ No newline at end of file diff --git a/dlp/inspect_gcs_test.py b/dlp/inspect_gcs_test.py new file mode 100644 index 000000000000..b5a10bb7a1eb --- /dev/null +++ b/dlp/inspect_gcs_test.py @@ -0,0 +1,94 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + +import google.cloud.exceptions +import google.cloud.storage + +import inspect_gcs_file + +GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT') +TEST_BUCKET_NAME = GCLOUD_PROJECT + '-dlp-python-client-test' +RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), 'resources') +RESOURCE_FILE_NAMES = ['test.txt', 'test.png', 'harmless.txt', 'accounts.txt'] + + +@pytest.fixture(scope='module') +def bucket(request): + # Creates a GCS bucket, uploads files required for the test, and tears down + # the entire bucket afterwards. + + client = google.cloud.storage.Client() + try: + bucket = client.get_bucket(TEST_BUCKET_NAME) + except google.cloud.exceptions.NotFound: + bucket = client.create_bucket(TEST_BUCKET_NAME) + + # Upoad the blobs and keep track of them in a list. 
+ blobs = [] + for name in RESOURCE_FILE_NAMES: + path = os.path.join(RESOURCE_DIRECTORY, name) + blob = bucket.blob(name) + blob.upload_from_filename(path) + blobs.append(blob) + + # Yield the object to the test code; lines after this execute as a teardown. + yield bucket + + for blob in blobs: + blob.delete() + + # Attempt to delete the bucket; this will only work if it is empty. + bucket.delete() + + print('teardown complete') + + +def test_inspect_gcs_file(bucket, capsys): + inspect_gcs_file.inspect_gcs_file(bucket.name, 'test.txt') + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + + +def test_inspect_gcs_file_with_info_types(bucket, capsys): + inspect_gcs_file.inspect_gcs_file( + bucket.name, 'test.txt', info_types=['EMAIL_ADDRESS']) + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + + +def test_inspect_gcs_file_no_results(bucket, capsys): + inspect_gcs_file.inspect_gcs_file(bucket.name, 'harmless.txt') + + out, _ = capsys.readouterr() + assert 'No findings' in out + + +def test_inspect_gcs_image_file(bucket, capsys): + inspect_gcs_file.inspect_gcs_file(bucket.name, 'test.png') + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + +def test_inspect_gcs_multiple_file(bucket, capsys): + inspect_gcs_file.inspect_gcs_file(bucket.name, '*') + + out, _ = capsys.readouterr() + assert 'Info type: PHONE_NUMBER' in out + assert 'Info type: CREDIT_CARD' in out diff --git a/dlp/inspect_string.py b/dlp/inspect_string.py index 015a0e76ce78..33570c215b20 100644 --- a/dlp/inspect_string.py +++ b/dlp/inspect_string.py @@ -80,3 +80,4 @@ def inspect_string(item, info_types=None, min_likelihood=None, if __name__ == '__main__': inspect_string("I'm Gary and my email is gary@example.com", ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) + # DO NOT SUBMIT \ No newline at end of file diff --git a/dlp/inspect_string_test.py b/dlp/inspect_string_test.py new file mode 100644 index 
000000000000..78f53aaeb712 --- /dev/null +++ b/dlp/inspect_string_test.py @@ -0,0 +1,46 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect_string + + +def test_inspect_string(capsys): + test_string = 'I am Gary and my email is gary@example.com' + + inspect_string.inspect_string( + test_string, include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + + +def test_inspect_string_with_info_types(capsys): + test_string = 'I am Gary and my email is gary@example.com' + + inspect_string.inspect_string( + test_string, info_types=['US_MALE_NAME'], include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: US_MALE_NAME' in out + assert 'Info type: EMAIL_ADDRESS' not in out + + +def test_inspect_string_no_results(capsys): + test_string = 'Nothing to see here' + + inspect_string.inspect_string( + test_string, include_quote=True) + + out, _ = capsys.readouterr() + assert 'No findings' in out diff --git a/dlp/requirements.txt b/dlp/requirements.txt new file mode 100644 index 000000000000..23f5d1a115c1 --- /dev/null +++ b/dlp/requirements.txt @@ -0,0 +1,3 @@ +# DO NOT SUBMIT +/usr/local/google/home/gorcester/src/google-cloud-python/dlp +google-cloud-storage diff --git a/dlp/resources/accounts.txt b/dlp/resources/accounts.txt new file mode 100644 index 000000000000..2763cd0ab820 --- /dev/null +++ b/dlp/resources/accounts.txt @@ -0,0 +1 @@ +My credit card number is 1234 5678 9012 
3456, and my CVV is 789. \ No newline at end of file diff --git a/dlp/resources/harmless.txt b/dlp/resources/harmless.txt new file mode 100644 index 000000000000..5666de37ab23 --- /dev/null +++ b/dlp/resources/harmless.txt @@ -0,0 +1 @@ +This file is mostly harmless. diff --git a/dlp/resources/test.png b/dlp/resources/test.png new file mode 100644 index 0000000000000000000000000000000000000000..8f32c825884261083b7d731676375303d49ca6f6 GIT binary patch literal 21438 zcmagE1yo)=(>96}ifeIqcZVA&?(XjH?(SY3ihGe_#oce*-QC@tL!Vc^?_cLX>+H4m zPIfXgu`4slBoXqmV(>87Fd!fx@Dk#}iXb2muAgyID9Fz*U4vm22nZa8g^-ZEgpd%S zyrZ3|g|!I?h8} z`UBB)&}x1|{16cluBnDvJOM{ePIjsBhe+n2%002#;Mw4Kc+ccI81<7K$1??yZ!kL8 zHO`|3F^0VkRuJeI4elc)FU2ABN1*J&bFb#g$ITfWWCSs}eRIQVf&ZPiS{>i_yzulv zU8bEK4i16>?Es_JHx&9v3erSQA(r+PBoFO4W`B22{1T+^SWp}ZgqXhDg1LgKn~K?* zu0A5-Iw%bSBz@Qvb_PT~;nH;9X<8qb3|fo_G?l^M9j8w>)0rJ(I}Az7%xof5Jsqyb zW7y4M`W>CcgqA!bi#^n&Sv=%)0%Om(=HM-7?{Om`iws|<7YV_#g^^P-fu&+4V00-D zMLMKO;0FpmXbpB>>XUY9`YTypMZ^s-}$z6O6lRvs`mp8pFHjlaTW%1q}!!1=u`oF>1!8KxIsxC1?;rZwh3Tr z-`iK4vriJKF^aiBXs;sicH>DE73)E<@ z2>4~hyXH$aC6RR!c<(o z>wY(6;vA?~LU%SUm7WzP1p~9_0KAw0<|X*MKXjjcq5l#g_~pv8)ytM%ZTj~vNWmYF z?p>mlSa;#6<4~I{*x&s5iMBzf(sHVtQ@&p3y^o}+-q(SaPA_?vijliRIPqt|U~i%_J_~-?&i=u_8_QyE zqaP#}oc_4E&d5RW>n%IaJ$j{4^SvoM`0n9p@J=#CQq~cj%BVAXBW*5D;kBb28KXnU zudYv3pQ0N56yOSB)ny5a$`dwc@Ou#p8vi7?WLg$ehfZ>s4s~EFZh28<#81Z?cLeTcgwG<}Ra0-uy|~m0Lb$YIq6O3nb)FE#RO>u) z!~wOZ2^g-k<8D9(Dair-PVijJ;t9UuLkD8E%w=fMAx*Iq3kv!Uln>PlL7xN{?ZVyP z0m%&bsviKth$ZZg`2)(dC$c2Sde8$w9V8{tP#$JJFh-wd5&GUAe3OwA!BPO66Olf^ zDi?1R3{hY2HZWBm1TMhfi-0&3d>)BrIGY#LDY(;6Ngv{YR@!Lb!1zRUm4+$X|MWO?+^x4yB_QOQZC-Ud{N&r|UH03VVt25tVKEz2j)Cv{HBPk~7Di!zO} ziAI>x9&MkhLSeC7zRF%FPt71LUy`Z7UD1#dE2$`HEQus3Dk&_fF)}hTG}1Ow3GFFT z>Kg|QzEWGo;_t`!GST|NXN3}_{@J)Wr$^?<*#Hd z3BMJ?QPeDI6hjna6icRQOdw29O$heVharadhAEP&XdcQbf2EZ@mR75vmn#3tRBbL` z{w1kauNEUerm9oqDSsDv49k}AvsBX`TkW^FP24g>JwCT6NB+wc*R9EI`)$;%u1kJP 
zx@Wj&sAuW3!5#Y@C_GzC1hxaV6B{+_xVbYEV<;6#aD2adFXwpE*dwc~Tjm7kdQxm03_M!tvgP0Bt6U9qaaYVkbxZ_VFg%S{bM_sVBn%RF@qmJe}i1Q$%% zEFH$LS62@%@_15Nlvz*QUe1~>kS=%5LC#Ljjfc9EXA4G$HMh*S?1x!%Co?4{UPm`~ z9EUkGA3>$vw+5z694r~>;E>#q-H?VsYmhdOy`iR|HK8G)V(6qynwCc3-Q)E+)QqWQse#_IC(R9qYmLpgN)@RgrwM;+9!p{u=$v29Zi&s(% za7?w#wX9w&1FwP$p-;%`q#rF0j8jb-7tRCPf4&*N2)=l}a3G{0;D*73WyG=qzXSVY zU1F;!G-Y;WR++9UQP(UYXBPj3nkB1LwbaG{j+NHw z7wD1jev>mJ-iMmYp-Zmao8g6VwL`DrhxVM-4Z%)Pzfu0d&c05%?{tLh`c_>#-+R02 zx{kX72upIG1Y){_Hzzk;y4?hwg*b^+h`X8YFNezAa3dH{mt`2S#qm+os{!V+Q9pKFq`1NMmC{ zG#oSPuaR*Wc9_{I+g=C008{(j$fU*9)9mRKc;a)^Q-viXrIu4!IqCG52Q1oWvWhX} zI(d7o2UfAvOf4rye|ngvT+`lHpbiD^KJEq$BiVrs=fyNS;p&pQ>_OErF z?RZ=dyH6!*^oN&c(y^ zQukfv2bFpDZw{~X(^%Z{%QEk0!8pG0LNN@2iE)tcr<3J13iHD8hU zFfIot*-@1&nzR+}3CHzej|o^XSl{%xiGxu)P5o;9qrmeJK3F#fLG&V8OHJ##CUb|2 zgj}+(DT*nk^l$BxmDLrOYqgIicOoq!Qjwm%Fwdne>ZR)H-e%3f>nxf}v{y768ay>y zji>rxEyw!V%DT4O8|v}0a{iT%wx@&mxzh5LdCsb(nv^Eh>ic`{3zx6M$|Eqtp7U}V zdVd0%^Nf32WB#z~Qst<3IH8&(x+^X0SC6@9MK@NgU3*wP&ugJ|poujeS!*?)y}6#> zkKy0jH|5_qZ(EIf*`{`>$~`2zlNMa(i+Dcn}QDx>;t|(vOO)V0EOZ>vg~;s zb_<7wY)TGGBrSjZ^k4(8KdRSpiEzOyp~$f4j+?C%m^x92zI3@5EIm(&xNGuyK}yhQGCS5LR>&Mm*4>9HRf3$`H}$4z)%FXvfD zZY}4I7adKhE*E!iuP?obDF9Lctw-VYuh*LKonb%q*B$dzr-gLekMntoDLMRGdrw_H zG~TyWt=s7Pir41%n=%Xp2JC0Bm*tPNd$Eg=%+%hue!sH!=CkCd@x63#^!2kM5LS=8rx@Uwjs4jM&6={Z()%!wIPco>&0UKlF`!6rf_}n&2+d7bf@6Y!`gI&f5=G)0+?8vzzUq zoecI~$CmyaWbm?h-7ozqnd#i{F2H#%L7s#%|H2A}4I1Mw`kf^A|Ne^s50+JD@Q`0h zGxqEMo9e$ZE(vqtTQ!mX|D=A>cZr1hv!Ci3qZKdBeb$4X?u(*XO0Ku4pIqKV)@nfX zX$b=zNYiu+Pt3r6qi;;o$kBdtqPwL#yK9RJD-i;*Puc&y!l_7L%hlygwQHpXHTvER zo3wBIZ$$Gd=L(rYdA4?BMfVN{cA_$NvCB!)#ti^WlO!j>aPoA#%mVEe_LtwWt!Q^UapEp z!!2fp;Pku6UCBK3olXo05z@$GcDj~-_kx~5#Kuw;UsvrmzdGLu6o*PZgNidZSaF1U zsQR)9af;AQA#xKY(&Ss7x>U;t@|Ac}c{qHlTXc?)rN~`G?1e@$yB{ib@y0e?!wJbQLT2DYome^{!80)@n5J)_5$dsoSU=s(5T3i^ z?A>~PBSC*%9@j6#nHwe7=53`R z%$okJ%m5KsX8ZnTC*72Z`dmYiQj@1e^=9W~LOqRW6iou2#8k)LiyR>npK=$lbK3+X zFkdzc(rHhfIMOn|`xhX8NB2gDrNGWr?_zE9w~KxYKy0xz?uheq=i+n5cJ6gn3fEUI 
z)UcftoP^1Kn#tQ(SUnEE=wl9Q1#J&`QjA?{mPRBc(e>D*^7bZ1?D49V%cGq8yzM2! zwgv2T+8WoCn?YP-PTOv_4Tqix`r9hJ7_s&45#^1oWecQx`iPYq4*Y>=J{^FG3!Cl0 zZ&Zdg6{u9_Y(%7>`@!`o`)`T-=ZcJvM0rK!k*BQhiOc-#{}jL31Z=JjP(br#LKh= zV=&Z4)vdxPj~<`wwY+g+8P?u?xgaj@g&nF)$yuDd2ybfUk?A@C;tNG0CO{qwQG^`| ztz{)#jntp~;H|^JO6z&m{=KNuh7?5u&YPh!Wq>=nbV+Yoq=OcUb>9U4qN@?>(ZAIm zkL73O+8z*nz7AU2?h6iS#*ZU_e?TFjpPpsx?W&HQ1JO?jcHa|IFuK@un|u(Td~cUm zT2C<0l-Haxus_KV>W6KU{`yTPc%};^b2W=uzV?m-bN){`e`$aEpFPU#^o$^C*sNeO zw-VX!{jgD!3>xyrq>{3;Nqa`F7Unu+RDY1PRHFs#hxmecMLGquMP3q^+ejIn*r(UO zpSekH*KJ7E3Zk@4PQ_k~R%W4x;=dJOO zxEB)vgn(Ou!9pH&2l8 zh`M?cr+5;q8s#a2rK`vvBKM$;6$45B!pf*fEvc-& z@awdX5xW6ikS9asO^ZF{>gE^7jA+BvBXH|9jptpzvb!s=v>{p9_of%W#m$ilYVFp- z`C+TaNu!UfYaoi*!d?Jhb76g>m(+?dcpXGw$aURfv(1i zEb4Jn1uHBN?)EhR2lSW5FY)IGa8ifkFEBo}wqohs;t16(6>{uFX)N5TcT5$7bHkR` z(UZHmG9m-&e|k2eeg7^a4=KDzCpf<(hm0Ofx)X=ji=BMk1xHb-r9ca#lX=H6yL-sU zZhNU{ghbaNsfkQRd;AmT^D+O1IeBm8koSmgVcalk%5BRGtb_9%riG!Dlz6?tLatMgtZ5qCo$@0bK^ zxIhYupHtJ2tCt1cO>WkZp1ON#@&T&Mfe4MPf!BB4hKP10i@m%)RqPjeteq+Y?SNkr6IHi{4>e+Sn&o9+M z1f4T{ONb)iSR}bVUB6|XudM1TDN(Y>zad25jYcwVF`0mg9>mx&8~2C+0KY!o1fED) z)6feJ4Or&dkzb?p1uOV8sfbp}U0!xY1V9>IhtqqNNHVgvY9j2EQcz#@^FQ=W?2n`H zM(vn?$MA0%q`&=gL|k4CJ6aie#ha`;W5cF8<6^%isq&&IasU=37dECV(t23E>6{x# zW6Q?;oT)ZEck%8_@+P5Vm9nwN%`?e;sZY12SE z=P2ij&172AuIc59{58r!!rn42acr{a*=nh+nDgRGQRD%4wqJh=c(}gK*?qvgTe=u- zI5nrK*`iq%@QZ$YS=7LLj4r{txG5}U$mRZUWy$z?P|Mh=o%sXs4(!FW@+9Ob#pC>i z7tUnAW)Wp?-bdDbX)=L8c1ej%2;JI9jqdeu^kHAs)f zB#EsTcpOMXK4H!iZQ-^Dvy~#89iW+?NHTSW+Qd5X*ig>v${>Tqxlz=SwWCQNAA zTbb!GbMGCk(*ShJz=CIA03c3eV^*@5JGpeLbnJn}%g&oTV_Yx)fu2g*I=p+c%^k~d zxWAP+Ro6pfrZ-^RW13^aet&#!A8nm6%~I7%y*^9WL9^SdKAJHd6t1btutn#D8*%aMdT$40Ez z@2$JS%=IH%fFs{(VVv{kV4=n~%A8eSLvu(DZ;-`im@%Q3vc4)JXQC$g?vZ3-l8r;u_Ov2(Y; zMuRaQ-cjQSTj|^2m|V`G0&j-|gDs+yD}QpIp6Emg(OCy$;reA`yRGstrlQ80Q%yLQ zc<%ZyS~qV4i~8$pwHQr*3|Js#kU8#9mdR{V8|WqiOy+!r{v7lU@!W3pePn^jF5_BH zdMx6OW_hDB+n&VXDEj)-w7L{u9ni|=ywmVidZe-s!>&=J1(IYfV#)DJiIqry&%s~k 
zyMw!OH2)f-b-T4KDE;F!dIW_Q+O5>kTP;KmuqzL-soF9ZvbZUa##gqSO3mGUOo{#` z9|BIfe&L3hga)_?g4@wu9OpUc_k(ax_f| zx9-M-b#16X;o6Xo?qb(8Qq2xl=ZSsu_)k9y~^SRW#TrsoZUtclP0R z?IRaMyUd7pdxztA1G{sxWJP&s8QP-L94jEnp6^*Qd+g9m zn7Y0XBwL26;XSp3KSuZyz4C0TnFn8g3o9Htn~0my(88aq>8gu@IJkh5fvn$RSADy< zr_tRnlVxz%?de6kItSulQ4e(Q;i*-K`ovUhs*r?CboEtr(3CG@44a2X4=zbF)juFx z^)w?*4lGDWxxNJ^Z>j7v{j_pB#y#D*hGh{oueeQ~${VXGvy9YV+#}u-Ti34iSb%{6 z64H0aL8hwDFq_ow{-%bziI7*CLs8aAm7`IM_9blURyDu)R%<}gzpZ0}9L4d?&hdRT ziZulk|P9&2K6J(aJ>(PXMsl{f!=G&c6IxS@)&}W0T}a#mxRpXz`2>&d<

zue1vAOc#_na3SBR;0LJ#-BcbKb(M$m6zElUS_R_pLON8BNgkcAIMrg}%I=FI5oq+9 ziQgDiJ@!vzl66ZW5hgOSKG*!jQ^RPI2q)%ki*9Ganldw4=t4S>#Wc~?lW5A(L85Dm z@n$MJA+LE@<8qgyNf81E&(Z`{^mb0&bz-@Xh}7g$_ns*&_j)xAw&jv28Hq&!id7s6y9Juzvcw3jiZXeDf$U*g z9_6Gg+7FqXd$hD_+R1Q2$~aIvh;^v?f>s_MfiR!(@gahkR8GKS2#M42@;i=ytb%N(Jd1^a~H_6+*?qZ$A&k>;0s+eGQF!jxfS zUaCgY?|z!5BXG!N$5HS|XvOjUA;rt0&II?l{unNDJAdp}~SP`%s)o@>7QD93@Xl z=t(#%iKVSoCdW^dW!4JJ%+&?K`9Y{k?08-vEbms%F7A$Fplpw;trLM8)P?H6^ncHb znV%Bl-e(%~-D<1Sz+T#Ews=9&6xn&(>}^9#Nc8t>M&U#*SRfw}x{LexG5j1^ru`Mh z>NUHHmcQ8-xlrd1b0yKm23)U>2=j89Pgx0)uTOuOw4ok7_OgbbL-;c+$!-w9b!5Pj z|83Iac}B!<;2I!kKpFVZ3y{IE9Nsr-=ohmI{jJjQOGX;-{JR`$BT+ zM76DuN#W+xG;!j#C$A>1g0#Z){WsaHY<{pqs49$!vzT@4VEr;q|u?c*kDcwd2LE4{&H)2$e4;NlziH%JAJk z8{R&SwE?&?n9qf!X3We>R6T7T&O0t?@)i0h!^({26EhgSHre>%)|{FutlUU(vU`b+ zrv`Jdf>dowiZ-cM)iUo@XWx#5GM|qx4n5CY#LpZ^JY65K4-DCSv7bboq>B~{w8j<- zj)mX`rDih9C^N^ed8eumCCUS1dCowUd2_Imcmucm z9RfF3yo>I;Uh0y0IB6kyA(_6SkD0#8bFDC~!P5?6Qvfv;zcl;D$yI+uCya96LE-4P z(|i7XOlzfyTgQTZJAd$qIBTmJhgpIT*m2$X=nT)2e7DGnbHkb(sr3&STK0_QX^`q32QZy#oi9C z(DKK*MPDBcYVITrsWWui=O;%cdX0CB9oTB`_bhj#bMcBqI4DTJvN(xviUrG1rO5gC zG2ZC{=Q7LI!;A8z?FO)ra#XDN8DeghklE@0SYAFqv?X->XH&rtihMYA97Dq~`fB`% zTxvTgm~s)jA^TgSyu_beU!BfdA_OsWAh`7ca1`xCbF#O^BK7Z(qH}om?F5gU3*=(u z(z5kE{=7icHLIoAIWbU#z+mpeqIEq$hYwfd&~i{%a?pvziX5UjIL?L+3^*Oq`Me^R z>Mmv_Dw$1(MS1uNSyRS2m#5YLjtRSZJ{NjCS+d3CzSf;mUz9Z(mv48EKqZh;h`+9Zm-g}M*D}g|P6fBrC@!CZBla~n4IYouesKJMJ{ws)3W%X6F}0<+A|92Bn@V< z&Wde-Zy0OouMa<)03$R2kX_alGPSvqyXL?(b*qB5(<=|AI z!Dvnk1^&}~38wrpe<3h{YsfPj_Fim8cX7)X_A0d2X6GQZPUmr9otGyKj6RWrbU!86 zYzLZuT4d25W>>RT^?Y$0*VLAfp2hAl(W623^Ut=c(>C9;7cZ~eHHro&mUx%`0GGs3 z46Q|wa~WFlzha|&=`KpGAXm6v;9SC)Wcg)(RTkKPn0QnTZ;p@0 zITZ~(=&sru_f6}E;TzyI&D^3o`K11yYZ-af(99KlLOqUp7|mbqpQ+&6Wz70$=AAjg zRR=mxOF~J95vQp7NWlh?DAd#aOEaR=-ifkxA{fmJ{B#gnT z;_lZmFK8puo%t>>h3qL+MeKmgax#2Dfl?|hEqCgg5D1dIBO)frfX@Bt2`|3HdbNb$ zfrpEU40`O81h%a{8n% zz|R-3)GWjh&UU=`M&?gV`|-b<#dW%38jX?3>1|_aS}0=m1f|{a$|+a&sCz&J_gQMq 
z=mMDN8T7RifQ9UlQykKfQ$X?NM5_DSmG{M&Q~avb@EXsziPa&*NwIvu4T(Z;F;?1b zH26#0lf{)Y=Gx8M&57kas?q02Piv9lR`7ZMaUAgp`@~^Ss9n&jI@(AR?Z^)mpKJyn zmAS`ClpOIB+Du%?0+KJGb{`Cs5NY>fRM1bAXFLCO%mKE?IB5{Kg(Yhz!Rn1U5&uqXgTdVX2}>eg z7DB3$_n&T-xK7m}jBJDhAN6}oK`@u%k zy|zfRdNx=V5^AXYT|fU$uiRyw2RPkJq^Yeg+1^lRFZL|GW=hARp@t`AKhb46rd0%x zd-^$HCZMgxX$a?~ozvau=2gU#ev{n5QbfqsD1N}DKELQ$x6UMj)eoI}YtDOI>(etiic)uJ(4HK-ByvT}M+-R!i6 z|Jh)qw@%rfG17b)$Jlneadyw1SZHnz0~wLmBT-tiy)<$&su13T$emJDSn;MbHa-Ba#>pvx$Ua&hO1V4k2-(#1G;l^Do)EmPD=i3f?P9uLJJ514=&39p6>ZP3 z0?JuX8uRfvVWf|LXD_p4tdj(jqY&`qmWB%nYeIyRf=zY5`$g|`$bB|-HT!puFJ;+s z+F*DS9+34quoBV0+4NB*-e5fXMxvQ}MRsem5^vE(pWqlV2g<52GGYo!7iAcw;7VnD z;7mREtthc6W_xfX;~8eXiLBGe#U@O4`B7wWDEHUa!8r2K$W%LU>|5jDqW)+`#vcm~ z@;I$%RHEb91F5cK0JbYc6@OGs#09D#mg`fkdKOE8>5^8zz5Tq-(vEUpmsF9#jJC9{ zi##D!qrtPWx$XQww#saUSf~0~OHv{L9VY#S-nc5vC&X=Ld~6=4q*iTp2vxp*he_|$ zmjGD}=cjA~6SCmKId31HwN8$~ncM|D)B>rs@HWwo;LiJ*d%-!Zb93$2&^Usi?9ih4 zY@G3l?q?^_{Ni8yratZ1K()sdX=af*j84_V9V$Axu=euj16llN2-sp`pUttRd&{ba z*aMzc1`>mclmXln;U9UJcjy74;SGoJY)&MyUZ{8SoYyu#fH?;&(+=r-iImS}Wmh_E zd~hVKj)UgvZ;#wjYzg%-Fy*?fgSgk5Puxsc4JgHH-ytOmbjA-Q!uvO zN~eoTqaCI1J%e5I2$dQJ zqY0|8o7WgFoJ;owO&-Y#?e{fp;B}ceoq~DS*MP`h5JZ%ep@@Z0w}v*~@}{tDeih1C zIvKnyy=1Iq{pn2SJvd$B{j;PZ_db};YqrnP`7~~)+xhf)l`_+7J>9_lQR{i%)|rb5 zyT)LH*|~M^YA`NLZ#fq9OiSw<^#R;KS}h`y?G#p#R!+n8sn5vju2lVE&+}@Lf5w4Q46HaY= zj5!U5x^dQZ4{XxJ-i1=1e(H{hjlgd&vQ(hle5jdtTNkrPuT#-kC)r7=e3;6jbY*LUY$g+`DeC1k@wycL_1vn8$8`9osDozR!cG z#$weV8XV`iX}mc>Y3<*3H`Q{ZgKzqP$;Xv^)rs(UnJMqJ-)diMI1LMj?-aJ>fz;Iq zd}B(d&8$Wlv1=vF?v(1Ba=E6a#rJGLOO0C?v1scU9PW$U=IVOkYZcx?U%Ry4BX&-h zfv_wgW94h9|4r4o7Tb+y0+!q0h!q->6(&?u8>Y3pbN?h~;2ei=MQg=TptcdAihCun z+rC>bLYa^l<8GZQ50z1T|9Lfcs)a7|yU~kHO-+`f3Y`WNT>H$=o!Pc$kmnCt>c*{} z)?ee1zqzGCV_;Dw_+v6_U>YBIdD!*Dv3Ui2g<*;&9FF*#wQrc{0wDSNUj=;RElF++ zX}4;NXRhSIuvl~~MXqlzc5=|{ShMyk3e2MAxpLmtu3saK8-@m3c%J{jEo=@^L+^5t%Zqz!M>`2v? 
z2;ld0AxN4d8;@^`#qeFhs3PFMragMZVxaYrnrUrnJw#WY^bV`K6JiBE1+W8=pF+K4 z`eU2lh)kuutx-2#=Ub`TgG(L6k_i+9FZ|jz0)LK^KyxG02A z!ntcuj{Euqt4_e{K=5cWa^I72hJyWJQMA9XuI6*hcd&fLGy0NImeX&-@zz6J1r-U4 zmz_i3L|q(1YQ2URBBK$Fgy9WJlE&PFsHTcKChvoH{C;OAuzpu%GBVHEW=0h%9Z7wn zLsIfMCEP5WLVkGJvis5A?!+O{n@qE3UZFI`|B1H8k zv=oP>7hgA`)wYQ+La`8mh8&^)6h_74Ky+b(qyDXgV{!1c_erSQb?r^Rh3T|l>{p&O zi{yZd*Ux;TRI#Rhq_(yj-@4S6wNRb98unpVBN@3WX;q=GOm_)53iwLTM-DQt2#uVu zuzCz)a?OeJb+B%6^hIk;P1jd?VgBz*W_eXAgo^LGG1p75TRjPilETbY^gdMO7#1CXn&1bJw|V;ctv*C9 z>xy6zcv~Vvx^cx&#DobE7(m$Y>9HM!)yN>#pM^vf>#uAJq|~y{&IYIaHoDW5WA-x9 zHcfsHcih|&1PFcP`6ABj&%)!e3KGpO;PvpoL)2NKy*aNAmE^^IQ;_i$h6~Y4eNT*Y z@g9}PMb2+tC7Wv2P6~Sb5XbS1S(cX`s+@@c-ePRGIlg*mc&yv*)b0w$|DhdIdL` zc4Nw1p`R+)IvDafHCjiX`rH*->Q|S)@A)+JO$)yy)b-880i3vxVlh$-Q*EX(qG|ho zI$($m@{C>eh$z*#IeeJon)*|CTq|81w~+e$Gcn=`sP$XkLIr_xon@Hn#smZk2gNHP zNhD9o)i;wL?lBW4eU4;W65#iO7($ecIJ*MQVFiJ&hDGyXZ(2K~I*+ z=`^F$YU*Qbd zSK<6cFEY0YihDv1LpxvyvWK)K+Kx18?R=fim2Jcv^_7y#F+WT@uV_$)5FCPIi zT|~8Z8zXF3uLiA=C-lsSN8Q#S-l%IU@sjghev4DE=cU+X6{s7`TmOuDr?Ra}%O`Pn ze8zb?;b7GLnNw2j#O(oL1Lqn!mpZcDMYW;45#`$T8(v8UF*Tw9=XXu;gn2|9`9`E* zWcXo5OlBp1BiG7R62{>3J#MLvbcaTHodtDYhneVPDS;P_lgnsf)AA5ntbpzmQqt%7 z`^`$WaCrbyEdCo9!D|OtR1U4!S{KA3j|g>+3ckzfHfzS^b{{c7894{3Y`*>6$URfm zGH)<+kf)BwkjzSWZ-I4}piuGpGm@fX5@fbu+PJ4NOQ(?5`$S zkMU*_LVy`OhSP7Y-bNb0@B35()i%!9%D8Qg(TV$>Vp&Gb6Bf5J9(?aVvE5L1DP-D% zB`JJHuC#lBvrQx>Bj=_12YDY?*f}HNiMT6QiN`!4Ip5~906E&_ZrvGn{qzikh+b!y z{m4UmS{NF`U-NQXZo7?YjP4NWU<>y-@5dk{)FmCGD2udrpK}AX2?kNKS}kfcM$t3= zgSXW_*(nc!aspW2yLZ0Gq6BE)S&_0%qkdc|ego7!l&2dVc@9{O#^>#gN00%JU6<1a z|6PmCd}+AGCWJknlC8`%WW75{N9fPfF%(kNyS;ke&tr0a3zgMOom}@JlFSXP#tjFI zNV}%V;~?RdGKe6F?qlot!;pql&S!t^n`3A0?e<|QMVi>PshgHzy*#&wcCwOWdDyi4 zf_Rz7(hE;d2Yw>s1R4zw9I3&s$tdycKxIEix%EzBm$7AkwYER5%krh+OoVux7zESK{Ep%$**OL^ zt3qGsTA$QDrQ^_~Wk(6E^cc?(PMb?dR_FE62c&FuknPq?WEe*&DQM&fft*X7kto== zY|K>a+^Ov&#Mmz{9&l57OiTJliy4oI@ZCATHbP@(W-nAz{U2IPW{BkgNN;VaYZe)c z7gyS$bj&>FN_1m=?pS(PX`3&Z-N!P0kN``;bubOYQ#z9xV`-+5z~7+CJa?~&*47hU 
zGwhdxFTH7tJ9NIin_m^)--wdE1fcVD7wnMk9LiPQWplbo%P|;Ot}5(Ne5$qSQ19FF z=1@8EdAwjtTqO@$3jk)q5WIj|^805bK>L0pwH?`>o$5qa$|SqCMs zmCFH%GNN;Wx3BRn_DQdB*?$m}Jb~3D-Lq)K0}IJPpk^`-Bp6X zZb5ct3zK*2yXS3}BR9~}%OE5?s*d1qcMejF5jAC#YA1hDWwuT4F}|zRJyx_<)DzU@ z4!pIZ+oP{qQ9dlhR?7@@kB(h`h{bDn;|a7C)56v9vOaS-X95vJ2;p`SO2`fTz=k;FfF}76=CK{b$pUJ`~6M@wFS9kxFsh3KWLBLz}w|Pc^6* zHRA9tzicc+Qesw^JU4xy(UWBh`(DCdacfsQJ9&;AQ96pg6#Ej(X*jHY7{`8RG! z40M+hxE;Q(vb(#`xG~(`U5CRb!y7-5_$ZvbwVewRCSw$k#X4d$|3LHRmrIRv$w^ZoHg86mDv6Zck<9B;ZkD0AJ_SQDIPs4)!=r z>es(3&ip1fETLLnJvO$Ej2>Vi?($rLJMd)G)~JUH&#!9IKJDb{vAC!Bl4-6(dTQ(O z&d*{%WeO`4mbY6wLV+rwemnv$;Et^14 zVpi+906cTPLn$7>Q+f+6eP>*_91?DwG@+P#2U>a4lML;;_e7o<-x7O$9Wp>Y>WNHP zmnqoLGc+qCqT=TLi^UwSyu~?;Lu3#O2U^tu+bl8C>^wj1A76$%YkQ7LHMYVZP*dXf zl=A>(cpEaP8+|#@-E(eFo4Kp=ZpYa&WKFP4jvs`x!d6O*qL+Nh0*7$XcOo%MEL-J7WNR3pMtMFBmDIO$aNeld3`P5l) zsi*S7M)Bdd-NvqW($?bq548#|p^Ke!DceYG%&mb!%e{<6jMk#DTaJ1k_GyQsbBoq0 zg;yxloz=5QT3|HHG6b38K~^vHF@>^)kH$?XS!zTe335r@(l*1ctT|GK)eMjXIRpFV0VH z+{4Q**OD!aaXOV|1t60zaynNuOPfY`1T%dP_V2$Js~z%-FzvXju~XO57B5Kk&39ns z_YV$+JobDz7-$%RR+>hi2JgM+W*==~2ofC!9rg`$&1@---LZKqu8&s5j*nj7Rc~DQ z9J4F3ep>1kAKGi*@?|%2;7`z7OJ<*$ue7uLUDE+$#i}l(>tK1S_Ni0ll~ZOy>j5s5 z^HgNc7R|ot)E6aRs@f*dJ7ww_1f<@d{@PT&87ziJ@8U@MGJLewUz+V+;rqfa7lv;G zSx{M?(3K}X0Y$Iq9w(dsbRP{|6Mq7SdB4G~#l`0ZIn} zBTBCsHxV`NnZKr@4R8Iq9ig3KeoJAdSLPBDygUFeE(Ys8F}(MK4b>R)H+<>Vn>EHo zJ2PH#C~=9D>C^q0qBd%ok0d3#`cGA@XDh`F2<9HDsJiDc_+{8Km~KNOQi@X5!}-)3 zV1jstw7kE=4mLZJD(e{k+zunSLh=u`bTin;E0DL{K`piFtigOQ$dFG`6G|mS^J@7r z1h!<+rev*cz4kpHV)-$(V!Ww zbM*l`UKbu>+mZJ%1??asv7G4qFycpG+AF8xR!p&aXpcyp2gG5_TlW(EmTf`E2z)um z48FJSWfO+^f4aEps3@3sjerU&pe&)Z@X?5rbk~BExFES7AqXNM0@5J@0!v9Q-Hm{B zgXGfPDels-MXt+o(Ri^Mr^s2xAM&&Trbz(4X!3 zS`hoo>9}2P`4cnIl+?sf8C9w*Vr+Tl1q}gCN0k>;R;sa-QBE!4PJ36DD-N@q;opr{ z)fyk0`78Idrbwgc5^H@9CA_8H+-n!oEq~-k9XZm=OMe7hA2n3Xkjmv@3WY>_n$q$7 zTJETy7CwGF`$V&fT+jN{2?`r_hF3jhpVpHL!tX(aEU-$9W60=Gt?Os6^vbs#kC_AX z8=va7#o_nWe}`&xDp#fz%s!1yJiesvyAphwpZ(pr(9tewz24?A3+pP_1hr@)JJi6i z0aHiwUm#p7VuRVoY%uwLk(mN+EIhL`8 
z0C+2}pY~90Of5ch?_6*iT|8!2;vvIz_C&KKXy9!DU=DjSt$WP~ko!L@0o!3*2osd` z-^IL_8#HdPtX7%yZozu#t6)6iR2`wrJ3&yC#g5lAY}7c^7dNCelh3mU4;?(^+#jrx z)sn`f?s?@#!A6{tB%g`B(|Lpw>7hssBOoMG%RB)-lmXLLG z5Bwh~MgFT=;c?iq1|`-B@jc$JGiK7Slk0$8ZKuG(qKI3%#=KNM^z&9F_ZA#7Z8}*9 zM9);{5h$0oVfDfFv&j4|+zP}w?25@&AvZcaC69mOT=h`N4@C%*JV)EvdpiKlR|@Yg zVNa8kOd1wuh^ekTT5Y}e`5c-k;+~ZW0^vDreWX@JK)mK&tsV%bj|9`J&KuBuj*GY(M-zzPEQE93o9l$ zfP8xQ@Wn!H$Uivzbq=Q-USUvvAxf^)pXA zV4hWVO-Ka%Ek3gI{`M@g>6=^p;YC>ldX+|=Yja}eymB6T{=|OV=OYKM-xIv!L`FfO zGr3b?3P;6U>?r6yiwO_VzUtKXyI22o`fvt`p9qnSWDxnY2XXtC>Vwi8=%tJVr8m(c znV4`3BEF&3@HCQodo?#)gOj~3Yh!7|r@UAhu4Sm|>i^NkG;Yc-MAv4`E_0yZ*#y4W zh2QDC9Gk3uux1)fDJK{+qI51hV9=gEp_RhmN!rklqX1nxbgKpQ+CY35ua$!%eG!fo zgw`j2v2EOmyf&|8fshU|RSBgj0mAZmY1P@kxiGUi7vm4*;x~xf6pQJ1^-wy!ZK z2mWMoiw>CBkDM=@zgf`MY@pC#nKWF^*1zj;CKGx>_Tsn|1}g_;6VxHW9- zoL=tR!B-#m)tu=81hN`YV^_CX10yWFXCFIadd9pEm3=Rx6NBObleP=@21{0Q|I$cI)!ixc46$sZJA>ODf)~nw27Ems&MmN6Y*Q z8}&q+;?dQ*the)pcg8|)zq0PP+`=gZxOhoVeh5Fhc=p#D?HnX1pKbdYqTrr>5)rYy zQguCsur)%XbjLF60YpK%v^G&-kGbf(hqH5reIaIzMxI>{bT~}I+?5b& zPd!LPS0`CrvS9K}Gj@Q6y%hBKly>3pNwwOBc=wRlOwI&nrP5%c^erGl2D9-p>cCT& zI_mHrP+gy?5h(h}(`0LhU9SC#!B4xcC%!o}vG?8_Q3$+Y)LcAvJCF>dzW5$x8nb)S z_oS&Q!;K|r6?6nL0J)>Ya+f5kk569lu=sd?-ejrh(64u8P#5qQfp|PcgWN}s{aT}Gn5V2bs3iC;0Oc1~F_M3|nLYsT|Vy9;354vko&+_VOyv>DN z8aQnIGlv+KHS65!^Ln4=y~j52)rd9Wv=Zk3n?hvEfz~Y(R?_}4w%|Zm?*4h zF#Lv{KIRpdYa|0O_cO78wST-xlKa%Lv`fveTSA$hOdtl(+Z9dJainwVB6V*x#T(lj#rM%RCN#Lna!IaC25=Fr?Nx2NOOmoKBoM&<{2h{A zkk!)?3!cJCzu%R{2#+v%CXE)khp&hV^zRo_cI_Dd{`oq81lBu^XmW(hn;_p{Cx?03gLyx@Nabq;t|ck2G}m_6?ojQI{=bnq6N`ZCzueLK z#lEF{H&BX2ZakvZ_Wt_bTkU^O2VBjM2Qn6Ht&hj7+8t~BQxok>`VC-rFIA0Ho~>qn zzy`l{UvZ9mEIj_R=JFM(gLUl96>* z*oloSg;3P7q5_K|9g>nSs0bJP{o>-HY24~Ek^iu})*4DL53rga`lCS9h!aeLrTc7$%Yw8IP!)HRfVDDf>%vT14 zah>c^_!+;2ib-g#ZJc&`M6u8)QV!+gcZH`s=L#?80PbikKlU@fINn2Fs&^yU>#Z8^snAvGAm%sU zoKcO4@8!QQ7AP`1Jq2~&L&E^Ea>+~5CX<0a_htSyi)}oU=v0PcD~_51Ai%1l;2N5} z%;}*wl2tjPf~`BPl5iPs$&C;Iio&{xa|6Gaz24LCag%qj#@Y($Qf(tXoaV5W=YMr( zxVvN!^*tYxMI&(JW75HR%f 
z!dG58_Z4Q!{Iv1m?N_Rv#6cAZS$>kf{LmjQp6l!xz8*3euAepZF7e!~)@RAC4*P2! zXHp7Q@%xYEs=`l4k8?NP=dJT-MbZ_g>o(2j=!>y5M$SmYh{Ycq{IlrI&-_w-O0~a2 zI2hGF9aD%FgN*KPiUs1RPc5X`7PG3jFbOi)YV!mY=5og?@zB`PQ(H)krn%0Dk_JRQOqzBh$Z^~#JvUs5?gsH6m;e=MJg zj)gWg^b(F}&Dgt!iGqlNs_qe+fDTUiQbRYOjT~%x%1TzZ-%04Qnf*=nKZ~VOG)V}u zHe!DuOLA?g{{|^>v$zF?1E(U}HGYYx@j5Agw7{sF!wQ z4wr6mIbisG7j|62703?(w~YEouD)^d+)W*nB{BWqiTY339I042YUD;=`l;ntsN2#Gj7lw8Z; z4`C`)M8l-xP*|opM*2?oDVdEM$IepnzpK{-2_hhxWgwSSv3FFMhGgC>NojG5AdHoJnLW_}GCo{oOi~*>$ z2qNgZ;jm7)RjNWDtf%XYhnNY#LWvTNjqMfl@&U5FgMg6&w}k({KmNCGb{vqo6h5RH zc13&2;tlbF!-)~xu$VC0f3F+8y`E3zqU|O5qu3i2CwW`%JsDo%DtxEV{{{5xpAT9z M)pS)Wl`TL24{iKN9{>OV literal 0 HcmV?d00001 diff --git a/dlp/resources/test.txt b/dlp/resources/test.txt new file mode 100644 index 000000000000..c2ee3815bc9b --- /dev/null +++ b/dlp/resources/test.txt @@ -0,0 +1 @@ +My phone number is (223) 456-7890 and my email address is gary@somedomain.com. \ No newline at end of file From 6bedaa80ff926efc104adb5679956934410343ad Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 3 Jan 2018 16:20:23 -0800 Subject: [PATCH 03/11] add argparse --- dlp/inspect_file.py | 35 ++++++++++++++++++++++++++++++++++- dlp/inspect_gcs_file.py | 39 +++++++++++++++++++++++++++++++++++++-- dlp/inspect_gcs_test.py | 1 + dlp/inspect_string.py | 32 ++++++++++++++++++++++++++++++-- 4 files changed, 102 insertions(+), 5 deletions(-) diff --git a/dlp/inspect_file.py b/dlp/inspect_file.py index c132c8861057..56b7baa49ec3 100644 --- a/dlp/inspect_file.py +++ b/dlp/inspect_file.py @@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+"""Sample app that uses the Data Loss Prevent API to inspect a file.""" + from __future__ import print_function +import argparse + # [START inspect_file] def inspect_file(filename, info_types=None, min_likelihood=None, @@ -89,4 +93,33 @@ def inspect_file(filename, info_types=None, min_likelihood=None, if __name__ == '__main__': - inspect_file("/usr/local/google/home/gorcester/Downloads/wQOVLom8Gsa.png", ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) + parser = argparse.ArgumentParser( + description=__doc__) + parser.add_argument('filename', help='The path to the file to inspect.') + parser.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of info ' + 'categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. Specify this flag ' + 'multiple times to specify multiple info types.') + parser.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser.add_argument('--max_findings', type=int, + help='The maximum number of findings to report; 0 = no maximum.') + parser.add_argument('--include_quote', type=bool, + help='A boolean for whether to display a quote of the detected ' + 'information in the results.') + parser.add_argument('--mime_type', + help='The MIME type of the file. 
If not specified, the type is ' + 'inferred via the Python standard library\'s mimetypes module.') + + args = parser.parse_args() + + inspect_file( + args.filename, info_types=args.info_types, + min_likelihood=args.min_likelihood, include_quote=args.include_quote, + mime_type=args.mime_type) diff --git a/dlp/inspect_gcs_file.py b/dlp/inspect_gcs_file.py index 94a854bdc871..ae9626fa5940 100644 --- a/dlp/inspect_gcs_file.py +++ b/dlp/inspect_gcs_file.py @@ -12,8 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Sample app that uses the Data Loss Prevent API to inspect a file on Google +Cloud Storage.""" + + from __future__ import print_function +import argparse + # [START inspect_gcs_file] def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, @@ -88,6 +94,35 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, print('No findings.') # [END inspect_gcs_file] + if __name__ == '__main__': -# inspect_gcs_file('andrewsg-test', 'wQOVLom8Gsa.png', ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) - inspect_gcs_file('nodejs-docs-samples-dlp', 'test.txt', ["EMAIL_ADDRESS", "PHONE_NUMBER"]) \ No newline at end of file + parser = argparse.ArgumentParser( + description=__doc__) + parser.add_argument('bucket', + help='The name of the GCS bucket containing the file.') + parser.add_argument('filename', + help='The name of the file in the bucket, including the path, e.g. ' + '"images/myfile.png".') + parser.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of info ' + 'categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. 
Specify this flag ' + 'multiple times to specify multiple info types.') + parser.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser.add_argument('--max_findings', type=int, + help='The maximum number of findings to report; 0 = no maximum.') + parser.add_argument('--include_quote', type=bool, + help='A boolean for whether to display a quote of the detected ' + 'information in the results.') + + args = parser.parse_args() + + inspect_gcs_file( + args.bucket, args.filename, info_types=args.info_types, + min_likelihood=args.min_likelihood, include_quote=args.include_quote) diff --git a/dlp/inspect_gcs_test.py b/dlp/inspect_gcs_test.py index b5a10bb7a1eb..047db48b485f 100644 --- a/dlp/inspect_gcs_test.py +++ b/dlp/inspect_gcs_test.py @@ -49,6 +49,7 @@ def bucket(request): # Yield the object to the test code; lines after this execute as a teardown. yield bucket + # Delete the files. for blob in blobs: blob.delete() diff --git a/dlp/inspect_string.py b/dlp/inspect_string.py index 33570c215b20..908ff4b7b803 100644 --- a/dlp/inspect_string.py +++ b/dlp/inspect_string.py @@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+"""Sample app that uses the Data Loss Prevent API to inspect a string.""" + from __future__ import print_function +import argparse + # [START inspect_string] def inspect_string(item, info_types=None, min_likelihood=None, @@ -79,5 +83,29 @@ def inspect_string(item, info_types=None, min_likelihood=None, if __name__ == '__main__': - inspect_string("I'm Gary and my email is gary@example.com", ["EMAIL_ADDRESS", "US_MALE_NAME", "US_FEMALE_NAME"]) - # DO NOT SUBMIT \ No newline at end of file + parser = argparse.ArgumentParser( + description=__doc__) + parser.add_argument('item', help='The string to inspect.') + parser.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of info ' + 'categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. Specify this flag ' + 'multiple times to specify multiple info types.') + parser.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser.add_argument('--max_findings', type=int, + help='The maximum number of findings to report; 0 = no maximum.') + parser.add_argument('--include_quote', type=bool, + help='A boolean for whether to display a quote of the detected ' + 'information in the results.') + + args = parser.parse_args() + + inspect_string( + args.item, info_types=args.info_types, + min_likelihood=args.min_likelihood, include_quote=args.include_quote) From 38bc89ee59242333a9e9f5d4ae7735bedf2d73b9 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 3 Jan 2018 17:05:24 -0800 Subject: [PATCH 04/11] consolidate to one file. 
--- dlp/inspect_content.py | 318 ++++++++++++++++++ ...ct_gcs_test.py => inspect_content_test.py} | 90 ++++- dlp/inspect_file.py | 125 ------- dlp/inspect_file_test.py | 62 ---- dlp/inspect_gcs_file.py | 128 ------- dlp/inspect_string.py | 111 ------ dlp/inspect_string_test.py | 46 --- 7 files changed, 400 insertions(+), 480 deletions(-) create mode 100644 dlp/inspect_content.py rename dlp/{inspect_gcs_test.py => inspect_content_test.py} (51%) delete mode 100644 dlp/inspect_file.py delete mode 100644 dlp/inspect_file_test.py delete mode 100644 dlp/inspect_gcs_file.py delete mode 100644 dlp/inspect_string.py delete mode 100644 dlp/inspect_string_test.py diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py new file mode 100644 index 000000000000..d4f6ac865fda --- /dev/null +++ b/dlp/inspect_content.py @@ -0,0 +1,318 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sample app that uses the Data Loss Prevent API to inspect a string, a +local file or a file on Google Cloud Storage.""" + +from __future__ import print_function + +import argparse + + +# [START inspect_string] +def inspect_string(item, info_types=None, min_likelihood=None, + max_findings=None, include_quote=True): + """Uses the Data Loss Prevention API to analyze a string for protected data. + Args: + item: The string to inspect. + info_types: A list of strings representing info types to look for. 
+ A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + max_findings: The maximum number of findings to report; 0 = no maximum. + include_quote: Boolean for whether to display a quote of the detected + information in the results. + Returns: + None; the response from the API is printed to the terminal. + """ + + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + 'include_quote': include_quote, + } + + # Construct the items list (in this case, only one item, in string form). + items = [{'type': 'text/plain', 'value': item}] + + # Call the API. + response = dlp.inspect_content(inspect_config, items) + + # Print out the results. 
+ if response.results[0].findings: + for finding in response.results[0].findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') +# [END inspect_string] + + +# [START inspect_file] +def inspect_file(filename, info_types=None, min_likelihood=None, + max_findings=None, include_quote=True, mime_type=None): + """Uses the Data Loss Prevention API to analyze a file for protected data. + Args: + filename: The path to the file to inspect. + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + max_findings: The maximum number of findings to report; 0 = no maximum. + include_quote: Boolean for whether to display a quote of the detected + information in the results. + mime_type: The MIME type of the file. If not specified, the type is + inferred via the Python standard library's mimetypes module. + Returns: + None; the response from the API is printed to the terminal. + """ + + import mimetypes + + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). 
+ if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + 'include_quote': include_quote, + } + + # If mime_type is not specified, guess it from the filename. + if mime_type is None: + mime_guess = mimetypes.MimeTypes().guess_type(filename) + mime_type = mime_guess[0] or 'application/octet-stream' + + # Construct the items list by reading the file as a binary string. + with open(filename, mode='rb') as f: + items = [{'type': mime_type, 'data': f.read()}] + + # Call the API. + response = dlp.inspect_content(inspect_config, items) + + # Print out the results. + if response.results[0].findings: + for finding in response.results[0].findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') +# [END inspect_file] + + +# [START inspect_gcs_file] +def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, + max_findings=None): + """Uses the Data Loss Prevention API to analyze a string for protected data. + Args: + bucket: The name of the GCS bucket containing the file, as a string. + filename: The name of the file in the bucket, including the path, as a + string; e.g. 'images/myfile.png'. + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. 
+ If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + max_findings: The maximum number of findings to report; 0 = no maximum. + Returns: + None; the response from the API is printed to the terminal. + """ + + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + } + + # Construct a cloud_storage_options dictionary with the file's URL. + url = 'gs://{}/{}'.format(bucket, filename) + storage_config = {'cloud_storage_options': + {'file_set': + {'url': url} + } + } + + operation = dlp.create_inspect_operation(inspect_config, storage_config, + None) + + # Get the operation result name, which can be used to look up the full + # results. This call blocks until the operation is complete; to avoid + # blocking, use operation.add_done_callback(fn) instead. 
+ operation_result = operation.result() + + response = dlp.list_inspect_findings(operation_result.name) + + # TODO DO NOT SUBMIT: haven't successfully gotten results object so not sure this is correct + if response.result.findings: + for finding in response.result.findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') +# [END inspect_gcs_file] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + subparsers = parser.add_subparsers(dest='content', + help='Select how to submit content to the API.') + + parser_string = subparsers.add_parser('string', help='Inspect a string.') + parser_string.add_argument('item', help='The string to inspect.') + parser_string.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of ' + 'info categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. 
Specify this flag ' + 'multiple times to specify multiple info types.') + parser_string.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser_string.add_argument('--max_findings', type=int, + help='The maximum number of findings to report; 0 = no maximum.') + parser_string.add_argument('--include_quote', type=bool, + help='A boolean for whether to display a quote of the detected ' + 'information in the results.') + + parser_file = subparsers.add_parser('file', help='Inspect a local file.') + parser_file.add_argument('filename', + help='The path to the file to inspect.') + parser_file.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of ' + 'info categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. Specify this flag ' + 'multiple times to specify multiple info types.') + parser_file.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser_file.add_argument('--max_findings', type=int, + help='The maximum number of findings to report; 0 = no maximum.') + parser_file.add_argument('--include_quote', type=bool, + help='A boolean for whether to display a quote of the detected ' + 'information in the results.') + parser_file.add_argument('--mime_type', + help='The MIME type of the file. 
If not specified, the type is ' + 'inferred via the Python standard library\'s mimetypes module.') + + parser_gcs = subparsers.add_parser('gcs', + help='Inspect files on Google Cloud Storage.') + parser_gcs.add_argument('bucket', + help='The name of the GCS bucket containing the file.') + parser_gcs.add_argument('filename', + help='The name of the file in the bucket, including the path, e.g. ' + '"images/myfile.png". Wildcards are permitted.') + parser_gcs.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of ' + 'info categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. Specify this flag ' + 'multiple times to specify multiple info types.') + parser_gcs.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser_gcs.add_argument('--max_findings', type=int, + help='The maximum number of findings to report; 0 = no maximum.') + + args = parser.parse_args() + + if args.content == 'string': + inspect_string( + args.item, info_types=args.info_types, + min_likelihood=args.min_likelihood, include_quote=args.include_quote) + elif args.content == 'file': + inspect_file( + args.filename, info_types=args.info_types, + min_likelihood=args.min_likelihood, include_quote=args.include_quote, + mime_type=args.mime_type) + elif args.content == 'gcs': + inspect_gcs_file( + args.bucket, args.filename, info_types=args.info_types, + min_likelihood=args.min_likelihood) diff --git a/dlp/inspect_gcs_test.py b/dlp/inspect_content_test.py similarity index 51% rename from dlp/inspect_gcs_test.py rename to dlp/inspect_content_test.py index 047db48b485f..a4bd5781f428 100644 
--- a/dlp/inspect_gcs_test.py +++ b/dlp/inspect_content_test.py @@ -1,13 +1,13 @@ # Copyright 2017 Google Inc. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, +# distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. @@ -19,7 +19,8 @@ import google.cloud.exceptions import google.cloud.storage -import inspect_gcs_file +import inspect_content + GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT') TEST_BUCKET_NAME = GCLOUD_PROJECT + '-dlp-python-client-test' @@ -59,15 +60,87 @@ def bucket(request): print('teardown complete') +def test_inspect_string(capsys): + test_string = 'I am Gary and my email is gary@example.com' + + inspect_content.inspect_string( + test_string, include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + + +def test_inspect_string_with_info_types(capsys): + test_string = 'I am Gary and my email is gary@example.com' + + inspect_content.inspect_string( + test_string, info_types=['US_MALE_NAME'], include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: US_MALE_NAME' in out + assert 'Info type: EMAIL_ADDRESS' not in out + + +def test_inspect_string_no_results(capsys): + test_string = 'Nothing to see here' + + inspect_content.inspect_string( + test_string, include_quote=True) + + out, _ = capsys.readouterr() + assert 'No findings' in out + + +def test_inspect_file(capsys): + test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.txt') + + 
inspect_content.inspect_file( + test_filepath, include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: EMAIL_ADDRESS' in out + + +def test_inspect_file_with_info_types(capsys): + test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.txt') + + inspect_content.inspect_file( + test_filepath, ['PHONE_NUMBER'], include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: PHONE_NUMBER' in out + assert 'Info type: EMAIL_ADDRESS' not in out + + +def test_inspect_file_no_results(capsys): + test_filepath = os.path.join(RESOURCE_DIRECTORY, 'harmless.txt') + + inspect_content.inspect_file( + test_filepath, include_quote=True) + + out, _ = capsys.readouterr() + assert 'No findings' in out + + +def test_inspect_image_file(capsys): + test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png') + + inspect_content.inspect_file( + test_filepath, include_quote=True) + + out, _ = capsys.readouterr() + assert 'Info type: PHONE_NUMBER' in out + + def test_inspect_gcs_file(bucket, capsys): - inspect_gcs_file.inspect_gcs_file(bucket.name, 'test.txt') + inspect_content.inspect_gcs_file(bucket.name, 'test.txt') out, _ = capsys.readouterr() assert 'Info type: EMAIL_ADDRESS' in out def test_inspect_gcs_file_with_info_types(bucket, capsys): - inspect_gcs_file.inspect_gcs_file( + inspect_content.inspect_gcs_file( bucket.name, 'test.txt', info_types=['EMAIL_ADDRESS']) out, _ = capsys.readouterr() @@ -75,20 +148,21 @@ def test_inspect_gcs_file_with_info_types(bucket, capsys): def test_inspect_gcs_file_no_results(bucket, capsys): - inspect_gcs_file.inspect_gcs_file(bucket.name, 'harmless.txt') + inspect_content.inspect_gcs_file(bucket.name, 'harmless.txt') out, _ = capsys.readouterr() assert 'No findings' in out def test_inspect_gcs_image_file(bucket, capsys): - inspect_gcs_file.inspect_gcs_file(bucket.name, 'test.png') + inspect_content.inspect_gcs_file(bucket.name, 'test.png') out, _ = capsys.readouterr() assert 'Info type: EMAIL_ADDRESS' in out + def 
test_inspect_gcs_multiple_file(bucket, capsys): - inspect_gcs_file.inspect_gcs_file(bucket.name, '*') + inspect_content.inspect_gcs_file(bucket.name, '*') out, _ = capsys.readouterr() assert 'Info type: PHONE_NUMBER' in out diff --git a/dlp/inspect_file.py b/dlp/inspect_file.py deleted file mode 100644 index 56b7baa49ec3..000000000000 --- a/dlp/inspect_file.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sample app that uses the Data Loss Prevent API to inspect a file.""" - -from __future__ import print_function - -import argparse - - -# [START inspect_file] -def inspect_file(filename, info_types=None, min_likelihood=None, - max_findings=None, include_quote=True, mime_type=None): - """Uses the Data Loss Prevention API to analyze a file for protected data. - Args: - filename: The path to the file to inspect. - info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. - min_likelihood: A string representing the minimum likelihood threshold - that constitutes a match. 
One of: 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. - max_findings: The maximum number of findings to report; 0 = no maximum. - include_quote: Boolean for whether to display a quote of the detected - information in the results. - mime_type: The MIME type of the file. If not specified, the type is - inferred via the Python standard library's mimetypes module. - Returns: - None; the response from the API is printed to the terminal. - """ - - import mimetypes - - # Import the client library - import google.cloud.dlp - - # Instantiate a client. - dlp = google.cloud.dlp.DlpServiceClient() - - # Prepare info_type by converting the list of strings into a list of - # dictionaries (protos are also accepted). - if info_types is not None: - info_types = [{'name': info_type} for info_type in info_types] - - # Construct the configuration dictionary. Keys which are None may - # optionally be omitted entirely. - inspect_config = { - 'info_types': info_types, - 'min_likelihood': min_likelihood, - 'max_findings': max_findings, - 'include_quote': include_quote, - } - - # If mime_type is not specified, guess it from the filename. - if mime_type is None: - mime_guess = mimetypes.MimeTypes().guess_type(filename) - mime_type = mime_guess[0] or 'application/octet-stream' - - # Construct the items list by reading the file as a binary string. - with open(filename, mode='rb') as f: - items = [{'type': mime_type, 'data': f.read()}] - - # Call the API. - response = dlp.inspect_content(inspect_config, items) - - # Print out the results. 
- if response.results[0].findings: - for finding in response.results[0].findings: - try: - print('Quote: {}'.format(finding.quote)) - except AttributeError: - pass - print('Info type: {}'.format(finding.info_type.name)) - print('Likelihood: {}'.format(finding.likelihood)) - else: - print('No findings.') -# [END inspect_file] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__) - parser.add_argument('filename', help='The path to the file to inspect.') - parser.add_argument('--info_types', action='append', - help='Strings representing info types to look for. A full list of info ' - 'categories and types is available from the API. Examples ' - 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' - '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' - 'the API will use a limited default set. Specify this flag ' - 'multiple times to specify multiple info types.') - parser.add_argument('--min_likelihood', - choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', - 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], - help='A string representing the minimum likelihood threshold that ' - 'constitutes a match.') - parser.add_argument('--max_findings', type=int, - help='The maximum number of findings to report; 0 = no maximum.') - parser.add_argument('--include_quote', type=bool, - help='A boolean for whether to display a quote of the detected ' - 'information in the results.') - parser.add_argument('--mime_type', - help='The MIME type of the file. 
If not specified, the type is ' - 'inferred via the Python standard library\'s mimetypes module.') - - args = parser.parse_args() - - inspect_file( - args.filename, info_types=args.info_types, - min_likelihood=args.min_likelihood, include_quote=args.include_quote, - mime_type=args.mime_type) diff --git a/dlp/inspect_file_test.py b/dlp/inspect_file_test.py deleted file mode 100644 index f885bc251e0d..000000000000 --- a/dlp/inspect_file_test.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import inspect_file - - -def test_inspect_file(capsys): - test_filepath = os.path.join( - os.path.dirname(__file__), 'resources/test.txt') - - inspect_file.inspect_file( - test_filepath, include_quote=True) - - out, _ = capsys.readouterr() - assert 'Info type: EMAIL_ADDRESS' in out - - -def test_inspect_file_with_info_types(capsys): - test_filepath = os.path.join( - os.path.dirname(__file__), 'resources/test.txt') - - inspect_file.inspect_file( - test_filepath, ['PHONE_NUMBER'], include_quote=True) - - out, _ = capsys.readouterr() - assert 'Info type: PHONE_NUMBER' in out - assert 'Info type: EMAIL_ADDRESS' not in out - - -def test_inspect_file_no_results(capsys): - test_filepath = os.path.join( - os.path.dirname(__file__), 'resources/harmless.txt') - - inspect_file.inspect_file( - test_filepath, include_quote=True) - - out, _ = capsys.readouterr() - assert 'No findings' in out - - -def test_inspect_image_file(capsys): - test_filepath = os.path.join( - os.path.dirname(__file__), 'resources/test.png') - - inspect_file.inspect_file( - test_filepath, include_quote=True) - - out, _ = capsys.readouterr() - assert 'Info type: PHONE_NUMBER' in out diff --git a/dlp/inspect_gcs_file.py b/dlp/inspect_gcs_file.py deleted file mode 100644 index ae9626fa5940..000000000000 --- a/dlp/inspect_gcs_file.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Sample app that uses the Data Loss Prevent API to inspect a file on Google -Cloud Storage.""" - - -from __future__ import print_function - -import argparse - - -# [START inspect_gcs_file] -def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, - max_findings=None): - """Uses the Data Loss Prevention API to analyze a string for protected data. - Args: - bucket: The name of the GCS bucket containing the file, as a string. - filename: The name of the file in the bucket, including the path, as a - string; e.g. 'images/myfile.png'. - info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. - min_likelihood: A string representing the minimum likelihood threshold - that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. - max_findings: The maximum number of findings to report; 0 = no maximum. - Returns: - None; the response from the API is printed to the terminal. - """ - - # Import the client library - import google.cloud.dlp - - # Instantiate a client. - dlp = google.cloud.dlp.DlpServiceClient() - - # Prepare info_type by converting the list of strings into a list of - # dictionaries (protos are also accepted). - if info_types is not None: - info_types = [{'name': info_type} for info_type in info_types] - - # Construct the configuration dictionary. Keys which are None may - # optionally be omitted entirely. 
- inspect_config = { - 'info_types': info_types, - 'min_likelihood': min_likelihood, - 'max_findings': max_findings, - } - - # Construct a cloud_storage_options dictionary with the file's URL. - url = 'gs://{}/{}'.format(bucket, filename) - storage_config = {'cloud_storage_options': - {'file_set': - {'url': url} - } - } - - operation = dlp.create_inspect_operation(inspect_config, storage_config, - None) - - # Get the operation result name, which can be used to look up the full - # results. This call blocks until the operation is complete; to avoid - # blocking, use operation.add_done_callback(fn) instead. - operation_result = operation.result() - - response = dlp.list_inspect_findings(operation_result.name) - - # TODO DO NOT SUBMIT: haven't successfully gotten results object so not sure this is correct - if response.result.findings: - for finding in response.result.findings: - try: - print('Quote: {}'.format(finding.quote)) - except AttributeError: - pass - print('Info type: {}'.format(finding.info_type.name)) - print('Likelihood: {}'.format(finding.likelihood)) - else: - print('No findings.') -# [END inspect_gcs_file] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__) - parser.add_argument('bucket', - help='The name of the GCS bucket containing the file.') - parser.add_argument('filename', - help='The name of the file in the bucket, including the path, e.g. ' - '"images/myfile.png".') - parser.add_argument('--info_types', action='append', - help='Strings representing info types to look for. A full list of info ' - 'categories and types is available from the API. Examples ' - 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' - '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' - 'the API will use a limited default set. 
Specify this flag ' - 'multiple times to specify multiple info types.') - parser.add_argument('--min_likelihood', - choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', - 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], - help='A string representing the minimum likelihood threshold that ' - 'constitutes a match.') - parser.add_argument('--max_findings', type=int, - help='The maximum number of findings to report; 0 = no maximum.') - parser.add_argument('--include_quote', type=bool, - help='A boolean for whether to display a quote of the detected ' - 'information in the results.') - - args = parser.parse_args() - - inspect_gcs_file( - args.bucket, args.filename, info_types=args.info_types, - min_likelihood=args.min_likelihood, include_quote=args.include_quote) diff --git a/dlp/inspect_string.py b/dlp/inspect_string.py deleted file mode 100644 index 908ff4b7b803..000000000000 --- a/dlp/inspect_string.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sample app that uses the Data Loss Prevent API to inspect a string.""" - -from __future__ import print_function - -import argparse - - -# [START inspect_string] -def inspect_string(item, info_types=None, min_likelihood=None, - max_findings=None, include_quote=True): - """Uses the Data Loss Prevention API to analyze a string for protected data. - Args: - item: The string to inspect. - info_types: A list of strings representing info types to look for. 
- A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. - min_likelihood: A string representing the minimum likelihood threshold - that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', - 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. - max_findings: The maximum number of findings to report; 0 = no maximum. - include_quote: Boolean for whether to display a quote of the detected - information in the results. - Returns: - None; the response from the API is printed to the terminal. - """ - - # Import the client library - import google.cloud.dlp - - # Instantiate a client. - dlp = google.cloud.dlp.DlpServiceClient() - - # Prepare info_type by converting the list of strings into a list of - # dictionaries (protos are also accepted). - if info_types is not None: - info_types = [{'name': info_type} for info_type in info_types] - - # Construct the configuration dictionary. Keys which are None may - # optionally be omitted entirely. - inspect_config = { - 'info_types': info_types, - 'min_likelihood': min_likelihood, - 'max_findings': max_findings, - 'include_quote': include_quote, - } - - # Construct the items list (in this case, only one item, in string form). - items = [{'type': 'text/plain', 'value': item}] - - # Call the API. - response = dlp.inspect_content(inspect_config, items) - - # Print out the results. 
- if response.results[0].findings: - for finding in response.results[0].findings: - try: - print('Quote: {}'.format(finding.quote)) - except AttributeError: - pass - print('Info type: {}'.format(finding.info_type.name)) - print('Likelihood: {}'.format(finding.likelihood)) - else: - print('No findings.') -# [END inspect_string] - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__) - parser.add_argument('item', help='The string to inspect.') - parser.add_argument('--info_types', action='append', - help='Strings representing info types to look for. A full list of info ' - 'categories and types is available from the API. Examples ' - 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' - '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' - 'the API will use a limited default set. Specify this flag ' - 'multiple times to specify multiple info types.') - parser.add_argument('--min_likelihood', - choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', - 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], - help='A string representing the minimum likelihood threshold that ' - 'constitutes a match.') - parser.add_argument('--max_findings', type=int, - help='The maximum number of findings to report; 0 = no maximum.') - parser.add_argument('--include_quote', type=bool, - help='A boolean for whether to display a quote of the detected ' - 'information in the results.') - - args = parser.parse_args() - - inspect_string( - args.item, info_types=args.info_types, - min_likelihood=args.min_likelihood, include_quote=args.include_quote) diff --git a/dlp/inspect_string_test.py b/dlp/inspect_string_test.py deleted file mode 100644 index 78f53aaeb712..000000000000 --- a/dlp/inspect_string_test.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2017 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import inspect_string - - -def test_inspect_string(capsys): - test_string = 'I am Gary and my email is gary@example.com' - - inspect_string.inspect_string( - test_string, include_quote=True) - - out, _ = capsys.readouterr() - assert 'Info type: EMAIL_ADDRESS' in out - - -def test_inspect_string_with_info_types(capsys): - test_string = 'I am Gary and my email is gary@example.com' - - inspect_string.inspect_string( - test_string, info_types=['US_MALE_NAME'], include_quote=True) - - out, _ = capsys.readouterr() - assert 'Info type: US_MALE_NAME' in out - assert 'Info type: EMAIL_ADDRESS' not in out - - -def test_inspect_string_no_results(capsys): - test_string = 'Nothing to see here' - - inspect_string.inspect_string( - test_string, include_quote=True) - - out, _ = capsys.readouterr() - assert 'No findings' in out From 9268a53a6ab468ed216d1dcabf13960db0f8bed3 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Wed, 3 Jan 2018 17:48:11 -0800 Subject: [PATCH 05/11] add redact string --- dlp/inspect_content.py | 5 --- dlp/redact.py | 84 ++++++++++++++++++++++++++++++++++++++++++ dlp/redact_test.py | 41 +++++++++++++++++++++ 3 files changed, 125 insertions(+), 5 deletions(-) create mode 100644 dlp/redact.py create mode 100644 dlp/redact_test.py diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index d4f6ac865fda..2e95cbb7ffef 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -216,13 +216,8 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, response = dlp.list_inspect_findings(operation_result.name) - # 
TODO DO NOT SUBMIT: haven't successfully gotten results object so not sure this is correct if response.result.findings: for finding in response.result.findings: - try: - print('Quote: {}'.format(finding.quote)) - except AttributeError: - pass print('Info type: {}'.format(finding.info_type.name)) print('Likelihood: {}'.format(finding.likelihood)) else: diff --git a/dlp/redact.py b/dlp/redact.py new file mode 100644 index 000000000000..dd9257232ab7 --- /dev/null +++ b/dlp/redact.py @@ -0,0 +1,84 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sample app that uses the Data Loss Prevent API to inspect a string, a +local file or a file on Google Cloud Storage.""" + +from __future__ import print_function + +import argparse + +# [START redact_string] +def redact_string(item, replace_string, info_types=None, min_likelihood=None): + """Uses the Data Loss Prevention API to redact protected data in a string. + Args: + item: The string to inspect. + replace_string: The string to use to replace protected data; for + instance, '***' or 'REDACTED'. An empty string is permitted. + info_types: A list of strings representing info types to look for. + A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). 
Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + Returns: + None; the response from the API is printed to the terminal. + """ + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_type by converting the list of strings into a list of + # dictionaries (protos are also accepted). + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + # Prepare replace_configs, a list of dictionaries. Each dictionary contains + # an info_type and the string to which that info_type will be redacted upon + # detection. This sample uses the same "replace_string" for all info types, + # though the API supports using different ones for each type. + replace_configs = [] + + if info_types is not None: + for info_type in info_types: + replace_configs.append( + {'info_type': info_type, + 'replace_with': replace_string}) + else: + # If no info_type is specified, prepare a single dictionary with only a + # replace_string as a catch-all. + replace_configs.append({'replace_with': replace_string}) + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + redact_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + } + + # Construct the items list (in this case, only one item, in string form). + items = [{'type': 'text/plain', 'value': item}] + + # Call the API. + response = dlp.redact_content(redact_config, items, replace_configs) + + # Print out the results. 
+ print(response.items[0].value) +# [END redact_string] diff --git a/dlp/redact_test.py b/dlp/redact_test.py new file mode 100644 index 000000000000..ce1dbbc2168a --- /dev/null +++ b/dlp/redact_test.py @@ -0,0 +1,41 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import redact + +def test_redact_string(capsys): + test_string = 'I am Gary and my email is gary@example.com' + + redact.redact_string(test_string, 'REDACTED') + + out, _ = capsys.readouterr() + assert 'REDACTED' in out + +def test_redact_string_with_info_types(capsys): + test_string = 'My email is gary@example.com and my number is 206-555-5555' + + redact.redact_string(test_string, 'REDACTED', + info_types=['PHONE_NUMBER']) + + out, _ = capsys.readouterr() + assert 'REDACTED' in out + assert out.count('REDACTED') == 1 + +def test_redact_string_no_findings(capsys): + test_string = 'Nothing to see here' + + redact.redact_string(test_string, 'REDACTED') + + out, _ = capsys.readouterr() + assert 'REDACTED' not in out From 5b20fb2870bd7aa001bb7fc6a37080231398e11a Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Fri, 5 Jan 2018 12:39:34 -0800 Subject: [PATCH 06/11] Add redact --- dlp/inspect_content.py | 11 ++-- dlp/redact.py | 133 ++++++++++++++++++++++++++++++++++++++++- dlp/redact_test.py | 36 +++++++++++ dlp/requirements.txt | 3 +- 4 files changed, 174 insertions(+), 9 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index 
2e95cbb7ffef..d034f87a6c52 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Sample app that uses the Data Loss Prevent API to inspect a string, a +"""Sample app that uses the Data Loss Prevention API to inspect a string, a local file or a file on Google Cloud Storage.""" from __future__ import print_function @@ -49,7 +49,7 @@ def inspect_string(item, info_types=None, min_likelihood=None, # Instantiate a client. dlp = google.cloud.dlp.DlpServiceClient() - # Prepare info_type by converting the list of strings into a list of + # Prepare info_types by converting the list of strings into a list of # dictionaries (protos are also accepted). if info_types is not None: info_types = [{'name': info_type} for info_type in info_types] @@ -116,7 +116,7 @@ def inspect_file(filename, info_types=None, min_likelihood=None, # Instantiate a client. dlp = google.cloud.dlp.DlpServiceClient() - # Prepare info_type by converting the list of strings into a list of + # Prepare info_types by converting the list of strings into a list of # dictionaries (protos are also accepted). if info_types is not None: info_types = [{'name': info_type} for info_type in info_types] @@ -135,7 +135,8 @@ def inspect_file(filename, info_types=None, min_likelihood=None, mime_guess = mimetypes.MimeTypes().guess_type(filename) mime_type = mime_guess[0] or 'application/octet-stream' - # Construct the items list by reading the file as a binary string. + # Construct the items list (in this case, only one item, containing the + # file's byte data). with open(filename, mode='rb') as f: items = [{'type': mime_type, 'data': f.read()}] @@ -185,7 +186,7 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, # Instantiate a client. 
dlp = google.cloud.dlp.DlpServiceClient() - # Prepare info_type by converting the list of strings into a list of + # Prepare info_types by converting the list of strings into a list of # dictionaries (protos are also accepted). if info_types is not None: info_types = [{'name': info_type} for info_type in info_types] diff --git a/dlp/redact.py b/dlp/redact.py index dd9257232ab7..0d0054b35678 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -12,12 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Sample app that uses the Data Loss Prevent API to inspect a string, a -local file or a file on Google Cloud Storage.""" +"""Sample app that uses the Data Loss Prevent API to redact the contents of +a string or an image file.""" from __future__ import print_function import argparse +import mimetypes # [START redact_string] def redact_string(item, replace_string, info_types=None, min_likelihood=None): @@ -45,7 +46,7 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None): # Instantiate a client. dlp = google.cloud.dlp.DlpServiceClient() - # Prepare info_type by converting the list of strings into a list of + # Prepare info_types by converting the list of strings into a list of # dictionaries (protos are also accepted). if info_types is not None: info_types = [{'name': info_type} for info_type in info_types] @@ -82,3 +83,129 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None): # Print out the results. print(response.items[0].value) # [END redact_string] + +# [START redact_image] +def redact_image(filename, output_filename, + info_types=None, min_likelihood=None, mime_type=None): + """Uses the Data Loss Prevention API to redact protected data in a string. + Args: + filename: The path to the file to inspect. + output_filename: The path to which the redacted image will be written. + info_types: A list of strings representing info types to look for. 
+ A full list of info type categories can be fetched from the API with + the .list_root_categories(language_code) client method, and a list + of types in a category with .list_info_types(category, + language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', + 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. + If info_types is omitted, the API will use a limited default set. + min_likelihood: A string representing the minimum likelihood threshold + that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', + 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. + mime_type: The MIME type of the file. If not specified, the type is + inferred via the Python standard library's mimetypes module. + Returns: + None; the response from the API is printed to the terminal. + """ + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Prepare info_types by converting the list of strings into a list of + # dictionaries (protos are also accepted). The info_types are not submitted + # directly in this example, but are used in the construction of + # image_redaction_configs. + if info_types is not None: + info_types = [{'name': info_type} for info_type in info_types] + + + # Prepare image_redaction_configs, a list of dictionaries. Each dictionary + # contains an info_type and optionally the color used for the replacement. + # The color is omitted in this sample, so the default (black) will be used. + image_redaction_configs = [] + + if info_types is not None: + for info_type in info_types: + image_redaction_configs.append({'info_type': info_type}) + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + redact_config = { + 'min_likelihood': min_likelihood, + } + + # If mime_type is not specified, guess it from the filename. 
+ if mime_type is None: + mime_guess = mimetypes.MimeTypes().guess_type(filename) + mime_type = mime_guess[0] or 'application/octet-stream' + + # Construct the items list (in this case, only one item, containing the + # image file's byte data). + with open(filename, mode='rb') as f: + items = [{'type': mime_type, 'data': f.read()}] + + # Call the API. + response = dlp.redact_content(redact_config, items, None, + image_redaction_configs=image_redaction_configs) + + # Write out the results. + with open(output_filename, mode='wb') as f: + f.write(response.items[0].data) + print("Wrote {byte_count} to {filename}".format( + byte_count=len(response.items[0].data), filename=output_filename)) +# [END redact_string] + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + subparsers = parser.add_subparsers(dest='content', + help='Select how to submit content to the API.') + + parser_string = subparsers.add_parser('string', help='Inspect a string.') + parser_string.add_argument('item', help='The string to inspect.') + parser_string.add_argument('replace_string', help='The string to use to ' + 'replace protected data; for instance, "***" or "REDACTED".') + parser_string.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of ' + 'info categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. 
Specify this flag ' + 'multiple times to specify multiple info types.') + parser_string.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + + parser_file = subparsers.add_parser('image', help='Inspect an image file.') + parser_file.add_argument('filename', + help='The path to the file to inspect.') + parser_file.add_argument('output_filename', + help='The path to which the redacted image will be written.') + parser_file.add_argument('--info_types', action='append', + help='Strings representing info types to look for. A full list of ' + 'info categories and types is available from the API. Examples ' + 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' + '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' + 'the API will use a limited default set. Specify this flag ' + 'multiple times to specify multiple info types.') + parser_file.add_argument('--min_likelihood', + choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', + 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], + help='A string representing the minimum likelihood threshold that ' + 'constitutes a match.') + parser_file.add_argument('--mime_type', + help='The MIME type of the file. 
If not specified, the type is ' + 'inferred via the Python standard library\'s mimetypes module.') + + args = parser.parse_args() + + if args.content == 'string': + redact_string( + args.item, args.replace_string, info_types=args.info_types, + min_likelihood=args.min_likelihood) + elif args.content == 'image': + redact_image( + args.filename, args.output_filename, info_types=args.info_types, + min_likelihood=args.min_likelihood, mime_type=args.mime_type) \ No newline at end of file diff --git a/dlp/redact_test.py b/dlp/redact_test.py index ce1dbbc2168a..68c55d1756f2 100644 --- a/dlp/redact_test.py +++ b/dlp/redact_test.py @@ -12,8 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import tempfile +import shutil +import os + +import pytest + import redact +RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), 'resources') + + +@pytest.fixture(scope='module') +def tempdir(): + tempdir = tempfile.mkdtemp() + yield tempdir +# shutil.rmtree(tempdir) # DO NOT SUBMIT + def test_redact_string(capsys): test_string = 'I am Gary and my email is gary@example.com' @@ -22,6 +37,7 @@ def test_redact_string(capsys): out, _ = capsys.readouterr() assert 'REDACTED' in out + def test_redact_string_with_info_types(capsys): test_string = 'My email is gary@example.com and my number is 206-555-5555' @@ -32,6 +48,7 @@ def test_redact_string_with_info_types(capsys): assert 'REDACTED' in out assert out.count('REDACTED') == 1 + def test_redact_string_no_findings(capsys): test_string = 'Nothing to see here' @@ -39,3 +56,22 @@ def test_redact_string_no_findings(capsys): out, _ = capsys.readouterr() assert 'REDACTED' not in out + +def test_redact_image_file(tempdir, capsys): + test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png') + output_filepath = os.path.join(tempdir, 'redacted.png') + + redact.redact_image(test_filepath, output_filepath) + + out, _ = capsys.readouterr() + assert output_filepath in out + +def 
test_redact_image_file_with_infotype(tempdir, capsys): + test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png') + output_filepath = os.path.join(tempdir, 'redacted_with_infotype.png') + + redact.redact_image(test_filepath, output_filepath, + info_types=['EMAIL_ADDRESS', 'US_MALE_NAME']) + + out, _ = capsys.readouterr() + assert output_filepath in out diff --git a/dlp/requirements.txt b/dlp/requirements.txt index 23f5d1a115c1..dd0bcf2e2f3a 100644 --- a/dlp/requirements.txt +++ b/dlp/requirements.txt @@ -1,3 +1,4 @@ # DO NOT SUBMIT -/usr/local/google/home/gorcester/src/google-cloud-python/dlp google-cloud-storage +/Users/gorcester/src/google-cloud-python/dlp + From 5c2689a145e1ea78199d464cf9bb99fb17b822c4 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Mon, 8 Jan 2018 14:07:37 -0800 Subject: [PATCH 07/11] Add quickstart and metadata --- dlp/inspect_content.py | 2 +- dlp/inspect_content_test.py | 2 +- dlp/metadata.py | 96 +++++++++++++++++++++++++++++++++++++ dlp/metadata_test.py | 29 +++++++++++ dlp/quickstart.py | 74 ++++++++++++++++++++++++++++ dlp/quickstart_test.py | 21 ++++++++ dlp/redact.py | 4 +- 7 files changed, 224 insertions(+), 4 deletions(-) create mode 100644 dlp/metadata.py create mode 100644 dlp/metadata_test.py create mode 100644 dlp/quickstart.py create mode 100644 dlp/quickstart_test.py diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index d034f87a6c52..d71cf3604517 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -160,7 +160,7 @@ def inspect_file(filename, info_types=None, min_likelihood=None, # [START inspect_gcs_file] def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, max_findings=None): - """Uses the Data Loss Prevention API to analyze a string for protected data. + """Uses the Data Loss Prevention API to analyze a file on GCS. Args: bucket: The name of the GCS bucket containing the file, as a string. 
filename: The name of the file in the bucket, including the path, as a diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index a4bd5781f428..057e0f29aaf0 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -161,7 +161,7 @@ def test_inspect_gcs_image_file(bucket, capsys): assert 'Info type: EMAIL_ADDRESS' in out -def test_inspect_gcs_multiple_file(bucket, capsys): +def test_inspect_gcs_multiple_files(bucket, capsys): inspect_content.inspect_gcs_file(bucket.name, '*') out, _ = capsys.readouterr() diff --git a/dlp/metadata.py b/dlp/metadata.py new file mode 100644 index 000000000000..8690a06aa724 --- /dev/null +++ b/dlp/metadata.py @@ -0,0 +1,96 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sample app that queries the Data Loss Prevention API for supported +categories and info types.""" + +from __future__ import print_function + +import argparse + + +# [START list_info_types] +def list_info_types(category, language_code='en-US'): + """List types of sensitive information within a category. + Args: + category: The category of info types to list; e.g. 'PII'. + language_code: The BCP-47 language code to use, e.g. 'en-US'. + Returns: + None; the response from the API is printed to the terminal. + """ + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Make the API call. 
+ response = dlp.list_info_types(category, language_code) + + # Print the results to the console. + print('Info types in {category}:'.format(category=category)) + for info_type in response.info_types: + print('{name}: {display_name}'.format( + name=info_type.name, display_name=info_type.display_name)) +# [END list_info_types] + + +# [START list_categories] +def list_categories(language_code='en-US'): + """List root categories of sensitive information. + Args: + language_code: The BCP-47 language code to use, e.g. 'en-US'. + Returns: + None; the response from the API is printed to the terminal. + """ + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # Make the API call. + response = dlp.list_root_categories(language_code) + + # Print the results to the console. + print('Categories:') + for category in response.categories: + print('{name}: {display_name}'.format( + name=category.name, display_name=category.display_name)) +# [END list_categories] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=__doc__) + subparsers = parser.add_subparsers(dest='metadata', + help='Select which type of metadata to view.') + + parser_categories = subparsers.add_parser('categories', + help='Fetch the list of info type categories.') + parser_categories.add_argument('--language_code', + help='The BCP-47 language code to use, e.g. \'en-US\'.') + + parser_info_types = subparsers.add_parser('info_types', + help='Fetch the list of info types in a specified category.') + parser_info_types.add_argument('category', + help='The category of info types to list; e.g. \'PII\'.') + parser_info_types.add_argument('--language_code', + help='The BCP-47 language code to use, e.g. 
\'en-US\'.') + + args = parser.parse_args() + + if args.metadata == 'categories': + list_categories(language_code=args.language_code) + elif args.metadata == 'info_types': + list_info_types(args.category, language_code=args.language_code) diff --git a/dlp/metadata_test.py b/dlp/metadata_test.py new file mode 100644 index 000000000000..816b6f6e4281 --- /dev/null +++ b/dlp/metadata_test.py @@ -0,0 +1,29 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import metadata + + +def test_fetch_categories(capsys): + metadata.list_categories() + + out, _ = capsys.readouterr() + assert 'PII' in out + + +def test_fetch_info_types(capsys): + metadata.list_info_types('PII') + + out, _ = capsys.readouterr() + assert 'EMAIL_ADDRESS' in out diff --git a/dlp/quickstart.py b/dlp/quickstart.py new file mode 100644 index 000000000000..a8804bcad5ba --- /dev/null +++ b/dlp/quickstart.py @@ -0,0 +1,74 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sample app that queries the Data Loss Prevention API for supported +categories and info types.""" + +from __future__ import print_function + +def quickstart(): + """Demonstrates use of the Data Loss Prevention API client library.""" + + # [START quickstart] + # Import the client library + import google.cloud.dlp + + # Instantiate a client. + dlp = google.cloud.dlp.DlpServiceClient() + + # The string to inspect + content = 'Robert Frost' + + # Construct the list of content items to inspect; in this case, only one. + items = [{'type': 'text/plain', 'value': content}] + + # The info types to search for in the content. + info_types = [{'name': 'US_MALE_NAME'}, {'name': 'US_FEMALE_NAME'}] + + # The minimum likelihood to constitute a match. Optional. + min_likelihood = 'LIKELIHOOD_UNSPECIFIED' + + # The maximum number of findings to report (0 = server maximum). Optional. + max_findings = 0 + + # Whether to include the matching string in the results. Optional. + include_quote = True + + # Construct the configuration dictionary. Keys which are None may + # optionally be omitted entirely. + inspect_config = { + 'info_types': info_types, + 'min_likelihood': min_likelihood, + 'max_findings': max_findings, + 'include_quote': include_quote, + } + + # Call the API. + response = dlp.inspect_content(inspect_config, items) + + # Print out the results. 
+ if response.results[0].findings: + for finding in response.results[0].findings: + try: + print('Quote: {}'.format(finding.quote)) + except AttributeError: + pass + print('Info type: {}'.format(finding.info_type.name)) + print('Likelihood: {}'.format(finding.likelihood)) + else: + print('No findings.') + # [END quickstart] + +if __name__ == '__main__': + quickstart() \ No newline at end of file diff --git a/dlp/quickstart_test.py b/dlp/quickstart_test.py new file mode 100644 index 000000000000..f4d6f06ff945 --- /dev/null +++ b/dlp/quickstart_test.py @@ -0,0 +1,21 @@ +# Copyright 2017 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import quickstart + +def test_quickstart(capsys): + quickstart.quickstart() + + out, _ = capsys.readouterr() + assert 'US_MALE_NAME' in out diff --git a/dlp/redact.py b/dlp/redact.py index 0d0054b35678..965b13992c48 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -87,7 +87,7 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None): # [START redact_image] def redact_image(filename, output_filename, info_types=None, min_likelihood=None, mime_type=None): - """Uses the Data Loss Prevention API to redact protected data in a string. + """Uses the Data Loss Prevention API to redact protected data in an image. Args: filename: The path to the file to inspect. output_filename: The path to which the redacted image will be written. 
@@ -208,4 +208,4 @@ def redact_image(filename, output_filename, elif args.content == 'image': redact_image( args.filename, args.output_filename, info_types=args.info_types, - min_likelihood=args.min_likelihood, mime_type=args.mime_type) \ No newline at end of file + min_likelihood=args.min_likelihood, mime_type=args.mime_type) From 431b43f317162e578c45c64aa5c6ccb0669fbd51 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Mon, 8 Jan 2018 14:22:59 -0800 Subject: [PATCH 08/11] lint --- dlp/inspect_content.py | 99 +++++++++++++++++++------------------ dlp/inspect_content_test.py | 6 +-- dlp/metadata.py | 21 ++++---- dlp/quickstart.py | 4 +- dlp/quickstart_test.py | 1 + dlp/redact.py | 55 +++++++++++---------- dlp/redact_test.py | 16 +++--- 7 files changed, 109 insertions(+), 93 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index d71cf3604517..ae80fc33883b 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -23,16 +23,12 @@ # [START inspect_string] def inspect_string(item, info_types=None, min_likelihood=None, max_findings=None, include_quote=True): - """Uses the Data Loss Prevention API to analyze a string for protected data. + """Uses the Data Loss Prevention API to analyze strings for protected data. Args: item: The string to inspect. info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. + A full list of info type categories can be fetched from the API. If + info_types is omitted, the API will use a limited default set. 
min_likelihood: A string representing the minimum likelihood threshold that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. @@ -90,12 +86,8 @@ def inspect_file(filename, info_types=None, min_likelihood=None, Args: filename: The path to the file to inspect. info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. + A full list of info type categories can be fetched from the API. If + info_types is omitted, the API will use a limited default set. min_likelihood: A string representing the minimum likelihood threshold that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. @@ -159,19 +151,15 @@ def inspect_file(filename, info_types=None, min_likelihood=None, # [START inspect_gcs_file] def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, - max_findings=None): + max_findings=None): """Uses the Data Loss Prevention API to analyze a file on GCS. Args: bucket: The name of the GCS bucket containing the file, as a string. filename: The name of the file in the bucket, including the path, as a string; e.g. 'images/myfile.png'. info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). 
Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. + A full list of info type categories can be fetched from the API. If + info_types is omitted, the API will use a limited default set. min_likelihood: A string representing the minimum likelihood threshold that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. @@ -201,11 +189,11 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, # Construct a cloud_storage_options dictionary with the file's URL. url = 'gs://{}/{}'.format(bucket, filename) - storage_config = {'cloud_storage_options': - {'file_set': - {'url': url} - } - } + storage_config = { + 'cloud_storage_options': { + 'file_set': {'url': url} + } + } operation = dlp.create_inspect_operation(inspect_config, storage_config, None) @@ -228,73 +216,86 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__) - subparsers = parser.add_subparsers(dest='content', - help='Select how to submit content to the API.') + subparsers = parser.add_subparsers( + dest='content', help='Select how to submit content to the API.') parser_string = subparsers.add_parser('string', help='Inspect a string.') parser_string.add_argument('item', help='The string to inspect.') - parser_string.add_argument('--info_types', action='append', + parser_string.add_argument( + '--info_types', action='append', help='Strings representing info types to look for. A full list of ' 'info categories and types is available from the API. Examples ' 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' 'the API will use a limited default set. 
Specify this flag ' 'multiple times to specify multiple info types.') - parser_string.add_argument('--min_likelihood', + parser_string.add_argument( + '--min_likelihood', choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], help='A string representing the minimum likelihood threshold that ' 'constitutes a match.') - parser_string.add_argument('--max_findings', type=int, + parser_string.add_argument( + '--max_findings', type=int, help='The maximum number of findings to report; 0 = no maximum.') - parser_string.add_argument('--include_quote', type=bool, + parser_string.add_argument( + '--include_quote', type=bool, help='A boolean for whether to display a quote of the detected ' 'information in the results.') parser_file = subparsers.add_parser('file', help='Inspect a local file.') - parser_file.add_argument('filename', - help='The path to the file to inspect.') - parser_file.add_argument('--info_types', action='append', + parser_file.add_argument( + 'filename', help='The path to the file to inspect.') + parser_file.add_argument( + '--info_types', action='append', help='Strings representing info types to look for. A full list of ' 'info categories and types is available from the API. Examples ' 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' 'the API will use a limited default set. 
Specify this flag ' 'multiple times to specify multiple info types.') - parser_file.add_argument('--min_likelihood', + parser_file.add_argument( + '--min_likelihood', choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], help='A string representing the minimum likelihood threshold that ' 'constitutes a match.') - parser_file.add_argument('--max_findings', type=int, + parser_file.add_argument( + '--max_findings', type=int, help='The maximum number of findings to report; 0 = no maximum.') - parser_file.add_argument('--include_quote', type=bool, + parser_file.add_argument( + '--include_quote', type=bool, help='A boolean for whether to display a quote of the detected ' 'information in the results.') - parser_file.add_argument('--mime_type', + parser_file.add_argument( + '--mime_type', help='The MIME type of the file. If not specified, the type is ' 'inferred via the Python standard library\'s mimetypes module.') - parser_gcs = subparsers.add_parser('gcs', - help='Inspect files on Google Cloud Storage.') - parser_gcs.add_argument('bucket', - help='The name of the GCS bucket containing the file.') - parser_gcs.add_argument('filename', + parser_gcs = subparsers.add_parser( + 'gcs', help='Inspect files on Google Cloud Storage.') + parser_gcs.add_argument( + 'bucket', help='The name of the GCS bucket containing the file.') + parser_gcs.add_argument( + 'filename', help='The name of the file in the bucket, including the path, e.g. ' '"images/myfile.png". Wildcards are permitted.') - parser_gcs.add_argument('--info_types', action='append', + parser_gcs.add_argument( + '--info_types', action='append', help='Strings representing info types to look for. A full list of ' 'info categories and types is available from the API. Examples ' 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' 'the API will use a limited default set. 
Specify this flag ' 'multiple times to specify multiple info types.') - parser_gcs.add_argument('--min_likelihood', + parser_gcs.add_argument( + '--min_likelihood', choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], help='A string representing the minimum likelihood threshold that ' 'constitutes a match.') - parser_gcs.add_argument('--max_findings', type=int, + parser_gcs.add_argument( + '--max_findings', type=int, help='The maximum number of findings to report; 0 = no maximum.') args = parser.parse_args() @@ -302,11 +303,13 @@ def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None, if args.content == 'string': inspect_string( args.item, info_types=args.info_types, - min_likelihood=args.min_likelihood, include_quote=args.include_quote) + min_likelihood=args.min_likelihood, + include_quote=args.include_quote) elif args.content == 'file': inspect_file( args.filename, info_types=args.info_types, - min_likelihood=args.min_likelihood, include_quote=args.include_quote, + min_likelihood=args.min_likelihood, + include_quote=args.include_quote, mime_type=args.mime_type) elif args.content == 'gcs': inspect_gcs_file( diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index 057e0f29aaf0..a32b8317ef8a 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -14,11 +14,11 @@ import os -import pytest - import google.cloud.exceptions import google.cloud.storage +import pytest + import inspect_content @@ -47,7 +47,7 @@ def bucket(request): blob.upload_from_filename(path) blobs.append(blob) - # Yield the object to the test code; lines after this execute as a teardown. + # Yield the object to the test; lines after this execute as a teardown. yield bucket # Delete the files. 
diff --git a/dlp/metadata.py b/dlp/metadata.py index 8690a06aa724..1372f6610ae0 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -73,19 +73,22 @@ def list_categories(language_code='en-US'): if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__) - subparsers = parser.add_subparsers(dest='metadata', - help='Select which type of metadata to view.') + subparsers = parser.add_subparsers( + dest='metadata', help='Select which type of metadata to view.') - parser_categories = subparsers.add_parser('categories', - help='Fetch the list of info type categories.') - parser_categories.add_argument('--language_code', + parser_categories = subparsers.add_parser( + 'categories', help='Fetch the list of info type categories.') + parser_categories.add_argument( + '--language_code', help='The BCP-47 language code to use, e.g. \'en-US\'.') - parser_info_types = subparsers.add_parser('info_types', + parser_info_types = subparsers.add_parser( + 'info_types', help='Fetch the list of info types in a specified category.') - parser_info_types.add_argument('category', - help='The category of info types to list; e.g. \'PII\'.') - parser_info_types.add_argument('--language_code', + parser_info_types.add_argument( + 'category', help='The category of info types to list; e.g. \'PII\'.') + parser_categories.add_argument( + '--language_code', help='The BCP-47 language code to use, e.g. 
\'en-US\'.') args = parser.parse_args() diff --git a/dlp/quickstart.py b/dlp/quickstart.py index a8804bcad5ba..40d731433899 100644 --- a/dlp/quickstart.py +++ b/dlp/quickstart.py @@ -17,6 +17,7 @@ from __future__ import print_function + def quickstart(): """Demonstrates use of the Data Loss Prevention API client library.""" @@ -70,5 +71,6 @@ def quickstart(): print('No findings.') # [END quickstart] + if __name__ == '__main__': - quickstart() \ No newline at end of file + quickstart() diff --git a/dlp/quickstart_test.py b/dlp/quickstart_test.py index f4d6f06ff945..5b8faf88099d 100644 --- a/dlp/quickstart_test.py +++ b/dlp/quickstart_test.py @@ -14,6 +14,7 @@ import quickstart + def test_quickstart(capsys): quickstart.quickstart() diff --git a/dlp/redact.py b/dlp/redact.py index 965b13992c48..8666d761c78f 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -20,6 +20,7 @@ import argparse import mimetypes + # [START redact_string] def redact_string(item, replace_string, info_types=None, min_likelihood=None): """Uses the Data Loss Prevention API to redact protected data in a string. @@ -28,12 +29,8 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None): replace_string: The string to use to replace protected data; for instance, '***' or 'REDACTED'. An empty string is permitted. info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. + A full list of info type categories can be fetched from the API. If + info_types is omitted, the API will use a limited default set. 
min_likelihood: A string representing the minimum likelihood threshold that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. @@ -84,6 +81,7 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None): print(response.items[0].value) # [END redact_string] + # [START redact_image] def redact_image(filename, output_filename, info_types=None, min_likelihood=None, mime_type=None): @@ -92,12 +90,8 @@ def redact_image(filename, output_filename, filename: The path to the file to inspect. output_filename: The path to which the redacted image will be written. info_types: A list of strings representing info types to look for. - A full list of info type categories can be fetched from the API with - the .list_root_categories(language_code) client method, and a list - of types in a category with .list_info_types(category, - language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME', - 'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. - If info_types is omitted, the API will use a limited default set. + A full list of info type categories can be fetched from the API. If + info_types is omitted, the API will use a limited default set. min_likelihood: A string representing the minimum likelihood threshold that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'. @@ -119,7 +113,6 @@ def redact_image(filename, output_filename, if info_types is not None: info_types = [{'name': info_type} for info_type in info_types] - # Prepare image_redaction_configs, a list of dictionaries. Each dictionary # contains an info_type and optionally the color used for the replacement. # The color is omitted in this sample, so the default (black) will be used. @@ -146,7 +139,8 @@ def redact_image(filename, output_filename, items = [{'type': mime_type, 'data': f.read()}] # Call the API. 
- response = dlp.redact_content(redact_config, items, None, + response = dlp.redact_content( + redact_config, items, None, image_redaction_configs=image_redaction_configs) # Write out the results. @@ -156,46 +150,55 @@ def redact_image(filename, output_filename, byte_count=len(response.items[0].data), filename=output_filename)) # [END redact_string] + if __name__ == '__main__': parser = argparse.ArgumentParser(description=__doc__) - subparsers = parser.add_subparsers(dest='content', - help='Select how to submit content to the API.') + subparsers = parser.add_subparsers( + dest='content', help='Select how to submit content to the API.') parser_string = subparsers.add_parser('string', help='Inspect a string.') parser_string.add_argument('item', help='The string to inspect.') - parser_string.add_argument('replace_string', help='The string to use to ' - 'replace protected data; for instance, "***" or "REDACTED".') - parser_string.add_argument('--info_types', action='append', + parser_string.add_argument( + 'replace_string', + help='The string to use to replace protected data; for instance, ' + '"***" or "REDACTED".') + parser_string.add_argument( + '--info_types', action='append', help='Strings representing info types to look for. A full list of ' 'info categories and types is available from the API. Examples ' 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' 'the API will use a limited default set. 
Specify this flag ' 'multiple times to specify multiple info types.') - parser_string.add_argument('--min_likelihood', + parser_string.add_argument( + '--min_likelihood', choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], help='A string representing the minimum likelihood threshold that ' 'constitutes a match.') parser_file = subparsers.add_parser('image', help='Inspect an image file.') - parser_file.add_argument('filename', - help='The path to the file to inspect.') - parser_file.add_argument('output_filename', + parser_file.add_argument( + 'filename', help='The path to the file to inspect.') + parser_file.add_argument( + 'output_filename', help='The path to which the redacted image will be written.') - parser_file.add_argument('--info_types', action='append', + parser_file.add_argument( + '--info_types', action='append', help='Strings representing info types to look for. A full list of ' 'info categories and types is available from the API. Examples ' 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", ' '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, ' 'the API will use a limited default set. Specify this flag ' 'multiple times to specify multiple info types.') - parser_file.add_argument('--min_likelihood', + parser_file.add_argument( + '--min_likelihood', choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY', 'VERY_LIKELY'], help='A string representing the minimum likelihood threshold that ' 'constitutes a match.') - parser_file.add_argument('--mime_type', + parser_file.add_argument( + '--mime_type', help='The MIME type of the file. 
If not specified, the type is ' 'inferred via the Python standard library\'s mimetypes module.') diff --git a/dlp/redact_test.py b/dlp/redact_test.py index 68c55d1756f2..73d4cab20224 100644 --- a/dlp/redact_test.py +++ b/dlp/redact_test.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile -import shutil import os +import shutil +import tempfile import pytest @@ -27,7 +27,8 @@ def tempdir(): tempdir = tempfile.mkdtemp() yield tempdir -# shutil.rmtree(tempdir) # DO NOT SUBMIT + shutil.rmtree(tempdir) + def test_redact_string(capsys): test_string = 'I am Gary and my email is gary@example.com' @@ -41,8 +42,8 @@ def test_redact_string(capsys): def test_redact_string_with_info_types(capsys): test_string = 'My email is gary@example.com and my number is 206-555-5555' - redact.redact_string(test_string, 'REDACTED', - info_types=['PHONE_NUMBER']) + redact.redact_string( + test_string, 'REDACTED', info_types=['PHONE_NUMBER']) out, _ = capsys.readouterr() assert 'REDACTED' in out @@ -57,6 +58,7 @@ def test_redact_string_no_findings(capsys): out, _ = capsys.readouterr() assert 'REDACTED' not in out + def test_redact_image_file(tempdir, capsys): test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png') output_filepath = os.path.join(tempdir, 'redacted.png') @@ -66,11 +68,13 @@ def test_redact_image_file(tempdir, capsys): out, _ = capsys.readouterr() assert output_filepath in out + def test_redact_image_file_with_infotype(tempdir, capsys): test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png') output_filepath = os.path.join(tempdir, 'redacted_with_infotype.png') - redact.redact_image(test_filepath, output_filepath, + redact.redact_image( + test_filepath, output_filepath, info_types=['EMAIL_ADDRESS', 'US_MALE_NAME']) out, _ = capsys.readouterr() From 27e99b1b144d43b0c973d56381df26ad4047e1aa Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Fri, 23 Feb 2018 14:04:26 -0800 Subject: 
[PATCH 09/11] use real requirements; remove debugging statement --- dlp/inspect_content_test.py | 2 -- dlp/requirements.txt | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index a32b8317ef8a..e6de4245f75d 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -57,8 +57,6 @@ def bucket(request): # Attempt to delete the bucket; this will only work if it is empty. bucket.delete() - print('teardown complete') - def test_inspect_string(capsys): test_string = 'I am Gary and my email is gary@example.com' diff --git a/dlp/requirements.txt b/dlp/requirements.txt index dd0bcf2e2f3a..1c24785056db 100644 --- a/dlp/requirements.txt +++ b/dlp/requirements.txt @@ -1,4 +1,2 @@ -# DO NOT SUBMIT -google-cloud-storage -/Users/gorcester/src/google-cloud-python/dlp - +google-cloud-dlp==0.1.0 +google-cloud-storage==1.7.0 From f1c0a25b3a0d459788de596ec969a7379c30681a Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Fri, 23 Feb 2018 15:10:44 -0800 Subject: [PATCH 10/11] add README --- dlp/README.rst | 183 ++++++++++++++++++++++++++++++++++++++++++++++ dlp/README.rst.in | 31 ++++++++ dlp/metadata.py | 2 +- 3 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 dlp/README.rst create mode 100644 dlp/README.rst.in diff --git a/dlp/README.rst b/dlp/README.rst new file mode 100644 index 000000000000..bd02346781ab --- /dev/null +++ b/dlp/README.rst @@ -0,0 +1,183 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Data Loss Prevention Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dlp/README.rst + + +This directory contains samples for Google Data Loss Prevention. 
`Google Data Loss Prevention`_ provides programmatic access to a powerful detection engine for personally identifiable information and other privacy-sensitive data in unstructured data streams. + + + + +.. _Google Data Loss Prevention: https://cloud.google.com/dlp/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + +Samples +------------------------------------------------------------------------------- + +Quickstart ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dlp/quickstart.py;dlp/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python quickstart.py + + +Inspect Content ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. 
image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dlp/inspect_content.py;dlp/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python inspect_content.py + + usage: inspect_content.py [-h] {string,file,gcs} ... + + Sample app that uses the Data Loss Prevention API to inspect a string, a local + file or a file on Google Cloud Storage. + + positional arguments: + {string,file,gcs} Select how to submit content to the API. + string Inspect a string. + file Inspect a local file. + gcs Inspect files on Google Cloud Storage. + + optional arguments: + -h, --help show this help message and exit + + + +Redact Content ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dlp/redact.py;dlp/README.rst + + + + +To run this sample: + +.. code-block:: bash + + $ python redact.py + + usage: redact.py [-h] {string,image} ... + + Sample app that uses the Data Loss Prevent API to redact the contents of a + string or an image file. + + positional arguments: + {string,image} Select how to submit content to the API. + string Inspect a string. + image Inspect an image file. + + optional arguments: + -h, --help show this help message and exit + + + +Display Metadata ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dlp/metadata.py;dlp/README.rst + + + + +To run this sample: + +.. 
code-block:: bash + + $ python metadata.py + + usage: metadata.py [-h] {categories,info_types} ... + + Sample app that queries the Data Loss Prevention API for supported categories + and info types. + + positional arguments: + {categories,info_types} + Select which type of metadata to view. + categories Fetch the list of info type categories. + info_types Fetch the list of info types in a specified category. + + optional arguments: + -h, --help show this help message and exit + + + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/dlp/README.rst.in b/dlp/README.rst.in new file mode 100644 index 000000000000..a8a1cf5caf89 --- /dev/null +++ b/dlp/README.rst.in @@ -0,0 +1,31 @@ +# This file is used to generate README.rst + +product: + name: Google Data Loss Prevention + short_name: Data Loss Prevention + url: https://cloud.google.com/dlp/docs/ + description: > + `Google Data Loss Prevention`_ provides programmatic access to a powerful + detection engine for personally identifiable information and other + privacy-sensitive data in unstructured data streams. 
+ +setup: +- auth +- install_deps + +samples: +- name: Quickstart + file: quickstart.py +- name: Inspect Content + file: inspect_content.py + show_help: true +- name: Redact Content + file: redact.py + show_help: true +- name: Display Metadata + file: metadata.py + show_help: true + +cloud_client_library: true + +folder: dlp \ No newline at end of file diff --git a/dlp/metadata.py b/dlp/metadata.py index 1372f6610ae0..fbe88ec6b839 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -87,7 +87,7 @@ def list_categories(language_code='en-US'): help='Fetch the list of info types in a specified category.') parser_info_types.add_argument( 'category', help='The category of info types to list; e.g. \'PII\'.') - parser_categories.add_argument( + parser_info_types.add_argument( '--language_code', help='The BCP-47 language code to use, e.g. \'en-US\'.') From d0e0afcca4f26292b8d2bc80454905b150af9d0a Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Fri, 23 Feb 2018 15:14:19 -0800 Subject: [PATCH 11/11] add beta notice --- dlp/README.rst | 2 +- dlp/README.rst.in | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dlp/README.rst b/dlp/README.rst index bd02346781ab..bcfca5d3e564 100644 --- a/dlp/README.rst +++ b/dlp/README.rst @@ -7,7 +7,7 @@ Google Data Loss Prevention Python Samples :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dlp/README.rst -This directory contains samples for Google Data Loss Prevention. `Google Data Loss Prevention`_ provides programmatic access to a powerful detection engine for personally identifiable information and other privacy-sensitive data in unstructured data streams. +This directory contains samples for Google Data Loss Prevention. `Google Data Loss Prevention`_ provides programmatic access to a powerful detection engine for personally identifiable information and other privacy-sensitive data in unstructured data streams. 
**This API is currently in beta**. diff --git a/dlp/README.rst.in b/dlp/README.rst.in index a8a1cf5caf89..57c73a743338 100644 --- a/dlp/README.rst.in +++ b/dlp/README.rst.in @@ -8,6 +8,7 @@ product: `Google Data Loss Prevention`_ provides programmatic access to a powerful detection engine for personally identifiable information and other privacy-sensitive data in unstructured data streams. + **This API is currently in beta**. setup: - auth