diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py
index b1e97ae31475..9e50ecbdf79d 100644
--- a/dlp/inspect_content.py
+++ b/dlp/inspect_content.py
@@ -474,6 +474,7 @@ def inspect_gcs_file(
 
     operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
     print("Inspection operation started: {}".format(operation.name))
+
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
@@ -636,6 +637,7 @@ def inspect_datastore(
     }
 
     operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
+    print("Inspection operation started: {}".format(operation.name))
 
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
@@ -802,6 +804,7 @@ def inspect_bigquery(
     }
 
     operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
+    print("Inspection operation started: {}".format(operation.name))
 
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py
index ad493ecce710..ea100d16d84a 100644
--- a/dlp/inspect_content_test.py
+++ b/dlp/inspect_content_test.py
@@ -15,8 +15,6 @@
 import os
 import uuid
 
-from gcp_devrel.testing import eventually_consistent
-from gcp_devrel.testing.flaky import flaky
 import google.api_core.exceptions
 import google.cloud.bigquery
 import google.cloud.datastore
@@ -24,10 +22,11 @@
 import google.cloud.exceptions
 import google.cloud.pubsub
 import google.cloud.storage
-
 import pytest
+
 import inspect_content
+
 UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
 
 GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
@@ -95,7 +94,8 @@ def subscription_id(topic_id):
     # Subscribes to a topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
-    subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
+    subscription_path = subscriber.subscription_path(
+        GCLOUD_PROJECT, SUBSCRIPTION_ID)
     try:
         subscriber.create_subscription(subscription_path, topic_path)
     except google.api_core.exceptions.AlreadyExists:
@@ -289,114 +289,114 @@ def test_inspect_image_file(capsys):
     assert "Info type: PHONE_NUMBER" in out
 
 
+def cancel_operation(out):
+    if "Inspection operation started" in out:
+        # Cancel the operation, using the job name parsed from the output.
+        operation_id = out.split(
+            "Inspection operation started: ")[1].split("\n")[0]
+        client = google.cloud.dlp_v2.DlpServiceClient()
+        client.cancel_dlp_job(operation_id)
+
+
 def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
-    inspect_content.inspect_gcs_file(
-        GCLOUD_PROJECT,
-        bucket.name,
-        "test.txt",
-        topic_id,
-        subscription_id,
-        ["EMAIL_ADDRESS", "PHONE_NUMBER"],
-        timeout=420,
-    )
+    out = ""  # predefine so the finally clause works even if the call raises
+    try:
+        inspect_content.inspect_gcs_file(
+            GCLOUD_PROJECT,
+            bucket.name,
+            "test.txt",
+            topic_id,
+            subscription_id,
+            ["EMAIL_ADDRESS", "PHONE_NUMBER"],
+            timeout=1
+        )
 
-    out, _ = capsys.readouterr()
-    assert "Inspection operation started" in out
-    # Cancel the operation
-    operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
-    print(operation_id)
-    client = google.cloud.dlp_v2.DlpServiceClient()
-    client.cancel_dlp_job(operation_id)
+        out, _ = capsys.readouterr()
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
 def test_inspect_gcs_file_with_custom_info_types(
-    bucket, topic_id, subscription_id, capsys
-):
-    dictionaries = ["gary@somedomain.com"]
-    regexes = ["\\(\\d{3}\\) \\d{3}-\\d{4}"]
+        bucket, topic_id, subscription_id, capsys):
+    out = ""
+    try:
+        dictionaries = ["gary@somedomain.com"]
+        regexes = ["\\(\\d{3}\\) \\d{3}-\\d{4}"]
 
-    inspect_content.inspect_gcs_file(
-        GCLOUD_PROJECT,
-        bucket.name,
-        "test.txt",
-        topic_id,
-        subscription_id,
-        [],
-        custom_dictionaries=dictionaries,
-        custom_regexes=regexes,
-        timeout=420,
-    )
+        inspect_content.inspect_gcs_file(
+            GCLOUD_PROJECT,
+            bucket.name,
+            "test.txt",
+            topic_id,
+            subscription_id,
+            [],
+            custom_dictionaries=dictionaries,
+            custom_regexes=regexes,
+            timeout=1)
 
-    out, _ = capsys.readouterr()
+        out, _ = capsys.readouterr()
 
-    assert "Inspection operation started" in out
-    # Cancel the operation
-    operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
-    print(operation_id)
-    client = google.cloud.dlp_v2.DlpServiceClient()
-    client.cancel_dlp_job(operation_id)
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
-def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
-    inspect_content.inspect_gcs_file(
-        GCLOUD_PROJECT,
-        bucket.name,
-        "harmless.txt",
-        topic_id,
-        subscription_id,
-        ["EMAIL_ADDRESS", "PHONE_NUMBER"],
-        timeout=420,
-    )
+def test_inspect_gcs_file_no_results(
+        bucket, topic_id, subscription_id, capsys):
+    out = ""
+    try:
+        inspect_content.inspect_gcs_file(
+            GCLOUD_PROJECT,
+            bucket.name,
+            "harmless.txt",
+            topic_id,
+            subscription_id,
+            ["EMAIL_ADDRESS", "PHONE_NUMBER"],
+            timeout=1)
 
-    out, _ = capsys.readouterr()
+        out, _ = capsys.readouterr()
 
-    assert "Inspection operation started" in out
-    # Cancel the operation
-    operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
-    print(operation_id)
-    client = google.cloud.dlp_v2.DlpServiceClient()
-    client.cancel_dlp_job(operation_id)
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
-@pytest.mark.skip(reason="nondeterministically failing")
 def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
-    inspect_content.inspect_gcs_file(
-        GCLOUD_PROJECT,
-        bucket.name,
-        "test.png",
-        topic_id,
-        subscription_id,
-        ["EMAIL_ADDRESS", "PHONE_NUMBER"],
-    )
+    out = ""
+    try:
+        inspect_content.inspect_gcs_file(
+            GCLOUD_PROJECT,
+            bucket.name,
+            "test.png",
+            topic_id,
+            subscription_id,
+            ["EMAIL_ADDRESS", "PHONE_NUMBER"],
+            timeout=1)
 
-    out, _ = capsys.readouterr()
-    assert "Info type: EMAIL_ADDRESS" in out
+        out, _ = capsys.readouterr()
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
 def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
-    inspect_content.inspect_gcs_file(
-        GCLOUD_PROJECT,
-        bucket.name,
-        "*",
-        topic_id,
-        subscription_id,
-        ["EMAIL_ADDRESS", "PHONE_NUMBER"],
-    )
+    out = ""
+    try:
+        inspect_content.inspect_gcs_file(
+            GCLOUD_PROJECT,
+            bucket.name,
+            "*",
+            topic_id,
+            subscription_id,
+            ["EMAIL_ADDRESS", "PHONE_NUMBER"],
+            timeout=1)
 
-    out, _ = capsys.readouterr()
+        out, _ = capsys.readouterr()
 
-    assert "Inspection operation started" in out
-    # Cancel the operation
-    operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
-    print(operation_id)
-    client = google.cloud.dlp_v2.DlpServiceClient()
-    client.cancel_dlp_job(operation_id)
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
-@flaky
-def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
-    @eventually_consistent.call
-    def _():
+def test_inspect_datastore(
+        datastore_project, topic_id, subscription_id, capsys):
+    out = ""
+    try:
         inspect_content.inspect_datastore(
@@ -404,18 +404,17 @@ def _():
             topic_id,
             subscription_id,
             ["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
-        )
+            timeout=1)
 
         out, _ = capsys.readouterr()
 
-        assert "Info type: EMAIL_ADDRESS" in out
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
-@flaky
 def test_inspect_datastore_no_results(
-    datastore_project, topic_id, subscription_id, capsys
-):
-    @eventually_consistent.call
-    def _():
+        datastore_project, topic_id, subscription_id, capsys):
+    out = ""
+    try:
         inspect_content.inspect_datastore(
@@ -423,23 +422,27 @@ def _():
             topic_id,
             subscription_id,
             ["PHONE_NUMBER"],
-        )
+            timeout=1)
 
         out, _ = capsys.readouterr()
 
-        assert "No findings" in out
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
 
 
-@pytest.mark.skip(reason="unknown issue")
 def test_inspect_bigquery(bigquery_project, topic_id, subscription_id, capsys):
-    inspect_content.inspect_bigquery(
-        GCLOUD_PROJECT,
-        bigquery_project,
-        BIGQUERY_DATASET_ID,
-        BIGQUERY_TABLE_ID,
-        topic_id,
-        subscription_id,
-        ["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
-    )
+    out = ""
+    try:
+        inspect_content.inspect_bigquery(
+            GCLOUD_PROJECT,
+            bigquery_project,
+            BIGQUERY_DATASET_ID,
+            BIGQUERY_TABLE_ID,
+            topic_id,
+            subscription_id,
+            ["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
+            timeout=1)
 
-    out, _ = capsys.readouterr()
-    assert "Info type: FIRST_NAME" in out
+        out, _ = capsys.readouterr()
+        assert "Inspection operation started" in out
+    finally:
+        cancel_operation(out)
diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py
index 98acb7464e38..b3910dd5b330 100644
--- a/dlp/jobs_test.py
+++ b/dlp/jobs_test.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import os
 
-from flaky import flaky
+import uuid
 
 import pytest
 
@@ -24,6 +24,7 @@
 TEST_TABLE_PROJECT_ID = "bigquery-public-data"
 TEST_DATASET_ID = "san_francisco"
 TEST_TABLE_ID = "bikeshare_trips"
+test_job_id = "test-job-{}".format(uuid.uuid4())
 
 
 @pytest.fixture(scope="module")
@@ -46,7 +47,7 @@ def test_job_name():
         },
     }
 
-    response = dlp.create_dlp_job(parent, risk_job=risk_job)
+    response = dlp.create_dlp_job(parent, risk_job=risk_job, job_id=test_job_id)
     full_path = response.name
     # API expects only job name, not full project path
     job_name = full_path[full_path.rfind("/") + 1:]
@@ -66,11 +67,10 @@ def test_list_dlp_jobs(test_job_name, capsys):
     assert test_job_name not in out
 
 
-@flaky
 def test_list_dlp_jobs_with_filter(test_job_name, capsys):
     jobs.list_dlp_jobs(
         GCLOUD_PROJECT,
-        filter_string="state=RUNNING",
+        filter_string="state=RUNNING OR state=DONE",
         job_type="RISK_ANALYSIS_JOB",
     )
diff --git a/dlp/requirements-test.txt b/dlp/requirements-test.txt
index d1ad7a9fd107..5776d6748130 100644
--- a/dlp/requirements-test.txt
+++ b/dlp/requirements-test.txt
@@ -1,4 +1,4 @@
 pytest==5.3.2
-gcp-devrel-py-tools==0.0.15
 flaky==3.6.1
 mock==3.0.5
+
diff --git a/dlp/risk.py b/dlp/risk.py
index 386f05c0d73d..a31dfb12c6ef 100644
--- a/dlp/risk.py
+++ b/dlp/risk.py
@@ -56,31 +56,6 @@ def numerical_risk_analysis(
     # potentially long-running operations.
     import google.cloud.pubsub
 
-    def callback(message):
-        if message.attributes["DlpJobName"] == operation.name:
-            # This is the message we're looking for, so acknowledge it.
-            message.ack()
-
-            # Now that the job is done, fetch the results and print them.
-            job = dlp.get_dlp_job(operation.name)
-            results = job.risk_details.numerical_stats_result
-            print(
-                "Value Range: [{}, {}]".format(
-                    results.min_value.integer_value,
-                    results.max_value.integer_value,
-                )
-            )
-            prev_value = None
-            for percent, result in enumerate(results.quantile_values):
-                value = result.integer_value
-                if prev_value != value:
-                    print("Value at {}% quantile: {}".format(percent, value))
-                    prev_value = value
-            subscription.set_result(None)
-        else:
-            # This is not the message we're looking for.
-            message.drop()
-
     # Instantiate a client.
     dlp = google.cloud.dlp_v2.DlpServiceClient()
 
@@ -107,15 +82,40 @@ def callback(message):
         "actions": actions,
     }
 
+    # Call API to start risk analysis job
+    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
+
+    def callback(message):
+        if message.attributes["DlpJobName"] == operation.name:
+            # This is the message we're looking for, so acknowledge it.
+            message.ack()
+
+            # Now that the job is done, fetch the results and print them.
+            job = dlp.get_dlp_job(operation.name)
+            results = job.risk_details.numerical_stats_result
+            print(
+                "Value Range: [{}, {}]".format(
+                    results.min_value.integer_value,
+                    results.max_value.integer_value,
+                )
+            )
+            prev_value = None
+            for percent, result in enumerate(results.quantile_values):
+                value = result.integer_value
+                if prev_value != value:
+                    print("Value at {}% quantile: {}".format(percent, value))
+                    prev_value = value
+            subscription.set_result(None)
+        else:
+            # This is not the message we're looking for.
+            message.drop()
+
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     subscription_path = subscriber.subscription_path(project, subscription_id)
     subscription = subscriber.subscribe(subscription_path, callback)
 
-    # Call API to start risk analysis job
-    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
-
     try:
         subscription.result(timeout=timeout)
     except TimeoutError:
@@ -166,6 +166,35 @@ def categorical_risk_analysis(
     # potentially long-running operations.
     import google.cloud.pubsub
 
+    # Instantiate a client.
+    dlp = google.cloud.dlp_v2.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Location info of the BigQuery table.
+    source_table = {
+        "project_id": table_project_id,
+        "dataset_id": dataset_id,
+        "table_id": table_id,
+    }
+
+    # Tell the API where to send a notification when the job is complete.
+    actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}]
+
+    # Configure risk analysis job
+    # Give the name of the column to compute risk metrics for
+    risk_job = {
+        "privacy_metric": {
+            "categorical_stats_config": {"field": {"name": column_name}}
+        },
+        "source_table": source_table,
+        "actions": actions,
+    }
+
+    # Call API to start risk analysis job
+    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
+
     def callback(message):
         if message.attributes["DlpJobName"] == operation.name:
             # This is the message we're looking for, so acknowledge it.
             message.ack()
@@ -201,41 +230,12 @@ def callback(message):
             # This is not the message we're looking for.
             message.drop()
 
-    # Instantiate a client.
-    dlp = google.cloud.dlp_v2.DlpServiceClient()
-
-    # Convert the project id into a full resource id.
-    parent = dlp.project_path(project)
-
-    # Location info of the BigQuery table.
-    source_table = {
-        "project_id": table_project_id,
-        "dataset_id": dataset_id,
-        "table_id": table_id,
-    }
-
-    # Tell the API where to send a notification when the job is complete.
-    actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}]
-
-    # Configure risk analysis job
-    # Give the name of the numeric column to compute risk metrics for
-    risk_job = {
-        "privacy_metric": {
-            "categorical_stats_config": {"field": {"name": column_name}}
-        },
-        "source_table": source_table,
-        "actions": actions,
-    }
-
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     subscription_path = subscriber.subscription_path(project, subscription_id)
     subscription = subscriber.subscribe(subscription_path, callback)
 
-    # Call API to start risk analysis job
-    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
-
     try:
         subscription.result(timeout=timeout)
     except TimeoutError:
@@ -290,6 +290,39 @@ def k_anonymity_analysis(
     def get_values(obj):
         return int(obj.integer_value)
 
+    # Instantiate a client.
+    dlp = google.cloud.dlp_v2.DlpServiceClient()
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
+    # Location info of the BigQuery table.
+    source_table = {
+        "project_id": table_project_id,
+        "dataset_id": dataset_id,
+        "table_id": table_id,
+    }
+
+    # Convert quasi id list to Protobuf type
+    def map_fields(field):
+        return {"name": field}
+
+    quasi_ids = map(map_fields, quasi_ids)
+
+    # Tell the API where to send a notification when the job is complete.
+ actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + + # Configure risk analysis job + # Give the name of the numeric column to compute risk metrics for + risk_job = { + "privacy_metric": {"k_anonymity_config": {"quasi_ids": quasi_ids}}, + "source_table": source_table, + "actions": actions, + } + + # Call API to start risk analysis job + operation = dlp.create_dlp_job(parent, risk_job=risk_job) + def callback(message): if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. @@ -326,45 +359,12 @@ def callback(message): # This is not the message we're looking for. message.drop() - # Instantiate a client. - dlp = google.cloud.dlp_v2.DlpServiceClient() - - # Convert the project id into a full resource id. - parent = dlp.project_path(project) - - # Location info of the BigQuery table. - source_table = { - "project_id": table_project_id, - "dataset_id": dataset_id, - "table_id": table_id, - } - - # Convert quasi id list to Protobuf type - def map_fields(field): - return {"name": field} - - quasi_ids = map(map_fields, quasi_ids) - - # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] - - # Configure risk analysis job - # Give the name of the numeric column to compute risk metrics for - risk_job = { - "privacy_metric": {"k_anonymity_config": {"quasi_ids": quasi_ids}}, - "source_table": source_table, - "actions": actions, - } - # Create a Pub/Sub client and find the subscription. The subscription is # expected to already be listening to the topic. subscriber = google.cloud.pubsub.SubscriberClient() subscription_path = subscriber.subscription_path(project, subscription_id) subscription = subscriber.subscribe(subscription_path, callback) - # Call API to start risk analysis job - operation = dlp.create_dlp_job(parent, risk_job=risk_job) - try: subscription.result(timeout=timeout) except TimeoutError: @@ -421,6 +421,44 @@ def l_diversity_analysis( def get_values(obj): return int(obj.integer_value) + # Instantiate a client. + dlp = google.cloud.dlp_v2.DlpServiceClient() + + # Convert the project id into a full resource id. + parent = dlp.project_path(project) + + # Location info of the BigQuery table. + source_table = { + "project_id": table_project_id, + "dataset_id": dataset_id, + "table_id": table_id, + } + + # Convert quasi id list to Protobuf type + def map_fields(field): + return {"name": field} + + quasi_ids = map(map_fields, quasi_ids) + + # Tell the API where to send a notification when the job is complete. + actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + + # Configure risk analysis job + # Give the name of the numeric column to compute risk metrics for + risk_job = { + "privacy_metric": { + "l_diversity_config": { + "quasi_ids": quasi_ids, + "sensitive_attribute": {"name": sensitive_attribute}, + } + }, + "source_table": source_table, + "actions": actions, + } + + # Call API to start risk analysis job + operation = dlp.create_dlp_job(parent, risk_job=risk_job) + def callback(message): if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. @@ -464,50 +502,12 @@ def callback(message): # This is not the message we're looking for. message.drop() - # Instantiate a client. - dlp = google.cloud.dlp_v2.DlpServiceClient() - - # Convert the project id into a full resource id. 
-    parent = dlp.project_path(project)
-
-    # Location info of the BigQuery table.
-    source_table = {
-        "project_id": table_project_id,
-        "dataset_id": dataset_id,
-        "table_id": table_id,
-    }
-
-    # Convert quasi id list to Protobuf type
-    def map_fields(field):
-        return {"name": field}
-
-    quasi_ids = map(map_fields, quasi_ids)
-
-    # Tell the API where to send a notification when the job is complete.
-    actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}]
-
-    # Configure risk analysis job
-    # Give the name of the numeric column to compute risk metrics for
-    risk_job = {
-        "privacy_metric": {
-            "l_diversity_config": {
-                "quasi_ids": quasi_ids,
-                "sensitive_attribute": {"name": sensitive_attribute},
-            }
-        },
-        "source_table": source_table,
-        "actions": actions,
-    }
-
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     subscription_path = subscriber.subscription_path(project, subscription_id)
     subscription = subscriber.subscribe(subscription_path, callback)
 
-    # Call API to start risk analysis job
-    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
-
     try:
         subscription.result(timeout=timeout)
     except TimeoutError:
@@ -571,41 +571,6 @@ def k_map_estimate_analysis(
     def get_values(obj):
         return int(obj.integer_value)
 
-    def callback(message):
-        if message.attributes["DlpJobName"] == operation.name:
-            # This is the message we're looking for, so acknowledge it.
-            message.ack()
-
-            # Now that the job is done, fetch the results and print them.
-            job = dlp.get_dlp_job(operation.name)
-            histogram_buckets = (
-                job.risk_details.k_map_estimation_result.k_map_estimation_histogram
-            )
-            # Print bucket stats
-            for i, bucket in enumerate(histogram_buckets):
-                print("Bucket {}:".format(i))
-                print(
-                    "   Anonymity range: [{}, {}]".format(
-                        bucket.min_anonymity, bucket.max_anonymity
-                    )
-                )
-                print("   Size: {}".format(bucket.bucket_size))
-                for value_bucket in bucket.bucket_values:
-                    print(
-                        "   Values: {}".format(
-                            map(get_values, value_bucket.quasi_ids_values)
-                        )
-                    )
-                    print(
-                        "   Estimated k-map anonymity: {}".format(
-                            value_bucket.estimated_anonymity
-                        )
-                    )
-            subscription.set_result(None)
-        else:
-            # This is not the message we're looking for.
-            message.drop()
-
     # Instantiate a client.
     dlp = google.cloud.dlp_v2.DlpServiceClient()
 
@@ -648,15 +613,50 @@ def map_fields(quasi_id, info_type):
         "actions": actions,
     }
 
+    # Call API to start risk analysis job
+    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
+
+    def callback(message):
+        if message.attributes["DlpJobName"] == operation.name:
+            # This is the message we're looking for, so acknowledge it.
+            message.ack()
+
+            # Now that the job is done, fetch the results and print them.
+            job = dlp.get_dlp_job(operation.name)
+            histogram_buckets = (
+                job.risk_details.k_map_estimation_result.k_map_estimation_histogram
+            )
+            # Print bucket stats
+            for i, bucket in enumerate(histogram_buckets):
+                print("Bucket {}:".format(i))
+                print(
+                    "   Anonymity range: [{}, {}]".format(
+                        bucket.min_anonymity, bucket.max_anonymity
+                    )
+                )
+                print("   Size: {}".format(bucket.bucket_size))
+                for value_bucket in bucket.bucket_values:
+                    print(
+                        "   Values: {}".format(
+                            map(get_values, value_bucket.quasi_ids_values)
+                        )
+                    )
+                    print(
+                        "   Estimated k-map anonymity: {}".format(
+                            value_bucket.estimated_anonymity
+                        )
+                    )
+            subscription.set_result(None)
+        else:
+            # This is not the message we're looking for.
+            message.drop()
+
     # Create a Pub/Sub client and find the subscription. The subscription is
     # expected to already be listening to the topic.
     subscriber = google.cloud.pubsub.SubscriberClient()
     subscription_path = subscriber.subscription_path(project, subscription_id)
     subscription = subscriber.subscribe(subscription_path, callback)
 
-    # Call API to start risk analysis job
-    operation = dlp.create_dlp_job(parent, risk_job=risk_job)
-
     try:
         subscription.result(timeout=timeout)
     except TimeoutError:
diff --git a/dlp/risk_test.py b/dlp/risk_test.py
index 41b514f4da74..5f172bcbc8d2 100644
--- a/dlp/risk_test.py
+++ b/dlp/risk_test.py
@@ -12,14 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from flaky import flaky
+import os
 import uuid
 
 import google.cloud.pubsub
 import google.cloud.bigquery
-
 import pytest
 
-import os
 import risk
 
 
@@ -160,7 +158,7 @@ def bigquery_project():
     bigquery_client.delete_dataset(dataset_ref, delete_contents=True)
 
 
-@flaky
+@pytest.mark.flaky
 def test_numerical_risk_analysis(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -178,7 +176,7 @@ def test_numerical_risk_analysis(
     assert "Value Range:" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_categorical_risk_analysis_on_string_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -197,7 +195,7 @@ def test_categorical_risk_analysis_on_string_field(
     assert "Most common value occurs" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_categorical_risk_analysis_on_number_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -215,7 +213,7 @@ def test_categorical_risk_analysis_on_number_field(
     assert "Most common value occurs" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_k_anonymity_analysis_single_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -234,7 +232,7 @@ def test_k_anonymity_analysis_single_field(
     assert "Class size:" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_k_anonymity_analysis_multiple_fields(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -253,7 +251,7 @@ def test_k_anonymity_analysis_multiple_fields(
     assert "Class size:" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_l_diversity_analysis_single_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -274,7 +272,7 @@ def test_l_diversity_analysis_single_field(
     assert "Sensitive value" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_l_diversity_analysis_multiple_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -295,7 +293,7 @@ def test_l_diversity_analysis_multiple_field(
     assert "Sensitive value" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_k_map_estimate_analysis_single_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -316,7 +314,7 @@ def test_k_map_estimate_analysis_single_field(
     assert "Values" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_k_map_estimate_analysis_multiple_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -337,7 +335,7 @@ def test_k_map_estimate_analysis_multiple_field(
     assert "Values" in out
 
 
-@flaky
+@pytest.mark.flaky
 def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
     topic_id, subscription_id, bigquery_project
 ):