diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index ea100d16d84a..1a2037f59dab 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import time import uuid import google.api_core.exceptions @@ -40,6 +41,8 @@ BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING +TIMEOUT = 120 + @pytest.fixture(scope="module") def bucket(): @@ -74,7 +77,7 @@ def bucket(): bucket.delete() -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def topic_id(): # Creates a pubsub topic, and tears it down. publisher = google.cloud.pubsub.PublisherClient() @@ -89,7 +92,7 @@ def topic_id(): publisher.delete_topic(topic_path) -@pytest.fixture(scope="module") +@pytest.fixture(scope="function") def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() @@ -159,6 +162,21 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) +def delay(err, *args): + # 20 mins of delay. This sounds like too long a delay, but we + # occasionally observe consecutive time blocks where operations are + # slow, which leads to test failures. These situations tend to + # self-heal in 20 minutes or so, so I'm trying this strategy. + # + # There are many tests, so we don't want the retry delay happening + # for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry + # the test immediately. 
+ wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20) + pytest.MAX_FLAKY_WAIT -= wait_time + time.sleep(wait_time) + return True + + def test_inspect_string(capsys): test_string = "My name is Gary Smith and my email is gary@example.com" @@ -298,6 +316,7 @@ def cancel_operation(out): client.cancel_dlp_job(operation_id) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -307,15 +326,16 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1 + timeout=TIMEOUT ) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_gcs_file_with_custom_info_types( bucket, topic_id, subscription_id, capsys): try: @@ -331,15 +351,16 @@ def test_inspect_gcs_file_with_custom_info_types( [], custom_dictionaries=dictionaries, custom_regexes=regexes, - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_gcs_file_no_results( bucket, topic_id, subscription_id, capsys): try: @@ -350,15 +371,18 @@ def test_inspect_gcs_file_no_results( topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() assert "Inspection operation started" in out + assert "test_inspect_gcs_file_with_custom_info_types" not in out + assert "Info type: EMAIL_ADDRESS" not in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys): try: 
inspect_content.inspect_gcs_file( @@ -368,14 +392,15 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -385,15 +410,16 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_datastore( datastore_project, topic_id, subscription_id, capsys): try: @@ -404,14 +430,15 @@ def test_inspect_datastore( topic_id, subscription_id, ["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) def test_inspect_datastore_no_results( datastore_project, topic_id, subscription_id, capsys): try: @@ -422,15 +449,20 @@ def test_inspect_datastore_no_results( topic_id, subscription_id, ["PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() assert "Inspection operation started" in out + assert "test_inspect_gcs_file_with_custom_info_types" not in out + assert "Info type: EMAIL_ADDRESS" not in out finally: cancel_operation(out) def test_inspect_bigquery(bigquery_project, topic_id, subscription_id, capsys): + # Now this test doesn't 
check the operation output because the + # sample code doesn't detect anything against the BigQuery table + # defined in the fixture above. try: inspect_content.inspect_bigquery( GCLOUD_PROJECT,