Skip to content

Commit

Permalink
[dlp] testing: re-enable some tests
Browse files Browse the repository at this point in the history
fixes #3422
  • Loading branch information
Takashi Matsuo committed Jun 1, 2020
1 parent 953f89c commit 173df2c
Showing 1 changed file with 46 additions and 14 deletions.
60 changes: 46 additions & 14 deletions dlp/inspect_content_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import os
import time
import uuid

import google.api_core.exceptions
Expand Down Expand Up @@ -40,6 +41,8 @@
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING

TIMEOUT = 120


@pytest.fixture(scope="module")
def bucket():
Expand Down Expand Up @@ -74,7 +77,7 @@ def bucket():
bucket.delete()


@pytest.fixture(scope="module")
@pytest.fixture(scope="function")
def topic_id():
# Creates a pubsub topic, and tears it down.
publisher = google.cloud.pubsub.PublisherClient()
Expand All @@ -89,7 +92,7 @@ def topic_id():
publisher.delete_topic(topic_path)


@pytest.fixture(scope="module")
@pytest.fixture(scope="function")
def subscription_id(topic_id):
# Subscribes to a topic.
subscriber = google.cloud.pubsub.SubscriberClient()
Expand Down Expand Up @@ -159,6 +162,21 @@ def bigquery_project():
bigquery_client.delete_dataset(dataset_ref, delete_contents=True)


def delay(err, *args):
    """Sleep before a flaky rerun, then allow the rerun.

    Used as the ``rerun_filter`` of ``pytest.mark.flaky`` in this module.
    Each call sleeps for up to 20 minutes, taking that time out of the
    shared ``pytest.MAX_FLAKY_WAIT`` budget (in seconds).

    NOTE(review): ``pytest.MAX_FLAKY_WAIT`` is not set anywhere in the
    visible part of this file; presumably it is initialised in a
    conftest -- confirm. A missing or negative value is treated as an
    exhausted budget instead of raising.

    Args:
        err: First argument supplied by the caller of the filter; unused.
        *args: Any further positional arguments; unused.

    Returns:
        Always ``True``, so the test is rerun.
    """
    # 20 mins of delay. This sounds like too long a delay, but we
    # occasionally observe consecutive time blocks where operations are
    # slow, which leads to test failures. These situations tend to
    # self-heal in 20 minutes or so, hence this strategy.
    max_delay_per_retry = 60 * 20

    # There are many tests, so we don't want the retry delay happening
    # for all of them. Once the MAX_FLAKY_WAIT budget is exhausted, the
    # test is retried immediately.
    #
    # Clamp at zero: time.sleep() raises ValueError for negative values,
    # and an unset budget should not crash the rerun filter.
    budget = max(0, getattr(pytest, "MAX_FLAKY_WAIT", 0))
    wait_time = min(budget, max_delay_per_retry)
    pytest.MAX_FLAKY_WAIT = budget - wait_time
    time.sleep(wait_time)
    return True


def test_inspect_string(capsys):
test_string = "My name is Gary Smith and my email is gary@example.com"

Expand Down Expand Up @@ -298,6 +316,7 @@ def cancel_operation(out):
client.cancel_dlp_job(operation_id)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
try:
inspect_content.inspect_gcs_file(
Expand All @@ -307,15 +326,16 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1
timeout=120
)

out, _ = capsys.readouterr()
assert "Inspection operation started" in out
assert "Info type: EMAIL_ADDRESS" in out
finally:
cancel_operation(out)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_gcs_file_with_custom_info_types(
bucket, topic_id, subscription_id, capsys):
try:
Expand All @@ -331,15 +351,16 @@ def test_inspect_gcs_file_with_custom_info_types(
[],
custom_dictionaries=dictionaries,
custom_regexes=regexes,
timeout=1)
timeout=TIMEOUT)

out, _ = capsys.readouterr()

assert "Inspection operation started" in out
assert "Info type: EMAIL_ADDRESS" in out
finally:
cancel_operation(out)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_gcs_file_no_results(
bucket, topic_id, subscription_id, capsys):
try:
Expand All @@ -350,15 +371,18 @@ def test_inspect_gcs_file_no_results(
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)
timeout=TIMEOUT)

out, _ = capsys.readouterr()

assert "Inspection operation started" in out
assert "test_inspect_gcs_file_with_custom_info_types" not in out
assert "Info type: EMAIL_ADDRESS" not in out
finally:
cancel_operation(out)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
try:
inspect_content.inspect_gcs_file(
Expand All @@ -368,14 +392,15 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)
timeout=TIMEOUT)

out, _ = capsys.readouterr()
assert "Inspection operation started" in out
assert "Info type: EMAIL_ADDRESS" in out
finally:
cancel_operation(out)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
try:
inspect_content.inspect_gcs_file(
Expand All @@ -385,15 +410,16 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)
timeout=TIMEOUT)

out, _ = capsys.readouterr()

assert "Inspection operation started" in out
assert "Info type: EMAIL_ADDRESS" in out
finally:
cancel_operation(out)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_datastore(
datastore_project, topic_id, subscription_id, capsys):
try:
Expand All @@ -404,14 +430,15 @@ def test_inspect_datastore(
topic_id,
subscription_id,
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)
timeout=TIMEOUT)

out, _ = capsys.readouterr()
assert "Inspection operation started" in out
assert "Info type: EMAIL_ADDRESS" in out
finally:
cancel_operation(out)


@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
def test_inspect_datastore_no_results(
datastore_project, topic_id, subscription_id, capsys):
try:
Expand All @@ -422,15 +449,20 @@ def test_inspect_datastore_no_results(
topic_id,
subscription_id,
["PHONE_NUMBER"],
timeout=1)
timeout=TIMEOUT)

out, _ = capsys.readouterr()
assert "Inspection operation started" in out
assert "test_inspect_gcs_file_with_custom_info_types" not in out
assert "Info type: EMAIL_ADDRESS" not in out
finally:
cancel_operation(out)


def test_inspect_bigquery(bigquery_project, topic_id, subscription_id, capsys):
# This test currently doesn't check the operation output because the
# sample code doesn't detect anything in the BigQuery table
# defined in the fixture above.
try:
inspect_content.inspect_bigquery(
GCLOUD_PROJECT,
Expand Down

0 comments on commit 173df2c

Please sign in to comment.