Skip to content

Commit

Permalink
[dlp] fix: fix periodic builds timeout (#3420)
Browse files Browse the repository at this point in the history
* [dlp] fix: remove gcp-devrel-py-tools

fixes #3375
fixes #3416
fixes #3417

* remove wrong usage of `eventually_consistent.call`
* only test if the operation has been started
* shorter timeout for polling
* correct use of `pytest.mark.flaky`
* use try-finally
* use uuid for job_id
* add a filter to allow state = DONE
  • Loading branch information
Takashi Matsuo authored Apr 17, 2020
1 parent b57b562 commit 0965e26
Show file tree
Hide file tree
Showing 6 changed files with 298 additions and 294 deletions.
3 changes: 3 additions & 0 deletions dlp/inspect_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ def inspect_gcs_file(

operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
print("Inspection operation started: {}".format(operation.name))

# Create a Pub/Sub client and find the subscription. The subscription is
# expected to already be listening to the topic.
subscriber = google.cloud.pubsub.SubscriberClient()
Expand Down Expand Up @@ -636,6 +637,7 @@ def inspect_datastore(
}

operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
print("Inspection operation started: {}".format(operation.name))

# Create a Pub/Sub client and find the subscription. The subscription is
# expected to already be listening to the topic.
Expand Down Expand Up @@ -802,6 +804,7 @@ def inspect_bigquery(
}

operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
print("Inspection operation started: {}".format(operation.name))

# Create a Pub/Sub client and find the subscription. The subscription is
# expected to already be listening to the topic.
Expand Down
223 changes: 113 additions & 110 deletions dlp/inspect_content_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,18 @@
import os
import uuid

from gcp_devrel.testing import eventually_consistent
from gcp_devrel.testing.flaky import flaky
import google.api_core.exceptions
import google.cloud.bigquery
import google.cloud.datastore
import google.cloud.dlp_v2
import google.cloud.exceptions
import google.cloud.pubsub
import google.cloud.storage

import pytest

import inspect_content


UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]

GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
Expand Down Expand Up @@ -95,7 +94,8 @@ def subscription_id(topic_id):
# Subscribes to a topic.
subscriber = google.cloud.pubsub.SubscriberClient()
topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
subscription_path = subscriber.subscription_path(
GCLOUD_PROJECT, SUBSCRIPTION_ID)
try:
subscriber.create_subscription(subscription_path, topic_path)
except google.api_core.exceptions.AlreadyExists:
Expand Down Expand Up @@ -289,157 +289,160 @@ def test_inspect_image_file(capsys):
assert "Info type: PHONE_NUMBER" in out


def cancel_operation(out):
    """Cancel the DLP job whose name appears in captured output.

    Looks for the first "Inspection operation started: <name>" line in
    ``out`` and, if a job name follows it, cancels that job through a
    ``DlpServiceClient``. Does nothing when the marker or the job name is
    missing, so it is safe to call from a ``finally`` clause even when the
    sample failed before printing anything.

    Args:
        out: Text captured from stdout while running an inspect sample.
    """
    # Use one marker for both detection and extraction; checking a shorter
    # prefix than the one used to split can raise IndexError.
    marker = "Inspection operation started: "
    _, found, remainder = out.partition(marker)
    if not found:
        return

    # The job name runs to the end of the line on which it was printed.
    operation_id = remainder.split("\n", 1)[0].strip()
    if not operation_id:
        return

    # Cancel the operation
    client = google.cloud.dlp_v2.DlpServiceClient()
    client.cancel_dlp_job(operation_id)


def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"test.txt",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=420,
)
try:
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"test.txt",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1
)

out, _ = capsys.readouterr()
assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)
out, _ = capsys.readouterr()
assert "Inspection operation started" in out
finally:
cancel_operation(out)


def test_inspect_gcs_file_with_custom_info_types(
bucket, topic_id, subscription_id, capsys
):
dictionaries = ["gary@somedomain.com"]
regexes = ["\\(\\d{3}\\) \\d{3}-\\d{4}"]
bucket, topic_id, subscription_id, capsys):
try:
dictionaries = ["gary@somedomain.com"]
regexes = ["\\(\\d{3}\\) \\d{3}-\\d{4}"]

inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"test.txt",
topic_id,
subscription_id,
[],
custom_dictionaries=dictionaries,
custom_regexes=regexes,
timeout=420,
)
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"test.txt",
topic_id,
subscription_id,
[],
custom_dictionaries=dictionaries,
custom_regexes=regexes,
timeout=1)

out, _ = capsys.readouterr()
out, _ = capsys.readouterr()

assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)
assert "Inspection operation started" in out
finally:
cancel_operation(out)


def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"harmless.txt",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=420,
)
def test_inspect_gcs_file_no_results(
bucket, topic_id, subscription_id, capsys):
try:
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"harmless.txt",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)

out, _ = capsys.readouterr()
out, _ = capsys.readouterr()

assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)
assert "Inspection operation started" in out
finally:
cancel_operation(out)


@pytest.mark.skip(reason="nondeterministically failing")
def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"test.png",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
)
try:
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"test.png",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)

out, _ = capsys.readouterr()
assert "Info type: EMAIL_ADDRESS" in out
out, _ = capsys.readouterr()
assert "Inspection operation started" in out
finally:
cancel_operation(out)


def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"*",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
)
try:
inspect_content.inspect_gcs_file(
GCLOUD_PROJECT,
bucket.name,
"*",
topic_id,
subscription_id,
["EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)

out, _ = capsys.readouterr()
out, _ = capsys.readouterr()

assert "Inspection operation started" in out
# Cancel the operation
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
print(operation_id)
client = google.cloud.dlp_v2.DlpServiceClient()
client.cancel_dlp_job(operation_id)
assert "Inspection operation started" in out
finally:
cancel_operation(out)


@flaky
def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
@eventually_consistent.call
def _():
def test_inspect_datastore(
datastore_project, topic_id, subscription_id, capsys):
try:
inspect_content.inspect_datastore(
GCLOUD_PROJECT,
datastore_project,
DATASTORE_KIND,
topic_id,
subscription_id,
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
)
timeout=1)

out, _ = capsys.readouterr()
assert "Info type: EMAIL_ADDRESS" in out
assert "Inspection operation started" in out
finally:
cancel_operation(out)


@flaky
def test_inspect_datastore_no_results(
datastore_project, topic_id, subscription_id, capsys
):
@eventually_consistent.call
def _():
datastore_project, topic_id, subscription_id, capsys):
try:
inspect_content.inspect_datastore(
GCLOUD_PROJECT,
datastore_project,
DATASTORE_KIND,
topic_id,
subscription_id,
["PHONE_NUMBER"],
)
timeout=1)

out, _ = capsys.readouterr()
assert "No findings" in out
assert "Inspection operation started" in out
finally:
cancel_operation(out)


@pytest.mark.skip(reason="unknown issue")
def test_inspect_bigquery(bigquery_project, topic_id, subscription_id, capsys):
inspect_content.inspect_bigquery(
GCLOUD_PROJECT,
bigquery_project,
BIGQUERY_DATASET_ID,
BIGQUERY_TABLE_ID,
topic_id,
subscription_id,
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
)
try:
inspect_content.inspect_bigquery(
GCLOUD_PROJECT,
bigquery_project,
BIGQUERY_DATASET_ID,
BIGQUERY_TABLE_ID,
topic_id,
subscription_id,
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
timeout=1)

out, _ = capsys.readouterr()
assert "Info type: FIRST_NAME" in out
out, _ = capsys.readouterr()
assert "Inspection operation started" in out
finally:
cancel_operation(out)
8 changes: 4 additions & 4 deletions dlp/jobs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

import os
from flaky import flaky
import uuid

import pytest

Expand All @@ -24,6 +24,7 @@
TEST_TABLE_PROJECT_ID = "bigquery-public-data"
TEST_DATASET_ID = "san_francisco"
TEST_TABLE_ID = "bikeshare_trips"
test_job_id = "test-job-{}".format(uuid.uuid4())


@pytest.fixture(scope="module")
Expand All @@ -46,7 +47,7 @@ def test_job_name():
},
}

response = dlp.create_dlp_job(parent, risk_job=risk_job)
response = dlp.create_dlp_job(parent, risk_job=risk_job, job_id=test_job_id)
full_path = response.name
# API expects only job name, not full project path
job_name = full_path[full_path.rfind("/") + 1:]
Expand All @@ -66,11 +67,10 @@ def test_list_dlp_jobs(test_job_name, capsys):
assert test_job_name not in out


@flaky
def test_list_dlp_jobs_with_filter(test_job_name, capsys):
jobs.list_dlp_jobs(
GCLOUD_PROJECT,
filter_string="state=RUNNING",
filter_string="state=RUNNING OR state=DONE",
job_type="RISK_ANALYSIS_JOB",
)

Expand Down
2 changes: 1 addition & 1 deletion dlp/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pytest==5.3.2
gcp-devrel-py-tools==0.0.15
flaky==3.6.1
mock==3.0.5

Loading

0 comments on commit 0965e26

Please sign in to comment.