diff --git a/client/verta/tests/unit_tests/deployment/test_deployed_model.py b/client/verta/tests/unit_tests/deployment/test_deployed_model.py index 2b1191bd2b..653ab9d096 100644 --- a/client/verta/tests/unit_tests/deployment/test_deployed_model.py +++ b/client/verta/tests/unit_tests/deployment/test_deployed_model.py @@ -10,8 +10,6 @@ from tests import utils -np = pytest.importorskip("numpy") -pd = pytest.importorskip("pandas") from requests import Session, HTTPError from requests.exceptions import RetryError import responses @@ -24,33 +22,32 @@ from verta.deployment import DeployedModel from verta._internal_utils import http_session -PREDICTION_URL: str = 'https://test.dev.verta.ai/api/v1/predict/test_path' -BATCH_PREDICTION_URL: str = 'https://test.dev.verta.ai/api/v1/batch-predict/test_path' -TOKEN: str = '12345678-xxxx-1a2b-3c4d-e5f6g7h8' +PREDICTION_URL: str = "https://test.dev.verta.ai/api/v1/predict/test_path" +BATCH_PREDICTION_URL: str = "https://test.dev.verta.ai/api/v1/batch-predict/test_path" +TOKEN: str = "12345678-xxxx-1a2b-3c4d-e5f6g7h8" MOCK_RETRY: Retry = http_session.retry_config( max_retries=http_session.DEFAULT_MAX_RETRIES, status_forcelist=http_session.DEFAULT_STATUS_FORCELIST, - backoff_factor=http_session.DEFAULT_BACKOFF_FACTOR + backoff_factor=http_session.DEFAULT_BACKOFF_FACTOR, ) MOCK_SESSION: Session = http_session.init_session(retry=MOCK_RETRY) -VERTA_CLASS = 'verta.deployment._deployedmodel' +VERTA_CLASS = "verta.deployment._deployedmodel" @patch.dict( os.environ, - {'VERTA_EMAIL': 'test_email@verta.ai', - 'VERTA_DEV_KEY': '123test1232dev1232key123'}, + {"VERTA_EMAIL": "test_email@verta.ai", "VERTA_DEV_KEY": "123test1232dev1232key123"}, ) @patch( - f'{VERTA_CLASS}.http_session.retry_config', + f"{VERTA_CLASS}.http_session.retry_config", return_value=MOCK_RETRY, ) @patch( - f'{VERTA_CLASS}.http_session.init_session', + f"{VERTA_CLASS}.http_session.init_session", return_value=MOCK_SESSION, ) def test_deployed_model_init(mock_session, 
mock_retry) -> None: - """ Validate the creation of an object of deployment.DeployedModel class with desired Session. """ + """Validate the creation of an object of deployment.DeployedModel class with desired Session.""" creds = EmailCredentials.load_from_os_env() created_dm_details = DeployedModel( prediction_url=PREDICTION_URL, @@ -58,25 +55,27 @@ def test_deployed_model_init(mock_session, mock_retry) -> None: token=TOKEN, ).__dict__ expected_dm_details: Dict[str, Any] = { - '_prediction_url': PREDICTION_URL, - '_credentials': creds, - '_access_token': '12345678-xxxx-1a2b-3c4d-e5f6g7h8', - '_retry_config': mock_retry.return_value, - '_session': mock_session.return_value + "_prediction_url": PREDICTION_URL, + "_credentials": creds, + "_access_token": "12345678-xxxx-1a2b-3c4d-e5f6g7h8", + "_retry_config": mock_retry.return_value, + "_session": mock_session.return_value, } - assert created_dm_details['_prediction_url'] == expected_dm_details['_prediction_url'] - assert created_dm_details['_access_token'] == expected_dm_details['_access_token'] - assert created_dm_details['_credentials'] == expected_dm_details['_credentials'] - assert created_dm_details['_session'] == expected_dm_details['_session'] + assert ( + created_dm_details["_prediction_url"] == expected_dm_details["_prediction_url"] + ) + assert created_dm_details["_access_token"] == expected_dm_details["_access_token"] + assert created_dm_details["_credentials"] == expected_dm_details["_credentials"] + assert created_dm_details["_session"] == expected_dm_details["_session"] def test_predict_http_defaults_200(mocked_responses) -> None: - """ Calling predict with the default settings and getting a 200 response returns the response as expected. 
""" + """Calling predict with the default settings and getting a 200 response returns the response as expected.""" mocked_responses.post( PREDICTION_URL, json={"test_key": "test_val"}, status=200, - headers={'verta-request-id': 'hereISaTESTidFROMtheUSER'}, + headers={"verta-request-id": "hereISaTESTidFROMtheUSER"}, ) creds = EmailCredentials.load_from_os_env() dm = DeployedModel( @@ -84,12 +83,12 @@ def test_predict_http_defaults_200(mocked_responses) -> None: creds=creds, token=TOKEN, ) - prediction_response = dm.predict(x=['test_prediction']) + prediction_response = dm.predict(x=["test_prediction"]) assert prediction_response == {"test_key": "test_val"} def test_predict_http_defaults_404_retry_error(mocked_responses) -> None: - """ Calling predict with the default settings and getting a 404 results in retries being exhausted. """ + """Calling predict with the default settings and getting a 404 results in retries being exhausted.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -102,11 +101,11 @@ def test_predict_http_defaults_404_retry_error(mocked_responses) -> None: token=TOKEN, ) with pytest.raises(RetryError): - dm.predict(x=['test_prediction']) + dm.predict(x=["test_prediction"]) def test_predict_http_defaults_429_retry_error(mocked_responses) -> None: - """ Calling predict with the default settings and getting a 429 results in retries being exhausted. """ + """Calling predict with the default settings and getting a 429 results in retries being exhausted.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -119,15 +118,15 @@ def test_predict_http_defaults_429_retry_error(mocked_responses) -> None: token=TOKEN, ) with pytest.raises(RetryError): - dm.predict(x=['test_prediction']) + dm.predict(x=["test_prediction"]) def test_predict_http_defaults_status_not_in_retry(mocked_responses) -> None: - """ Verify that calling predict with the default settings and getting a response not in `status_forcelist` - does not result in retries. 
""" + """Verify that calling predict with the default settings and getting a response not in `status_forcelist` + does not result in retries.""" mocked_responses.post( PREDICTION_URL, - headers={'verta-request-id': 'hereISaTESTidFROMtheUSER'}, + headers={"verta-request-id": "hereISaTESTidFROMtheUSER"}, json={}, status=999, ) @@ -137,12 +136,12 @@ def test_predict_http_defaults_status_not_in_retry(mocked_responses) -> None: creds=creds, token=TOKEN, ) - dm.predict(x=['test_prediction']) + dm.predict(x=["test_prediction"]) mocked_responses.assert_call_count(PREDICTION_URL, 1) def test_predict_http_default_max_retry_observed(mocked_responses) -> None: - """ Calling predict with the default settings and getting a 429 results in retries being exhausted. """ + """Calling predict with the default settings and getting a 429 results in retries being exhausted.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -155,13 +154,15 @@ def test_predict_http_default_max_retry_observed(mocked_responses) -> None: token=TOKEN, ) with pytest.raises(RetryError): - dm.predict(x=['test_prediction']) - mocked_responses.assert_call_count(PREDICTION_URL, http_session.DEFAULT_MAX_RETRIES + 1) + dm.predict(x=["test_prediction"]) + mocked_responses.assert_call_count( + PREDICTION_URL, http_session.DEFAULT_MAX_RETRIES + 1 + ) # max_retries + 1 original attempt = total call count def test_predict_with_altered_retry_config(mocked_responses) -> None: - """ Calling predict with custom retry parameters changes the retry config and makes the correct requests. 
""" + """Calling predict with custom retry parameters changes the retry config and makes the correct requests.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -175,22 +176,23 @@ def test_predict_with_altered_retry_config(mocked_responses) -> None: ) with pytest.raises(RetryError): dm.predict( - x=['test_prediction'], - max_retries=9, - retry_status={888}, - backoff_factor=0.1 + x=["test_prediction"], max_retries=9, retry_status={888}, backoff_factor=0.1 ) mocked_responses.assert_call_count(PREDICTION_URL, 10) def test_predict_with_prediction_id_provided(mocked_responses) -> None: - """ Calling predict while providing a value for `prediction_id` updates and includes the headers in the request. """ + """Calling predict while providing a value for `prediction_id` updates and includes the headers in the request.""" mocked_responses.post( PREDICTION_URL, - json={'test1': 'test1'}, + json={"test1": "test1"}, status=200, - match=[responses.matchers.header_matcher({'verta-request-id': 'hereISaTESTidFROMtheUSER'})], - headers={'verta-request-id': 'hereISaTESTidFROMtheUSER'}, + match=[ + responses.matchers.header_matcher( + {"verta-request-id": "hereISaTESTidFROMtheUSER"} + ) + ], + headers={"verta-request-id": "hereISaTESTidFROMtheUSER"}, ) creds = EmailCredentials.load_from_os_env() dm = DeployedModel( @@ -199,19 +201,19 @@ def test_predict_with_prediction_id_provided(mocked_responses) -> None: token=TOKEN, ) dm.predict( - x=['test_prediction'], - prediction_id='hereISaTESTidFROMtheUSER', + x=["test_prediction"], + prediction_id="hereISaTESTidFROMtheUSER", ) mocked_responses.assert_call_count(PREDICTION_URL, 1) def test_predict_with_id_response_includes_id(mocked_responses) -> None: - """ Calling predict_with_id returns both the ID from teh request response, and the prediction results """ + """Calling predict_with_id returns both the ID from teh request response, and the prediction results""" mocked_responses.post( PREDICTION_URL, - headers={'verta-request-id': 
'AutoGeneratedTestId'}, + headers={"verta-request-id": "AutoGeneratedTestId"}, # Adds this header to the mocked http response. - json={'test2': 'test2'}, + json={"test2": "test2"}, status=200, ) creds = EmailCredentials.load_from_os_env() @@ -220,20 +222,24 @@ def test_predict_with_id_response_includes_id(mocked_responses) -> None: creds=creds, token=TOKEN, ) - prediction = dm.predict_with_id(x=['test_prediction']) - assert prediction == ('AutoGeneratedTestId', {'test2': 'test2'}) + prediction = dm.predict_with_id(x=["test_prediction"]) + assert prediction == ("AutoGeneratedTestId", {"test2": "test2"}) def test_predict_with_id_prediction_id_provided(mocked_responses) -> None: - """ Calling predict_with_id while including the `prediction_id` adds the id to the header of the request and - includes the id provided in the response with the prediction results """ + """Calling predict_with_id while including the `prediction_id` adds the id to the header of the request and + includes the id provided in the response with the prediction results""" mocked_responses.post( PREDICTION_URL, - match=[responses.matchers.header_matcher({'verta-request-id': 'hereISomeTESTidFROMtheUSER'})], + match=[ + responses.matchers.header_matcher( + {"verta-request-id": "hereISomeTESTidFROMtheUSER"} + ) + ], # Makes sure the prediction id was included as a header in the request - headers={'verta-request-id': 'hereISomeTESTidFROMtheUSER'}, + headers={"verta-request-id": "hereISomeTESTidFROMtheUSER"}, # Adds this header to the mocked http response. 
- json={'test2': 'test2'}, + json={"test2": "test2"}, status=200, ) creds = EmailCredentials.load_from_os_env() @@ -243,19 +249,18 @@ def test_predict_with_id_prediction_id_provided(mocked_responses) -> None: token=TOKEN, ) prediction = dm.predict_with_id( - x=['test_prediction'], - prediction_id='hereISomeTESTidFROMtheUSER' + x=["test_prediction"], prediction_id="hereISomeTESTidFROMtheUSER" ) - assert prediction == ('hereISomeTESTidFROMtheUSER', {'test2': 'test2'}) + assert prediction == ("hereISomeTESTidFROMtheUSER", {"test2": "test2"}) def test_predict_with_id_http_defaults_200(mocked_responses) -> None: - """ Calling predict with the default settings and getting a 200 response returns the response as expected. """ + """Calling predict_with_id with the default settings and getting a 200 response returns the response as expected.""" mocked_responses.post( PREDICTION_URL, json={"test_key": "test_val"}, status=200, - headers={'verta-request-id': 'hereISthisTESTidFROMtheUSER'}, + headers={"verta-request-id": "hereISthisTESTidFROMtheUSER"}, ) creds = EmailCredentials.load_from_os_env() dm = DeployedModel( @@ -263,12 +268,15 @@ def test_predict_with_id_http_defaults_200(mocked_responses) -> None: creds=creds, token=TOKEN, ) - prediction_response = dm.predict_with_id(x=['test_prediction']) - assert prediction_response == ('hereISthisTESTidFROMtheUSER', {"test_key": "test_val"}) + prediction_response = dm.predict_with_id(x=["test_prediction"]) + assert prediction_response == ( + "hereISthisTESTidFROMtheUSER", + {"test_key": "test_val"}, + ) def test_predict_with_id_http_defaults_404_retry_error(mocked_responses) -> None: - """ Calling predict with the default settings and getting a 404 results in retries being exhausted. 
""" + """Calling predict with the default settings and getting a 404 results in retries being exhausted.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -281,11 +289,11 @@ def test_predict_with_id_http_defaults_404_retry_error(mocked_responses) -> None token=TOKEN, ) with pytest.raises(RetryError): - dm.predict_with_id(x=['test_prediction']) + dm.predict_with_id(x=["test_prediction"]) def test_predict_with_id_altered_retry_config(mocked_responses) -> None: - """ Calling predict with custom retry parameters changes the retry config and makes the correct requests. """ + """Calling predict with custom retry parameters changes the retry config and makes the correct requests.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -299,18 +307,15 @@ def test_predict_with_id_altered_retry_config(mocked_responses) -> None: ) with pytest.raises(RetryError): dm.predict_with_id( - x=['test_prediction'], - max_retries=9, - retry_status={888}, - backoff_factor=0.1 + x=["test_prediction"], max_retries=9, retry_status={888}, backoff_factor=0.1 ) mocked_responses.assert_call_count(PREDICTION_URL, 10) def test_default_retry_after_custom_retry(mocked_responses) -> None: - """ Calling predict with default params after calling predict with custom - params uses default retry settings and not the custom settings from - the previous call. 
""" + """Calling predict with default params after calling predict with custom + params uses default retry settings and not the custom settings from + the previous call.""" mocked_responses.post( PREDICTION_URL, json={}, @@ -324,7 +329,7 @@ def test_default_retry_after_custom_retry(mocked_responses) -> None: ) with pytest.raises(RetryError): dm.predict( - x=['test_prediction'], + x=["test_prediction"], max_retries=1, retry_status={777}, backoff_factor=0.1, @@ -338,18 +343,18 @@ def test_default_retry_after_custom_retry(mocked_responses) -> None: status=429, ) with pytest.raises(RetryError): - dm.predict(x=['test_prediction']) # use defaults + dm.predict(x=["test_prediction"]) # use defaults mocked_responses.assert_call_count(PREDICTION_URL, 16) # previous 2 + 1 attempt + default 13 retries = 16 def test_predict_400_error_message_extraction(mocked_responses) -> None: - """ Getting a 400 will render the attached message form the backend if present """ + """Getting a 400 will render the attached message form the backend if present""" mocked_responses.post( PREDICTION_URL, json={"message": "Here be a message in the response"}, status=400, - headers={'verta-request-id': 'AutoGeneratedTestId'}, + headers={"verta-request-id": "AutoGeneratedTestId"}, ) creds = EmailCredentials.load_from_os_env() dm = DeployedModel( @@ -358,20 +363,20 @@ def test_predict_400_error_message_extraction(mocked_responses) -> None: token=TOKEN, ) with pytest.raises(RuntimeError) as err: - dm.predict(x=['test_prediction']) + dm.predict(x=["test_prediction"]) assert str(err.value) == ( - 'deployed model encountered an error: Here be a message in the response' + "deployed model encountered an error: Here be a message in the response" ) def test_predict_400_error_message_missing(mocked_responses) -> None: - """ Getting a 401 error, with no message provided by the back-end will fall back - to raise_for_http_error style error formatting. 
- """ + """Getting a 400 error, with no message provided by the back-end will fall back + to raise_for_http_error style error formatting. + """ mocked_responses.post( PREDICTION_URL, status=400, - headers={'verta-request-id': 'AutoGeneratedTestId'}, + headers={"verta-request-id": "AutoGeneratedTestId"}, ) creds = EmailCredentials.load_from_os_env() dm = DeployedModel( @@ -380,16 +385,22 @@ def test_predict_400_error_message_missing(mocked_responses) -> None: token=TOKEN, ) with pytest.raises(HTTPError) as err: - dm.predict(x=['test_prediction']) + dm.predict(x=["test_prediction"]) assert str(err.value)[:-30] == ( - '400 Client Error: Bad Request for url: ' - 'https://test.dev.verta.ai/api/v1/predict/test_path at ' + "400 Client Error: Bad Request for url: " + "https://test.dev.verta.ai/api/v1/predict/test_path at " ) def test_batch_predict_with_one_batch_with_no_index(mocked_responses) -> None: - """ Call batch_predict with a single batch. """ - expected_df = pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}) + """Call batch_predict with a single batch.""" + pd = pytest.importorskip("pandas") + expected_df = pd.DataFrame( + { + "A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20], + } + ) expected_df_body = json.dumps(expected_df.to_dict(orient="split")) mocked_responses.post( BATCH_PREDICTION_URL, @@ -408,9 +419,15 @@ def test_batch_predict_with_one_batch_with_no_index(mocked_responses) -> None: def test_batch_predict_with_one_batch_with_index(mocked_responses) -> None: - """ Call batch_predict with a single batch, where the output has an index. 
""" - expected_df = pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]) + """Call batch_predict with a single batch, where the output has an index.""" + pd = pytest.importorskip("pandas") + expected_df = pd.DataFrame( + { + "A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20], + }, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ) expected_df_body = json.dumps(expected_df.to_dict(orient="split")) mocked_responses.post( BATCH_PREDICTION_URL, @@ -429,13 +446,15 @@ def test_batch_predict_with_one_batch_with_index(mocked_responses) -> None: def test_batch_predict_with_five_batches_with_no_indexes(mocked_responses) -> None: - """ Since the input has 5 rows and we're providing a batch_size of 1, we expect 5 batches.""" - expected_df_list = [pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), - pd.DataFrame({"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}), - pd.DataFrame({"C": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]}), - pd.DataFrame({"D": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}), - pd.DataFrame({"E": [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}), - ] + """Since the input has 5 rows and we're providing a batch_size of 1, we expect 5 batches.""" + pd = pytest.importorskip("pandas") + expected_df_list = [ + pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}), + pd.DataFrame({"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}), + pd.DataFrame({"C": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]}), + pd.DataFrame({"D": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}), + pd.DataFrame({"E": [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}), + ] for expected_df in expected_df_list: mocked_responses.add( responses.POST, @@ -456,20 +475,32 @@ def test_batch_predict_with_five_batches_with_no_indexes(mocked_responses) -> No def test_batch_predict_with_batches_and_indexes(mocked_responses) -> None: - """ Since the input has 5 rows and we're providing 
a batch_size of 1, we expect 5 batches. + """Since the input has 5 rows and we're providing a batch_size of 1, we expect 5 batches. Include an example of an index. """ + pd = pytest.importorskip("pandas") expected_df_list = [ - pd.DataFrame({"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - pd.DataFrame({"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - pd.DataFrame({"C": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]}, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - pd.DataFrame({"D": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - pd.DataFrame({"E": [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}, - index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - ] + pd.DataFrame( + {"A": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + pd.DataFrame( + {"B": [11, 12, 13, 14, 15, 16, 17, 18, 19, 20]}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + pd.DataFrame( + {"C": [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + pd.DataFrame( + {"D": [31, 32, 33, 34, 35, 36, 37, 38, 39, 40]}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + pd.DataFrame( + {"E": [41, 42, 43, 44, 45, 46, 47, 48, 49, 50]}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ), + ] for expected_df in expected_df_list: mocked_responses.add( responses.POST, @@ -483,7 +514,10 @@ def test_batch_predict_with_batches_and_indexes(mocked_responses) -> None: creds=creds, token=TOKEN, ) - input_df = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [11, 12, 13, 14, 15]}, index=["A", "B", "C", "D", "E"]) + input_df = pd.DataFrame( + {"a": [1, 2, 3, 4, 5], "b": [11, 12, 13, 14, 15]}, + index=["A", "B", "C", "D", "E"], + ) prediction_df = dm.batch_predict(input_df, 1) expected_final_df = 
pd.concat(expected_df_list) pd.testing.assert_frame_equal(expected_final_df, prediction_df) @@ -491,13 +525,14 @@ def test_batch_predict_with_batches_and_indexes(mocked_responses) -> None: @st.composite def generate_data(draw, max_rows=50, max_cols=6): - """ Return a dict that represents a dataframe. Generates ints, floats, and strings.""" + """Return a dict that represents a dataframe. Generates ints, floats, and strings.""" num_rows = draw(st.integers(min_value=1, max_value=max_rows)) num_cols = draw(st.integers(min_value=1, max_value=max_cols)) - col_names = draw(st.lists(st.text(), max_size=num_cols, min_size=num_cols, unique=True)) + col_names = draw( + st.lists(st.text(), max_size=num_cols, min_size=num_cols, unique=True) + ) data = {} for name in col_names: - type_probability = utils.gen_probability() if type_probability <= 0.3: col_values = st.integers() @@ -516,17 +551,20 @@ def generate_data(draw, max_rows=50, max_cols=6): return out_dict -@hypothesis.settings(deadline=None) # client utils make DataFrame handling slow at first +@hypothesis.settings( + deadline=None +) # client utils make DataFrame handling slow at first @given(json_df=generate_data(), batch_size=st.integers(min_value=1, max_value=10)) def test_batch(json_df, batch_size) -> None: - """ Test that the batch_predict method works with a variety of inputs. 
""" + """Test that the batch_predict method works with a variety of inputs.""" + pd = pytest.importorskip("pandas") with responses.RequestsMock() as rsps: if "index" in json_df: input_df = pd.DataFrame(json_df["data"], index=json_df["index"]) else: input_df = pd.DataFrame(json_df["data"]) for i in range(0, len(input_df), batch_size): - batch = input_df.iloc[i:i + batch_size] + batch = input_df.iloc[i : i + batch_size] serialized_batch = batch.to_dict(orient="split") rsps.add( responses.POST, @@ -542,4 +580,3 @@ def test_batch(json_df, batch_size) -> None: ) prediction_df = dm.batch_predict(input_df, batch_size=batch_size) pd.testing.assert_frame_equal(input_df, prediction_df) - diff --git a/client/verta/tests/unit_tests/registry/conftest.py b/client/verta/tests/unit_tests/registry/conftest.py index 0826862cef..d77cc05c11 100644 --- a/client/verta/tests/unit_tests/registry/conftest.py +++ b/client/verta/tests/unit_tests/registry/conftest.py @@ -8,7 +8,9 @@ import click import cloudpickle as cp +import jsonschema import pytest +import pytimeparse import requests from requests import post import urllib3 @@ -22,17 +24,13 @@ @pytest.fixture(scope="session") def dependency_testing_model() -> Type[VertaModelBase]: """Returns a model class that imports and calls external dependencies.""" - numpy = pytest.importorskip("numpy") - pd =pytest.importorskip("pandas") - sklearn = pytest.importorskip("sklearn") - torch = pytest.importorskip("torch") - PIL = pytest.importorskip("PIL") class DependencyTestingModel(VertaModelBase): """ Model class that imports and calls external dependencies in a variety of ways for the purpose of testing our model environment validation logic. """ + def __init__(self, artifacts): pass @@ -40,53 +38,56 @@ def __init__(self, artifacts): # extracted, thus we explicitly test the same scenarios with and without. 
@verify_io def predict( - self, - w: calendar.Calendar, # standard library in function arg - x: dt.datetime, # standard library in function arg via alias - y: numpy.ndarray, # 3rd-party module in function arg - z: Message, # 3rd-party module in function arg via class - ) -> pd.DataFrame: # 3rd-party module in return type hint - hour = x.hour # standard library usage in function body - runtime.log('error', 'Error') # 3rd-party module in function body (VERTA) - yaml_con = yaml.constructor # 3rd party module in function body - z = self.make_dataframe(y) # 3rd party module called indirectly + self, + w: calendar.Calendar, # standard library in function arg + x: dt.datetime, # standard library in function arg via alias + y: click.File, # 3rd-party module in function arg + z: Message, # 3rd-party module in function arg via class + ) -> cp.CloudPickler: # 3rd-party module in return type hint + hour = x.hour # standard library usage in function body + runtime.log("error", "Error") # 3rd-party module in function body (VERTA) + yaml_con = yaml.constructor # 3rd party module in function body + z = self.do_something(y) # 3rd party module called indirectly return z - def unwrapped_predict( - self, - a: json.JSONEncoder, # standard library in function arg - b: collecs.OrderedDict, # standard library in function arg via alias - c: sklearn.base.BaseEstimator, # 3rd-party module in function arg - d: cp.CloudPickler, # 3rd-party module in function arg via alias - ) -> requests.Timeout: # 3rd-party module in return type hint - _json = a.encode({'x':'y'}) # standard library usage in function body - with runtime.context(): # 3rd-party module in function body (VERTA) - runtime.log('error', 'Error') # 3rd-party module in function body (VERTA) - click_exc = click.ClickException # 3rd party module in function body - z = self.make_timeout() # 3rd party module called indirectly + self, + a: json.JSONEncoder, # standard library in function arg + b: collecs.OrderedDict, # standard library in 
function arg via alias + c: yaml.SafeLoader, # 3rd-party module in function arg + d: cp.CloudPickler, # 3rd-party module in function arg via alias + ) -> requests.Timeout: # 3rd-party module in return type hint + _json = a.encode({"x": "y"}) # standard library usage in function body + with runtime.context(): # 3rd-party module in function body (VERTA) + runtime.log( + "error", "Error" + ) # 3rd-party module in function body (VERTA) + click_exc = click.ClickException # 3rd party module in function body + z = self.make_timeout() # 3rd party module called indirectly return z - def make_dataframe(self, input): # No modules in function signature - return pd.DataFrame(input) # 3rd party module in function body + def make_dataframe(self, input): # No modules in function signature + return pytimeparse.parse(input) # 3rd party module in function body def make_message(self, input: str): msg = Message(input) return msg @staticmethod - def make_timeout(): # No modules in function signature - return requests.Timeout() # 3rd party module in function body + def make_timeout(): # No modules in function signature + return requests.Timeout() # 3rd party module in function body def post_request(self) -> None: post("https://www.verta.ai") # 3rd-party modules nested inside type constructs should still be extracted - def nested_multiple_returns_hint(self) -> Union[urllib3.Retry, PIL.UnidentifiedImageError]: - return urllib3.Retry or PIL.UnidentifiedImageError + def nested_multiple_returns_hint( + self, + ) -> Union[urllib3.Retry, jsonschema.exceptions.ValidationError]: + return urllib3.Retry or jsonschema.exceptions.ValidationError # 3rd-party modules nested inside type constructs should still be extracted - def nested_type_hint(self) -> Type[torch.NoneType]: - return torch.NoneType + def nested_type_hint(self) -> Type[jsonschema.TypeChecker]: + return jsonschema.TypeChecker return DependencyTestingModel diff --git 
a/client/verta/tests/unit_tests/registry/test_check_model_dependencies.py b/client/verta/tests/unit_tests/registry/test_check_model_dependencies.py index 7ba082647e..7e670e7c9d 100644 --- a/client/verta/tests/unit_tests/registry/test_check_model_dependencies.py +++ b/client/verta/tests/unit_tests/registry/test_check_model_dependencies.py @@ -9,27 +9,28 @@ from verta.registry import check_model_dependencies -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def complete_env() -> Python: - """ Environment with all 3rd-party packages expected to be extracted + """Environment with all 3rd-party packages expected to be extracted from the dependency_testing_model fixture. """ - return Python([ - 'click==0.0.1', - 'googleapis-common-protos==0.0.1', - 'numpy==0.0.1', - 'pandas==0.0.1', - 'Pillow==0.0.1', - 'requests==0.0.1', - 'scikit-learn==0.0.1', - 'torch==0.0.1', - 'urllib3==0.0.1', - 'PyYAML==0.0.1', - ]) # `verta` and `cloudpickle` included by default + return Python( + [ + "click", + "jsonschema", + "googleapis-common-protos", + "pytimeparse", + "requests", + "urllib3", + "PyYAML", + ] + ) # `verta` and `cloudpickle` included by default -def test_check_model_dependencies_complete(dependency_testing_model, complete_env) -> None: - """ Verify that check_model_dependencies extracts all the expected packages from +def test_check_model_dependencies_complete( + dependency_testing_model, complete_env +) -> None: + """Verify that check_model_dependencies extracts all the expected packages from the test model class (dependency_testing_model fixture) and correctly reconciles them against the provided environment (complete_env fixture). 
""" @@ -40,12 +41,14 @@ def test_check_model_dependencies_complete(dependency_testing_model, complete_en ) -def test_check_model_dependencies_missing_raise(dependency_testing_model, complete_env) -> None: - """ Verify that check_model_dependencies raises an exception, with the +def test_check_model_dependencies_missing_raise( + dependency_testing_model, complete_env +) -> None: + """Verify that check_model_dependencies raises an exception, with the correct message, for missing packages when `raise_for_missing` is True. """ incomplete_env = Python( - [r for r in complete_env.requirements if r != 'click==0.0.1'] + [r for r in complete_env.requirements if not r.startswith("click==")] ) # drop a single dependency to be caught with pytest.raises(RuntimeError) as err: check_model_dependencies( @@ -53,16 +56,25 @@ def test_check_model_dependencies_missing_raise(dependency_testing_model, comple environment=incomplete_env, raise_for_missing=True, ) - assert err.value.args[0] == "the following packages are required by the model but missing " \ - "from the environment:\nclick (installed via ['click'])" + assert ( + err.value.args[0] + == "the following packages are required by the model but missing " + "from the environment:\nclick (installed via ['click'])" + ) -def test_check_model_dependencies_missing_warning(dependency_testing_model, complete_env) -> None: - """ Verify that check_model_dependencies defaults to raising a warning, with +def test_check_model_dependencies_missing_warning( + dependency_testing_model, complete_env +) -> None: + """Verify that check_model_dependencies defaults to raising a warning, with the correct message, for missing packages when `raise_for_missing` is False. 
""" incomplete_env = Python( - [r for r in complete_env.requirements if r not in ['PyYAML==0.0.1', 'pandas==0.0.1']] + [ + r + for r in complete_env.requirements + if not r.startswith(("PyYAML==", "requests==")) + ] ) # drop a single dependency to be caught with warnings.catch_warnings(record=True) as caught_warnings: assert not check_model_dependencies( @@ -71,6 +83,8 @@ def test_check_model_dependencies_missing_warning(dependency_testing_model, comp raise_for_missing=False, ) warn_msg = caught_warnings[0].message.args[0] - assert warn_msg == "the following packages are required by the model but missing " \ - "from the environment:\npandas (installed via ['pandas'])" \ - "\nyaml (installed via ['PyYAML'])" + assert ( + warn_msg == "the following packages are required by the model but missing " + "from the environment:\nrequests (installed via ['requests'])" + "\nyaml (installed via ['PyYAML'])" + ) diff --git a/client/verta/tests/unit_tests/registry/test_model_dependencies.py b/client/verta/tests/unit_tests/registry/test_model_dependencies.py index 8af9c2938e..b537c2c803 100644 --- a/client/verta/tests/unit_tests/registry/test_model_dependencies.py +++ b/client/verta/tests/unit_tests/registry/test_model_dependencies.py @@ -6,7 +6,7 @@ def test_class_functions(dependency_testing_model) -> None: - """ Verify that all the functions in the test class are recognized and + """Verify that all the functions in the test class are recognized and returned. 
""" expected_func_names = [ @@ -29,41 +29,42 @@ def test_class_functions(dependency_testing_model) -> None: def test_modules_in_function_body_wrapped(dependency_testing_model) -> None: - """ Verify that modules used within a function body are extracted - as expected, for a function that is wrapped in verify_io """ + """Verify that modules used within a function body are extracted + as expected, for a function that is wrapped in verify_io""" func: Callable = dependency_testing_model.predict - expected_modules = {'verta', 'yaml'} + expected_modules = {"verta", "yaml"} extracted_modules: Set[str] = md.modules_in_function_body(func) assert extracted_modules == expected_modules def test_modules_in_function_body_unwrapped(dependency_testing_model) -> None: - """ Verify that modules used within a function body are extracted - as expected, for a function that is not wrapped in verify_io """ + """Verify that modules used within a function body are extracted + as expected, for a function that is not wrapped in verify_io""" func: Callable = dependency_testing_model.unwrapped_predict - expected_modules = {'verta', 'click'} + expected_modules = {"verta", "click"} extracted_modules: Set[str] = md.modules_in_function_body(func) assert extracted_modules == expected_modules def test_modules_in_function_body_return_line(dependency_testing_model) -> None: - """ Verify that modules used only within a functions return line are extracted + """Verify that modules used only within a functions return line are extracted as expected, including when aliased (which causes them to be stored differently)""" func: Callable = dependency_testing_model.make_dataframe - expected_modules = {'pandas'} + expected_modules = {"pytimeparse"} extracted_modules: Set[str] = md.modules_in_function_body(func) assert extracted_modules == expected_modules def test_modules_in_function_body_as_class_instance(dependency_testing_model) -> None: - """ Verify that modules introduced only via class instance are 
extracted + """Verify that modules introduced only via class instance are extracted as expected. """ func: Callable = dependency_testing_model.make_message - expected_modules = {'google'} + expected_modules = {"google"} extracted_modules: Set[str] = md.modules_in_function_body(func) assert extracted_modules == expected_modules + def tests_modules_in_function_body_as_function(dependency_testing_model) -> None: """Verify that modules introduced only via function directly imported from a 3rd-party module are extracted as expected. @@ -75,80 +76,77 @@ def tests_modules_in_function_body_as_function(dependency_testing_model) -> None def test_modules_in_function_signature_wrapped(dependency_testing_model) -> None: - """ Verify that modules used in function arguments are extracted as + """Verify that modules used in function arguments are extracted as expected when the function is wrapped in verify_io. """ func: Callable = dependency_testing_model.predict expected_modules = { - 'calendar', - 'datetime', - 'numpy', - 'google', - 'pandas', + "calendar", + "datetime", + "click", + "google", + "cloudpickle", } extracted_modules: Set[str] = md.modules_in_function_signature(func) assert extracted_modules == expected_modules def test_modules_in_function_signature_unwrapped(dependency_testing_model) -> None: - """ Verify that modules used in function arguments are extracted as + """Verify that modules used in function arguments are extracted as expected with no function wrappers. 
""" func: Callable = dependency_testing_model.unwrapped_predict expected_modules = { - 'json', - 'collections', - 'sklearn', - 'cloudpickle', - 'requests', + "json", + "collections", + "yaml", + "cloudpickle", + "requests", } extracted_modules: Set[str] = md.modules_in_function_signature(func) assert extracted_modules == expected_modules def test_modules_in_function_return_type_hint_nested(dependency_testing_model) -> None: - """ Verify that modules used in function return type hints are extracted + """Verify that modules used in function return type hints are extracted as expected when nested inside another type construct. """ func: Callable = dependency_testing_model.nested_type_hint - expected_modules = {'torch'} + expected_modules = {"jsonschema"} extracted_modules: Set[str] = md.modules_in_function_signature(func) assert extracted_modules == expected_modules -def test_modules_in_function_return_type_hint_multiple(dependency_testing_model) -> None: - """ Verify that modules used in function return type hints are extracted +def test_modules_in_function_return_type_hint_multiple( + dependency_testing_model, +) -> None: + """Verify that modules used in function return type hints are extracted as expected when multiple return types are specified. """ func: Callable = dependency_testing_model.nested_multiple_returns_hint - expected_modules = {'urllib3', 'PIL'} + expected_modules = {"urllib3", "jsonschema"} extracted_modules: Set[str] = md.modules_in_function_signature(func) assert extracted_modules == expected_modules def test_class_module_names(dependency_testing_model) -> None: - """ Verify that all expected module names are extracted as expected. 
- """ + """Verify that all expected module names are extracted as expected.""" expected_modules = { - 'builtins', - 'calendar', - 'click', - 'cloudpickle', - 'collections', - 'datetime', - 'google', - 'json', - 'numpy', - 'pandas', - 'PIL', - 'requests', - 'requests', - 'sklearn', - 'torch', - 'typing', - 'urllib3', - 'verta', - 'yaml', + "builtins", + "calendar", + "click", + "cloudpickle", + "collections", + "datetime", + "google", + "json", + "jsonschema", + "pytimeparse", + "requests", + "typing", + "urllib3", + "verta", + "yaml", } extracted_modules: Set[str] = md.class_module_names(dependency_testing_model) assert set(extracted_modules) == set(expected_modules) @@ -161,15 +159,8 @@ def test_package_names(dependency_testing_model) -> None: skipped. """ expected_packages = { - 'sklearn': ['scikit-learn'], - 'PIL': ['Pillow'], - 'yaml': ['PyYAML'], + "google": ["protobuf", "googleapis-common-protos"], + "yaml": ["PyYAML"], } - extracted_packages: Dict[str, List[str]] = md.package_names( - { - 'sklearn', - 'PIL', - 'yaml' - } - ) + extracted_packages: Dict[str, List[str]] = md.package_names({"google", "yaml"}) assert extracted_packages == expected_packages