From 3b5c1c4e3df905d1163f9ec810da6aa0331ff548 Mon Sep 17 00:00:00 2001 From: Tim Abramson Date: Tue, 16 Feb 2021 13:22:51 -0600 Subject: [PATCH 1/4] refactor DataFrameOutputter for clarity and convert nulls to empty string in table/csv outputs --- src/code42cli/output_formats.py | 75 +++++++++++++----------- tests/test_output_formats.py | 100 ++++++++++++++++++-------------- 2 files changed, 97 insertions(+), 78 deletions(-) diff --git a/src/code42cli/output_formats.py b/src/code42cli/output_formats.py index b327f362c..2afb68544 100644 --- a/src/code42cli/output_formats.py +++ b/src/code42cli/output_formats.py @@ -3,7 +3,6 @@ import json import click -from pandas import DataFrame from code42cli.logger.formatters import CEF_TEMPLATE from code42cli.logger.formatters import map_event_to_cef @@ -81,41 +80,51 @@ def _requires_list_output(self): class DataFrameOutputFormatter: def __init__(self, output_format): - output_format = output_format.upper() if output_format else OutputFormat.TABLE - self.output_format = output_format - self._format_func = DataFrame.to_string - self._output_args = {"index": False} + self.output_format = ( + output_format.upper() if output_format else OutputFormat.TABLE + ) - if output_format == OutputFormat.CSV: - self._format_func = DataFrame.to_csv - elif output_format == OutputFormat.RAW: - self._format_func = DataFrame.to_json - self._output_args.update( - { - "orient": "records", - "lines": False, - "index": True, - "default_handler": str, - } - ) - elif output_format == OutputFormat.JSON: - self._format_func = DataFrame.to_json - self._output_args.update( - { - "orient": "records", - "lines": True, - "index": True, - "default_handler": str, - } - ) + def get_formatted_output(self, df, **kwargs): + if self.output_format == OutputFormat.JSON: + defaults = { + "orient": "records", + "lines": True, + "index": True, + "default_handler": str, + } + defaults.update(kwargs) + return df.to_json(**defaults) + + elif self.output_format == OutputFormat.RAW: + defaults = { + "orient": "records", + "lines": False, + "index": True, + "default_handler": str, + } + defaults.update(kwargs) + return df.to_json(**defaults) + + elif self.output_format == OutputFormat.CSV: + defaults = {"index": False} + defaults.update(kwargs) + df = df.fillna("") + return df.to_csv(**defaults) + + elif self.output_format == OutputFormat.TABLE: + defaults = {"index": False} + defaults.update(kwargs) + df = df.fillna("") + return df.to_string(**defaults) - def _format_output(self, output, *args, **kwargs): - self._output_args.update(kwargs) - return self._format_func(output, *args, **self._output_args) + else: + raise Exception( + f"DataFrameOutputFormatter received an invalid format: {self.output_format}" + ) - def echo_formatted_dataframe(self, output, *args, **kwargs): - str_output = self._format_output(output, *args, **kwargs) - if len(output) <= 10: + def echo_formatted_dataframe(self, df, **kwargs): + str_output = self.get_formatted_output(df, **kwargs) + if len(df) <= 10: click.echo(str_output) else: click.echo_via_pager(str_output) diff --git a/tests/test_output_formats.py b/tests/test_output_formats.py index b735955ea..ef8b6edf2 100644 --- a/tests/test_output_formats.py +++ b/tests/test_output_formats.py @@ -2,15 +2,17 @@ from collections import OrderedDict import pytest +from numpy import NaN from pandas import DataFrame import code42cli.output_formats as output_formats_module from code42cli.maps import FILE_EVENT_TO_SIGNATURE_ID_MAP +from code42cli.output_formats import DataFrameOutputFormatter from code42cli.output_formats import FileEventsOutputFormat from code42cli.output_formats import FileEventsOutputFormatter +from code42cli.output_formats import OutputFormat from code42cli.output_formats import to_cef - TEST_DATA = [ { "type$": "RULE_METADATA", @@ -771,53 +773,61 @@ def test_security_data_output_format_has_expected_options(): class TestDataFrameOutputFormatter: - def test_init_sets_format_func_to_formatted_json_function_when_json_format_option_is_passed( - self, mock_dataframe_to_json - ): - output_format = output_formats_module.OutputFormat.RAW - formatter = output_formats_module.DataFrameOutputFormatter(output_format) - formatter.echo_formatted_dataframe(TEST_DATAFRAME) - mock_dataframe_to_json.assert_called_once_with( - TEST_DATAFRAME, - orient="records", - lines=False, - index=True, - default_handler=str, + test_df = DataFrame( + [ + {"string_column": "string1", "int_column": 42, "null_column": None}, + {"string_column": "string2", "int_column": 43, "null_column": NaN}, + ] + ) + + def test_format_when_none_passed_defaults_to_table(self): + formatter = DataFrameOutputFormatter(output_format=None) + assert formatter.output_format == OutputFormat.TABLE + + def test_format_when_unknown_format_raises_cli_exception(self): + with pytest.raises(Exception): + formatter = DataFrameOutputFormatter("NOT_A_FORMAT") + formatter.get_formatted_output(self.test_df) + + def test_json_formatter_converts_to_expected_string(self): + formatter = DataFrameOutputFormatter(OutputFormat.JSON) + output = formatter.get_formatted_output(self.test_df) + assert ( + output + == '{"string_column":"string1","int_column":42,"null_column":null}\n{"string_column":"string2","int_column":43,"null_column":null}' ) - def test_init_sets_format_func_to_json_function_when_raw_json_format_option_is_passed( - self, mock_dataframe_to_json - ): - output_format = output_formats_module.OutputFormat.JSON - formatter = output_formats_module.DataFrameOutputFormatter(output_format) - formatter.echo_formatted_dataframe(TEST_DATAFRAME) - mock_dataframe_to_json.assert_called_once_with( - TEST_DATAFRAME, - orient="records", - lines=True, - index=True, - default_handler=str, + def test_raw_formatter_converts_to_expected_string(self): + formatter = DataFrameOutputFormatter(OutputFormat.RAW) + output = formatter.get_formatted_output(self.test_df) + assert ( + output + == '[{"string_column":"string1","int_column":42,"null_column":null},{"string_column":"string2","int_column":43,"null_column":null}]' ) - def test_init_sets_format_func_to_table_function_when_table_format_option_is_passed( - self, mock_dataframe_to_string - ): - output_format = output_formats_module.OutputFormat.TABLE - formatter = output_formats_module.DataFrameOutputFormatter(output_format) - formatter.echo_formatted_dataframe(TEST_DATAFRAME) - mock_dataframe_to_string.assert_called_once_with(TEST_DATAFRAME, index=False) + def test_csv_formatter_converts_to_expected_string(self): + formatter = DataFrameOutputFormatter(OutputFormat.CSV) + output = formatter.get_formatted_output(self.test_df) + assert ( + output == "string_column,int_column,null_column\nstring1,42,\nstring2,43,\n" + ) - def test_init_sets_format_func_to_csv_function_when_csv_format_option_is_passed( - self, mock_dataframe_to_csv - ): - output_format = output_formats_module.OutputFormat.CSV - formatter = output_formats_module.DataFrameOutputFormatter(output_format) - formatter.echo_formatted_dataframe(TEST_DATAFRAME) - mock_dataframe_to_csv.assert_called_once_with(TEST_DATAFRAME, index=False) + def test_table_formatter_converts_to_expected_string(self): + formatter = DataFrameOutputFormatter(OutputFormat.TABLE) + output = formatter.get_formatted_output(self.test_df) + assert output == ( + "string_column int_column null_column\n" + " string1 42 \n" + " string2 43 " + ) - def test_init_sets_format_func_to_table_function_when_no_format_option_is_passed( - self, mock_dataframe_to_string - ): - formatter = output_formats_module.DataFrameOutputFormatter(None) - formatter.echo_formatted_dataframe(TEST_DATAFRAME) - mock_dataframe_to_string.assert_called_once_with(TEST_DATAFRAME, index=False) + def test_echo_formatted_dataframe_uses_pager_when_gt_10_rows(self, mocker): + mock_echo = mocker.patch("click.echo") + mock_pager = mocker.patch("click.echo_via_pager") + formatter = DataFrameOutputFormatter(OutputFormat.TABLE) + big_df = DataFrame([{"column": val} for val in range(11)]) + small_df = DataFrame([{"column": val} for val in range(5)]) + formatter.echo_formatted_dataframe(big_df) + formatter.echo_formatted_dataframe(small_df) + assert mock_echo.call_count == 1 + assert mock_pager.call_count == 1 From c51d3d5773abd6b2e0b247aeaf3cbd1cdba88dd9 Mon Sep 17 00:00:00 2001 From: Tim Abramson Date: Wed, 17 Feb 2021 12:13:12 -0600 Subject: [PATCH 2/4] change Exception to ValueError --- src/code42cli/output_formats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/code42cli/output_formats.py b/src/code42cli/output_formats.py index 2afb68544..d5faba156 100644 --- a/src/code42cli/output_formats.py +++ b/src/code42cli/output_formats.py @@ -118,7 +118,7 @@ def get_formatted_output(self, df, **kwargs): return df.to_string(**defaults) else: - raise Exception( + raise ValueError( f"DataFrameOutputFormatter received an invalid format: {self.output_format}" ) From c036f010f7c8e5b0e92ec18c1725c9084edb8ae2 Mon Sep 17 00:00:00 2001 From: Tim Abramson Date: Wed, 17 Feb 2021 12:50:54 -0600 Subject: [PATCH 3/4] update test with correct exception --- tests/test_output_formats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_output_formats.py b/tests/test_output_formats.py index ef8b6edf2..4f38ab27f 100644 --- a/tests/test_output_formats.py +++ b/tests/test_output_formats.py @@ -785,7 +785,7 @@ def test_format_when_none_passed_defaults_to_table(self): assert formatter.output_format == OutputFormat.TABLE def test_format_when_unknown_format_raises_cli_exception(self): - with pytest.raises(Exception): + with pytest.raises(ValueError): formatter = DataFrameOutputFormatter("NOT_A_FORMAT") formatter.get_formatted_output(self.test_df) From cc9bb40a15092a237d5ed63b400dd953dfff8192 Mon Sep 17 00:00:00 2001 From: Tim Abramson Date: Wed, 17 Feb 2021 14:26:03 -0600 Subject: [PATCH 4/4] name test correctly --- tests/test_output_formats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_output_formats.py b/tests/test_output_formats.py index 4f38ab27f..9be0e4bc6 100644 --- a/tests/test_output_formats.py +++ b/tests/test_output_formats.py @@ -784,7 +784,7 @@ def test_format_when_none_passed_defaults_to_table(self): formatter = DataFrameOutputFormatter(output_format=None) assert formatter.output_format == OutputFormat.TABLE - def test_format_when_unknown_format_raises_cli_exception(self): + def test_format_when_unknown_format_raises_value_error(self): with pytest.raises(ValueError): formatter = DataFrameOutputFormatter("NOT_A_FORMAT") formatter.get_formatted_output(self.test_df)