Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chore/dataframe formatter handle nulls #245

Merged
merged 5 commits into from
Feb 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 42 additions & 33 deletions src/code42cli/output_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json

import click
from pandas import DataFrame

from code42cli.logger.formatters import CEF_TEMPLATE
from code42cli.logger.formatters import map_event_to_cef
Expand Down Expand Up @@ -81,41 +80,51 @@ def _requires_list_output(self):

class DataFrameOutputFormatter:
def __init__(self, output_format):
output_format = output_format.upper() if output_format else OutputFormat.TABLE
self.output_format = output_format
self._format_func = DataFrame.to_string
self._output_args = {"index": False}
self.output_format = (
output_format.upper() if output_format else OutputFormat.TABLE
)

if output_format == OutputFormat.CSV:
self._format_func = DataFrame.to_csv
elif output_format == OutputFormat.RAW:
self._format_func = DataFrame.to_json
self._output_args.update(
{
"orient": "records",
"lines": False,
"index": True,
"default_handler": str,
}
)
elif output_format == OutputFormat.JSON:
self._format_func = DataFrame.to_json
self._output_args.update(
{
"orient": "records",
"lines": True,
"index": True,
"default_handler": str,
}
)
def get_formatted_output(self, df, **kwargs):
if self.output_format == OutputFormat.JSON:
defaults = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could use a helper method, e.g. `_get_json_defaults(lines=True)`, to build this defaults dict so it can be shared with the `OutputFormat.RAW` branch, which differs only in `lines=False`.

"orient": "records",
"lines": True,
"index": True,
"default_handler": str,
}
defaults.update(kwargs)
return df.to_json(**defaults)

elif self.output_format == OutputFormat.RAW:
defaults = {
"orient": "records",
"lines": False,
"index": True,
"default_handler": str,
}
defaults.update(kwargs)
return df.to_json(**defaults)

elif self.output_format == OutputFormat.CSV:
defaults = {"index": False}
defaults.update(kwargs)
df = df.fillna("")
return df.to_csv(**defaults)

elif self.output_format == OutputFormat.TABLE:
defaults = {"index": False}
defaults.update(kwargs)
df = df.fillna("")
return df.to_string(**defaults)

def _format_output(self, output, *args, **kwargs):
self._output_args.update(kwargs)
return self._format_func(output, *args, **self._output_args)
else:
raise ValueError(
f"DataFrameOutputFormatter received an invalid format: {self.output_format}"
)

def echo_formatted_dataframe(self, output, *args, **kwargs):
str_output = self._format_output(output, *args, **kwargs)
if len(output) <= 10:
def echo_formatted_dataframe(self, df, **kwargs):
str_output = self.get_formatted_output(df, **kwargs)
if len(df) <= 10:
click.echo(str_output)
else:
click.echo_via_pager(str_output)
Expand Down
100 changes: 55 additions & 45 deletions tests/test_output_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
from collections import OrderedDict

import pytest
from numpy import NaN
from pandas import DataFrame

import code42cli.output_formats as output_formats_module
from code42cli.maps import FILE_EVENT_TO_SIGNATURE_ID_MAP
from code42cli.output_formats import DataFrameOutputFormatter
from code42cli.output_formats import FileEventsOutputFormat
from code42cli.output_formats import FileEventsOutputFormatter
from code42cli.output_formats import OutputFormat
from code42cli.output_formats import to_cef


TEST_DATA = [
{
"type$": "RULE_METADATA",
Expand Down Expand Up @@ -771,53 +773,61 @@ def test_security_data_output_format_has_expected_options():


class TestDataFrameOutputFormatter:
def test_init_sets_format_func_to_formatted_json_function_when_json_format_option_is_passed(
self, mock_dataframe_to_json
):
output_format = output_formats_module.OutputFormat.RAW
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
mock_dataframe_to_json.assert_called_once_with(
TEST_DATAFRAME,
orient="records",
lines=False,
index=True,
default_handler=str,
test_df = DataFrame(
[
{"string_column": "string1", "int_column": 42, "null_column": None},
{"string_column": "string2", "int_column": 43, "null_column": NaN},
]
)

def test_format_when_none_passed_defaults_to_table(self):
formatter = DataFrameOutputFormatter(output_format=None)
assert formatter.output_format == OutputFormat.TABLE

def test_format_when_unknown_format_raises_value_error(self):
with pytest.raises(ValueError):
formatter = DataFrameOutputFormatter("NOT_A_FORMAT")
formatter.get_formatted_output(self.test_df)

def test_json_formatter_converts_to_expected_string(self):
formatter = DataFrameOutputFormatter(OutputFormat.JSON)
output = formatter.get_formatted_output(self.test_df)
assert (
output
== '{"string_column":"string1","int_column":42,"null_column":null}\n{"string_column":"string2","int_column":43,"null_column":null}'
)

def test_init_sets_format_func_to_json_function_when_raw_json_format_option_is_passed(
self, mock_dataframe_to_json
):
output_format = output_formats_module.OutputFormat.JSON
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
mock_dataframe_to_json.assert_called_once_with(
TEST_DATAFRAME,
orient="records",
lines=True,
index=True,
default_handler=str,
def test_raw_formatter_converts_to_expected_string(self):
formatter = DataFrameOutputFormatter(OutputFormat.RAW)
output = formatter.get_formatted_output(self.test_df)
assert (
output
== '[{"string_column":"string1","int_column":42,"null_column":null},{"string_column":"string2","int_column":43,"null_column":null}]'
)

def test_init_sets_format_func_to_table_function_when_table_format_option_is_passed(
self, mock_dataframe_to_string
):
output_format = output_formats_module.OutputFormat.TABLE
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
mock_dataframe_to_string.assert_called_once_with(TEST_DATAFRAME, index=False)
def test_csv_formatter_converts_to_expected_string(self):
formatter = DataFrameOutputFormatter(OutputFormat.CSV)
output = formatter.get_formatted_output(self.test_df)
assert (
output == "string_column,int_column,null_column\nstring1,42,\nstring2,43,\n"
)

def test_init_sets_format_func_to_csv_function_when_csv_format_option_is_passed(
self, mock_dataframe_to_csv
):
output_format = output_formats_module.OutputFormat.CSV
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
mock_dataframe_to_csv.assert_called_once_with(TEST_DATAFRAME, index=False)
def test_table_formatter_converts_to_expected_string(self):
formatter = DataFrameOutputFormatter(OutputFormat.TABLE)
output = formatter.get_formatted_output(self.test_df)
assert output == (
"string_column int_column null_column\n"
" string1 42 \n"
" string2 43 "
)

def test_init_sets_format_func_to_table_function_when_no_format_option_is_passed(
self, mock_dataframe_to_string
):
formatter = output_formats_module.DataFrameOutputFormatter(None)
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
mock_dataframe_to_string.assert_called_once_with(TEST_DATAFRAME, index=False)
def test_echo_formatted_dataframe_uses_pager_when_gt_10_rows(self, mocker):
mock_echo = mocker.patch("click.echo")
mock_pager = mocker.patch("click.echo_via_pager")
formatter = DataFrameOutputFormatter(OutputFormat.TABLE)
big_df = DataFrame([{"column": val} for val in range(11)])
small_df = DataFrame([{"column": val} for val in range(5)])
formatter.echo_formatted_dataframe(big_df)
formatter.echo_formatted_dataframe(small_df)
assert mock_echo.call_count == 1
assert mock_pager.call_count == 1