From d1a244d32a807bf333ceeff012a38842f4a79d3c Mon Sep 17 00:00:00 2001
From: Villu Ruusmann
Date: Thu, 14 Dec 2023 11:35:52 +0200
Subject: [PATCH] Added PandasUtil#formatPandasCategorical(List<List<?>>) utility method

---
 .../java/org/jpmml/lightgbm/PandasUtil.java   | 20 +++++++++++--
 .../org/jpmml/lightgbm/PandasUtilTest.java    | 28 +++++++++++++++----
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasUtil.java b/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasUtil.java
index 4cb9305..df36e3a 100644
--- a/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasUtil.java
+++ b/pmml-lightgbm/src/main/java/org/jpmml/lightgbm/PandasUtil.java
@@ -43,13 +43,27 @@ public List<List<?>> parsePandasCategorical(String string){
 
 		JsonElement element = JsonParser.parseString(string);
 
-		Gson gson = new GsonBuilder()
-			.setObjectToNumberStrategy(ToNumberPolicy.LONG_OR_DOUBLE)
-			.create();
+		Gson gson = createGson();
 
 		return gson.fromJson(element, ListOfLists.class);
 	}
 
+	static
+	public String formatPandasCategorical(List<List<?>> objects){
+		Gson gson = createGson();
+
+		return PandasUtil.PREFIX_PANDAS_CATEGORICAL + gson.toJson(objects, ListOfLists.class);
+	}
+
+	static
+	private Gson createGson(){
+		Gson result = new GsonBuilder()
+			.setObjectToNumberStrategy(ToNumberPolicy.LONG_OR_DOUBLE)
+			.create();
+
+		return result;
+	}
+
 	static
 	private class ListOfLists extends ArrayList<List<?>> {
 	}
diff --git a/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasUtilTest.java b/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasUtilTest.java
index 7317201..1ce3942 100644
--- a/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasUtilTest.java
+++ b/pmml-lightgbm/src/test/java/org/jpmml/lightgbm/PandasUtilTest.java
@@ -29,18 +29,36 @@
 public class PandasUtilTest {
 
 	@Test
-	public void parse() throws Exception {
-		List<List<?>> pandasCategories = parsePandasCategorical("null");
+	public void parse(){
+		assertNull(parsePandasCategorical("null"));
 
-		assertNull(pandasCategories);
+		assertEquals(PandasUtilTest.CATEGORIES_LIST_OF_LISTS, parsePandasCategorical(PandasUtilTest.CATEGORIES_STRING));
+	}
 
-		pandasCategories = parsePandasCategorical("[[\"null\", \"A\", \"B, B\", \"C, [C], C\"], [-2, -1, 0, 1, 2], [-2.0, -1.0, 0.0, 1.0, 2.0], [false, true]]");
+	@Test
+	public void format(){
+		assertEquals("null", formatPandasCategorical(null));
 
-		assertEquals(Arrays.asList(Arrays.asList("null", "A", "B, B", "C, [C], C"), Arrays.asList(-2L, -1L, 0L, 1L, 2L), Arrays.asList(-2d, -1d, 0d, 1d, 2d), Arrays.asList(Boolean.FALSE, Boolean.TRUE)), pandasCategories);
+		assertEquals(removeWhitespace(PandasUtilTest.CATEGORIES_STRING), removeWhitespace(formatPandasCategorical(PandasUtilTest.CATEGORIES_LIST_OF_LISTS)));
 	}
 
 	static
 	private List<List<?>> parsePandasCategorical(String value){
 		return PandasUtil.parsePandasCategorical(PandasUtil.PREFIX_PANDAS_CATEGORICAL + value);
 	}
+
+	static
+	private String formatPandasCategorical(List<List<?>> objects){
+		String result = PandasUtil.formatPandasCategorical(objects);
+
+		return result.substring(PandasUtil.PREFIX_PANDAS_CATEGORICAL.length());
+	}
+
+	static
+	private String removeWhitespace(String string){
+		return string.replaceAll("\\s","");
+	}
+
+	private static final String CATEGORIES_STRING = "[[\"null\", \"A\", \"B, B\", \"C, [C], C\"], [-2, -1, 0, 1, 2], [-2.0, -1.0, 0.0, 1.0, 2.0], [false, true]]";
+	private static final List<List<?>> CATEGORIES_LIST_OF_LISTS = Arrays.asList(Arrays.asList("null", "A", "B, B", "C, [C], C"), Arrays.asList(-2L, -1L, 0L, 1L, 2L), Arrays.asList(-2d, -1d, 0d, 1d, 2d), Arrays.asList(Boolean.FALSE, Boolean.TRUE));
 }
\ No newline at end of file