From 7913d32ab0de7584276affc27e0f3e97e9052ef7 Mon Sep 17 00:00:00 2001
From: Karl Burtram
Date: Wed, 29 Jul 2020 16:05:23 -0700
Subject: [PATCH] Update transform and add tests (#1024)

---
Review notes (free-form text between the "---" separator and the diffstat):

  * GetColumnLabels, branch taken when there is no DateTime column:
    minColumnIndex is assigned from the loop variable that walks
    stringColumns, so it is a position inside that list and not a position
    inside labels. The group label is therefore written to
    labels[stringColumns[minColumnIndex].ColumnIndex] instead of
    labels[minColumnIndex]. The two indices only coincide when the string
    columns occupy the leading positions of a row, which is the layout of
    every test in this patch, so the tests did not expose the difference.
  * One surplus "+ }" was dropped from the end of the second hunk. With it
    the hunk carried 47 new-side lines against a "+60,46" header and closed
    one more brace than it opened.
  * New test Tranform_SlicewrGroupSlicerTime renamed to
    Tranform_SlicerGroupSlicerTime (typo).
  * The reviewed copy of this patch had lost its line breaks and
    indentation, and text inside angle brackets appears to have been
    stripped (generic type arguments such as "Dictionary>", the author
    address). Line breaks and conventional indentation are restored here;
    stripped tokens are left exactly as found and must be confirmed against
    the target file. Whitespace-only "-"/"+" pairs look identical because
    the original spacing could not be recovered.

 .../DataTransformation.cs                     | 55 ++++++++++----
 .../DataTransformationTests.cs                | 74 +++++++++++++++++--
 2 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/src/Microsoft.InsightsGenerator/DataTransformation.cs b/src/Microsoft.InsightsGenerator/DataTransformation.cs
index 8dde847b01..7de2ba2abf 100644
--- a/src/Microsoft.InsightsGenerator/DataTransformation.cs
+++ b/src/Microsoft.InsightsGenerator/DataTransformation.cs
@@ -36,7 +36,7 @@ private string[] GetColumnLabels(DataArray array)
             Dictionary> columnInfo = new Dictionary>();
             for (int column = 0; column < columnCount; ++column)
             {
-                int distinctValues;
+                int distinctValues;
                 DataArray.DataType dataType = GetColumnType(array, column, out distinctValues);
                 if (!columnInfo.ContainsKey(dataType))
                 {
@@ -60,21 +60,46 @@ private string[] GetColumnLabels(DataArray array)
                 {
                     labels[dateColumns[i].ColumnIndex] = "input_t_" + i;
                 }
-            }
-
-            if (columnInfo.ContainsKey(DataArray.DataType.String))
-            {
-                int startingIndex = 0;
-                List stringColumns = columnInfo[DataArray.DataType.String];
-                if (stringColumns.Count > 1)
+                if (columnInfo.ContainsKey(DataArray.DataType.String))
                 {
-                    labels[stringColumns[startingIndex].ColumnIndex] = "input_g_0";
-                    ++startingIndex;
+                    List stringColumns = columnInfo[DataArray.DataType.String];
+                    for (int i = 0; i < stringColumns.Count; ++i)
+                    {
+                        labels[stringColumns[i].ColumnIndex] = "slicer_" + i;
+                    }
                 }
-
-                for (int i = 0; i < stringColumns.Count - startingIndex; ++i)
+            }
+            else
+            {
+                if (columnInfo.ContainsKey(DataArray.DataType.String))
                 {
-                    labels[stringColumns[i + startingIndex].ColumnIndex] = "slicer_" + i;
+                    int minDistinctValue = -1;
+                    int minColumnIndex = -1;
+
+                    List stringColumns = columnInfo[DataArray.DataType.String];
+                    for (int i = 0; i < stringColumns.Count; ++i)
+                    {
+                        if (minDistinctValue == -1 || minDistinctValue > stringColumns[i].DistinctValues)
+                        {
+                            minDistinctValue = stringColumns[i].DistinctValues;
+                            minColumnIndex = i;
+                        }
+                    }
+
+                    labels[stringColumns[minColumnIndex].ColumnIndex] = "input_g_0";
+
+                    int adjustIndex = 0;
+                    for (int i = 0; i < stringColumns.Count; ++i)
+                    {
+                        if (i != minColumnIndex)
+                        {
+                            labels[stringColumns[i].ColumnIndex] = "slicer_" + (i - adjustIndex);
+                        }
+                        else
+                        {
+                            ++adjustIndex;
+                        }
+                    }
                 }
             }
 
@@ -94,7 +119,7 @@ private DataArray.DataType GetColumnType(DataArray array, int column, out int di
             {
                 // count number of distinct values
                 HashSet values = new HashSet();
-                for (int row = 0; row < array.Cells.Length; ++row)
+                for (int row = 0; row < array.Cells.Length; ++row)
                 {
                     if (!values.Contains(array.Cells[row][column]))
                     {
@@ -120,7 +145,7 @@ private DataArray.DataType GetColumnType(DataArray array, int column, out int di
             {
                 return DataArray.DataType.Number;
             }
-
+
             DateTime dateValue;
             if (DateTime.TryParse(firstValueString, out dateValue))
             {
diff --git a/test/Microsoft.InsightsGenerator.UnitTests/DataTransformationTests.cs b/test/Microsoft.InsightsGenerator.UnitTests/DataTransformationTests.cs
index 130b42414e..cbef459afe 100644
--- a/test/Microsoft.InsightsGenerator.UnitTests/DataTransformationTests.cs
+++ b/test/Microsoft.InsightsGenerator.UnitTests/DataTransformationTests.cs
@@ -3,7 +3,6 @@
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 //
 
-using System;
 using Xunit;
 
 namespace Microsoft.InsightsGenerator.UnitTests
@@ -67,7 +66,7 @@ public void Tranform_TimeSlicerCount_ProvidedTypes()
         }
 
         [Fact]
-        public void Tranform_TimeGroupSlicerCount()
+        public void Tranform_TimeSlicerSlicerCount()
         {
             DataTransformer transformer = new DataTransformer();
             object[][] cells = new object[5][];
@@ -85,13 +84,13 @@ public void Tranform_TimeGroupSlicerCount()
 
             array = transformer.Transform(array);
             Assert.Equal(array.TransformedColumnNames[0], "input_t_0");
-            Assert.Equal(array.TransformedColumnNames[1], "input_g_0");
-            Assert.Equal(array.TransformedColumnNames[2], "slicer_0");
+            Assert.Equal(array.TransformedColumnNames[1], "slicer_0");
+            Assert.Equal(array.TransformedColumnNames[2], "slicer_1");
             Assert.Equal(array.TransformedColumnNames[3], "output_0");
         }
 
         [Fact]
-        public void Tranform_TimeSlicerCountGroup()
+        public void Tranform_TimeSlicerCountSlicer()
         {
             DataTransformer transformer = new DataTransformer();
             object[][] cells = new object[5][];
@@ -111,10 +110,10 @@ public void Tranform_TimeSlicerCountGroup()
             Assert.Equal(array.TransformedColumnNames[0], "input_t_0");
             Assert.Equal(array.TransformedColumnNames[1], "slicer_0");
             Assert.Equal(array.TransformedColumnNames[2], "output_0");
-            Assert.Equal(array.TransformedColumnNames[3], "input_g_0");
+            Assert.Equal(array.TransformedColumnNames[3], "slicer_1");
         }
 
-        [Fact]
+        [Fact]
         public void Tranform_TimeSlicerCountCount()
         {
             DataTransformer transformer = new DataTransformer();
@@ -134,5 +133,66 @@ public void Tranform_TimeSlicerCountCount()
             Assert.Equal(array.TransformedColumnNames[2], "output_0");
             Assert.Equal(array.TransformedColumnNames[3], "output_1");
         }
+
+        [Fact]
+        public void Tranform_GroupSlicerTime()
+        {
+            DataTransformer transformer = new DataTransformer();
+            object[][] cells = new object[2][];
+            cells[0] = new object[3] { "1st Street", "Redmond", 110 };
+            cells[1] = new object[3] { "2nd Street", "Bellevue", 160 };
+
+            DataArray array = new DataArray()
+            {
+                ColumnNames = new string[] { "Address", "City", "Count" },
+                Cells = cells
+            };
+
+            array = transformer.Transform(array);
+            Assert.Equal(array.TransformedColumnNames[0], "input_g_0");
+            Assert.Equal(array.TransformedColumnNames[1], "slicer_0");
+            Assert.Equal(array.TransformedColumnNames[2], "output_0");
+        }
+
+        [Fact]
+        public void Tranform_SlicerGroupSlicerTime()
+        {
+            DataTransformer transformer = new DataTransformer();
+            object[][] cells = new object[2][];
+            cells[0] = new object[4] { "1st Street", "Redmond", "North", 110 };
+            cells[1] = new object[4] { "2nd Street", "Redmond", "East", 160 };
+
+            DataArray array = new DataArray()
+            {
+                ColumnNames = new string[] { "Address", "City", "Direction", "Count" },
+                Cells = cells
+            };
+
+            array = transformer.Transform(array);
+            Assert.Equal(array.TransformedColumnNames[0], "slicer_0");
+            Assert.Equal(array.TransformedColumnNames[1], "input_g_0");
+            Assert.Equal(array.TransformedColumnNames[2], "slicer_1");
+            Assert.Equal(array.TransformedColumnNames[3], "output_0");
+        }
+
+        [Fact]
+        public void Tranform_SlicerGroupTime()
+        {
+            DataTransformer transformer = new DataTransformer();
+            object[][] cells = new object[2][];
+            cells[0] = new object[3] { "1st Street", "Redmond", 110 };
+            cells[1] = new object[3] { "2nd Street", "Redmond", 160 };
+
+            DataArray array = new DataArray()
+            {
+                ColumnNames = new string[] { "Address", "City", "Count" },
+                Cells = cells
+            };
+
+            array = transformer.Transform(array);
+            Assert.Equal(array.TransformedColumnNames[0], "slicer_0");
+            Assert.Equal(array.TransformedColumnNames[1], "input_g_0");
+            Assert.Equal(array.TransformedColumnNames[2], "output_0");
+        }
     }
 }