Skip to content

Commit

Permalink
Fix #1155. (#1165)
Browse files Browse the repository at this point in the history
  • Loading branch information
ASmirnov-HORIS authored Aug 20, 2024
1 parent 7f534e8 commit 858a708
Show file tree
Hide file tree
Showing 7 changed files with 617 additions and 12 deletions.
549 changes: 549 additions & 0 deletions docs/f-24f/new_stat_bin_vars.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions future_changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

### Added

- New variables computed by the `'bin'` statistic: `'..sumprop..'`, `'..sumpct..'` [[#1155](https://github.com/JetBrains/lets-plot/issues/1155)].

See: [example notebook](https://nbviewer.jupyter.org/github/JetBrains/lets-plot/blob/master/docs/f-24f/new_stat_bin_vars.ipynb).

### Changed

### Fixed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ open class BinStat(
val statX = ArrayList<Double>()
val statCount = ArrayList<Double>()
val statDensity = ArrayList<Double>()
val statSumProp = ArrayList<Double>()
val statSumPct = ArrayList<Double>()

val rangeX = statCtx.overallXRange()
if (rangeX != null) { // null means all input values are null
Expand All @@ -61,6 +63,8 @@ open class BinStat(
statX.addAll(binsData.x)
statCount.addAll(binsData.count)
statDensity.addAll(binsData.density)
statSumProp.addAll(binsData.sumProp)
statSumPct.addAll(binsData.sumPct)
}

if (threshold != null) {
Expand All @@ -72,6 +76,8 @@ open class BinStat(
dropList.forEach {
statCount[it] = Double.NaN
statDensity[it] = Double.NaN
statSumProp[it] = Double.NaN
statSumPct[it] = Double.NaN
}

// resolution hack - need at least two consecutive X values, or width of the bin will be incorrect
Expand All @@ -87,6 +93,8 @@ open class BinStat(
.putNumeric(Stats.X, statX)
.putNumeric(Stats.COUNT, statCount)
.putNumeric(Stats.DENSITY, statDensity)
.putNumeric(Stats.SUMPROP, statSumProp)
.putNumeric(Stats.SUMPCT, statSumPct)
.build()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ object BinStatUtil {
xPosKind: BinStat.XPosKind,
xPos: Double,
binOptions: BinOptions
): BinsData {
): HistBinsData {
val (binCount, binWidth, startX) = getBinningParameters(rangeX, xPosKind, xPos, binOptions)

// density plot area should be == 1
Expand Down Expand Up @@ -192,7 +192,7 @@ object BinStatUtil {
binWidth: Double,
weightAtIndex: (Int) -> Double,
densityNormalizingFactor: Double
): BinsData {
): HistBinsData {

var totalCount = 0.0
val countByBinIndex = HashMap<Int, MutableDouble>()
Expand Down Expand Up @@ -220,6 +220,7 @@ object BinStatUtil {
val x = ArrayList<Double>()
val counts = ArrayList<Double>()
val densities = ArrayList<Double>()
val sumProps = ArrayList<Double>()

val x0 = startX + binWidth / 2
for (i in 0 until binCount) {
Expand All @@ -232,12 +233,14 @@ object BinStatUtil {
}

counts.add(count)
val density = count / totalCount * densityNormalizingFactor
val sumProp = count / totalCount
sumProps.add(sumProp)
val density = sumProp * densityNormalizingFactor
densities.add(density)
}

// return BinsData(x, counts, densities, dataIndicesByBinIndex)
return BinsData(x, counts, densities, List(x.size) { binWidth })
return HistBinsData(x, counts, densities, sumProps, sumProps.map { it * 100 }, List(x.size) { binWidth })
}

private fun computeDotdensityBins(
Expand Down Expand Up @@ -292,10 +295,19 @@ object BinStatUtil {

data class CountAndWidth(val count: Int, val width: Double)

class BinsData(
internal val x: List<Double>,
internal val count: List<Double>,
internal val density: List<Double>,
internal val binWidth: List<Double>
open class BinsData(
internal open val x: List<Double>,
internal open val count: List<Double>,
internal open val density: List<Double>,
internal open val binWidth: List<Double>
)

class HistBinsData(
internal override val x: List<Double>,
internal override val count: List<Double>,
internal override val density: List<Double>,
internal val sumProp: List<Double>,
internal val sumPct: List<Double>,
internal override val binWidth: List<Double>
) : BinsData(x, count, density, binWidth)
}
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,14 @@ class BinStatUtilTest {
assertContentEquals(listOf(-0.5, 0.0, 0.5, 1.0, 1.5, 2.0), statData.x)
assertContentEquals(listOf(1.0, 2.0, 0.0, 0.0, 1.0, 0.0), statData.count)
assertContentEquals(listOf(0.5, 1.0, 0.0, 0.0, 0.5, 0.0), statData.density)
assertContentEquals(listOf(0.25, 0.5, 0.0, 0.0, 0.25, 0.0), statData.sumProp)
assertContentEquals(listOf(25.0, 50.0, 0.0, 0.0, 25.0, 0.0), statData.sumPct)
}

@Test
fun checkHistogramDensityArea() {
fun checkHistogramNormalizedVariables() {
val tolerance = 1e-13

val checks = listOf(
listOf(0.0),
listOf(0.0, 1.0, 1.0),
Expand All @@ -157,7 +161,11 @@ class BinStatUtilTest {
val statData = BinStatUtil.computeHistogramStatSeries(data, rangeX, valuesX, xPosKind, xPos, binOptions)
val widthFactor = if (binWidth > 0) binWidth else 1.0
val area = widthFactor * statData.density.sum()
assertEquals(1.0, area, 1e-14)
assertEquals(1.0, area, tolerance)
val sumPropTotal = statData.sumProp.sum()
assertEquals(1.0, sumPropTotal, tolerance)
val sumPctTotal = statData.sumPct.sum()
assertEquals(100.0, sumPctTotal, tolerance)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class BinStatTest {
null
)
val statDf = stat.apply(df, SimpleStatContext(df))
DataFrameAssert.assertHasVars(statDf, listOf(Stats.X, Stats.COUNT, Stats.DENSITY), binCount)
DataFrameAssert.assertHasVars(statDf, listOf(Stats.X, Stats.COUNT, Stats.DENSITY, Stats.SUMPROP, Stats.SUMPCT), binCount)
return statDf
}

Expand Down Expand Up @@ -53,6 +53,12 @@ class BinStatTest {

// expecting density = [1 / width]
assertThat(statDf.getNumeric(Stats.DENSITY), Matchers.contains(1.0 / binWidth))

// expecting sumprop = [1]
assertThat(statDf.getNumeric(Stats.SUMPROP), Matchers.contains(1.0))

// expecting sumpct = [100]
assertThat(statDf.getNumeric(Stats.SUMPCT), Matchers.contains(100.0))
}

@Test
Expand All @@ -72,6 +78,14 @@ class BinStatTest {
// expecting density sum is equal to 1 / width
val area = binWidth * statDf.getNumeric(Stats.DENSITY).filterNotNull().sum()
assertThat(area, Matchers.closeTo(1.0, 1e-12))

// expecting sumprop sum is equal to 1
val sumPropTotal = statDf.getNumeric(Stats.SUMPROP).filterNotNull().sum()
assertThat(sumPropTotal, Matchers.closeTo(1.0, 1e-12))

// expecting sumpct sum is equal to 100
val sumPctTotal = statDf.getNumeric(Stats.SUMPCT).filterNotNull().sum()
assertThat(sumPctTotal, Matchers.closeTo(100.0, 1e-12))
}

@Test
Expand All @@ -91,5 +105,13 @@ class BinStatTest {
// expecting density sum is equal to 1 / width
val area = binWidth * statDf.getNumeric(Stats.DENSITY).filterNotNull().sum()
assertThat(area, Matchers.closeTo(1.0, 1e-12))

// expecting sumprop sum is equal to 1
val sumPropTotal = statDf.getNumeric(Stats.SUMPROP).filterNotNull().sum()
assertThat(sumPropTotal, Matchers.closeTo(1.0, 1e-12))

// expecting sumpct sum is equal to 100
val sumPctTotal = statDf.getNumeric(Stats.SUMPCT).filterNotNull().sum()
assertThat(sumPctTotal, Matchers.closeTo(100.0, 1e-12))
}
}
2 changes: 2 additions & 0 deletions python-package/lets_plot/plot/geom.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,8 @@ def geom_histogram(mapping=None, *, data=None, stat=None, position=None, show_le
- ..count.. : number of points with x-axis coordinate in the same bin.
- ..density.. : normalised number of points so that plot area is 1.
- ..sumprop.. : normalised number of points so that sum of y-values is 1.
- ..sumpct.. : normalised number of points so that sum of y-values is 100.
- ..binwidth.. : width of each bin.
`geom_histogram()` understands the following aesthetics mappings:
Expand Down

0 comments on commit 858a708

Please sign in to comment.