JuliaStats · simonbyrne · Jun 27, 2017 · Jun 10, 2016 · Jun 10, 2016 · Oct 13, 2016
diff --git a/src/univariate.jl b/src/univariate.jl
@@ -67,29 +67,33 @@ function kde_range(boundary::(@compat Tuple{Real,Real}), npoints::Int)
 end
 
 # tabulate data for kde
-function tabulate(data::RealVector, midpoints::Range)
-    ndata = length(data)
+function tabulate(data::RealVector, weights::RealVector, midpoints::Range)
     npoints = length(midpoints)
     s = step(midpoints)
 
     # Set up a grid for discretized data
     grid = zeros(Float64, npoints)
-    ainc = 1.0 / (ndata*s*s)
+    ainc = 1.0 / (sum(weights)*s*s)
 
     # weighted discretization (cf. Jones and Lotwick)
-    for x in data
+    for (i,x) in enumerate(data)
         k = searchsortedfirst(midpoints,x)
         j = k-1
         if 1 <= j <= npoints-1
-            grid[j] += (midpoints[k]-x)*ainc
-            grid[k] += (x-midpoints[j])*ainc
+            grid[j] += (midpoints[k]-x)*ainc*weights[i]
+            grid[k] += (x-midpoints[j])*ainc*weights[i]
         end
     end
 
     # returns an un-convolved KDE
     UnivariateKDE(midpoints, grid)
 end
 
+function tabulate(data::RealVector, midpoints::Range)
+    weights = ones(data)
+    tabulate(data, weights, midpoints)
+end
+
 # convolve raw KDE with kernel
 # TODO: use in-place fft
 function conv(k::UnivariateKDE, dist::UnivariateDistribution)
@@ -118,31 +122,36 @@ function conv(k::UnivariateKDE, dist::UnivariateDistribution)
     UnivariateKDE(k.x, dens)
 end
 
+function uniformweights(data)
+    n = length(data)
+    fill(1/n, n)
+end
+
 # main kde interface methods
-function kde(data::RealVector, midpoints::Range, dist::UnivariateDistribution)
-    k = tabulate(data, midpoints)
+function kde(data::RealVector, weights::RealVector, midpoints::Range, dist::UnivariateDistribution)
+    k = tabulate(data, weights, midpoints)
     conv(k,dist)
 end
 
 function kde(data::RealVector, dist::UnivariateDistribution;
-             boundary::(@compat Tuple{Real,Real})=kde_boundary(data,std(dist)), npoints::Int=2048)
+             boundary::(@compat Tuple{Real,Real})=kde_boundary(data,std(dist)), npoints::Int=2048, weights=uniformweights(data))
 
     midpoints = kde_range(boundary,npoints)
-    kde(data,midpoints,dist)
+    kde(data,weights,midpoints,dist)
 end
 
 function kde(data::RealVector, midpoints::Range;
-            bandwidth=default_bandwidth(data), kernel=Normal)
+            bandwidth=default_bandwidth(data), kernel=Normal, weights=uniformweights(data))
     bandwidth > 0.0 || error("Bandwidth must be positive")
     dist = kernel_dist(kernel,bandwidth)
-    kde(data,midpoints,dist)
+    kde(data,weights,midpoints,dist)
 end
 
 function kde(data::RealVector; bandwidth=default_bandwidth(data), kernel=Normal,
-             npoints::Int=2048, boundary::(@compat Tuple{Real,Real})=kde_boundary(data,bandwidth))
+             npoints::Int=2048, boundary::(@compat Tuple{Real,Real})=kde_boundary(data,bandwidth), weights=uniformweights(data))
     bandwidth > 0.0 || error("Bandwidth must be positive")
     dist = kernel_dist(kernel,bandwidth)
-    kde(data,dist;boundary=boundary,npoints=npoints)
+    kde(data,dist;boundary=boundary,npoints=npoints,weights=weights)
 end
 
 # Select bandwidth using least-squares cross validation, from: