Skip to content

Commit

Permalink
Cleanup and merge #2909 (#2955)
Browse files Browse the repository at this point in the history
* Sorting boxenplot

* Boxenplot separate kws

Removed `kwargs` which were used to draw the median lines and scatter plot of outliers previously.

Added separate kwargs - `box_kws`, `line_kws` (drawing the median lines) and `flier_kws` (for the scatter of outliers).

Updated the matching docstring.

* In the previous commit most code on the categorical.py file was auto-reformatted. Here it is reverted and only the changes to `seaborn.categorical.boxenplot` and `seaborn.categorical._LVPlotter` are kept.

* Reinserted blank lines in docstring.

* - Removed redundant indentation in `boxenplot` function
- Removed commented out code in the `plot` function

* Removed default kwargs from `plot`

* Removing commented out code

* Reverted to ternary expressions

* Replaced default kwargs assignment to box_kws
Disentangled the nested for loop for default kwargs assignment

* Removed remaining `kwargs` item in docstring

* Resolved incorrect reference in the box_kws item on the docstring.

* Resolved incorrect descriptions for box_kws, line_kws and flier_kws.

* Changed line_kws update to source arguments from box_kws if there is only a single data point.

* Added line_kws test

* Added flier_kws test, renamed line_kws test

* Tests - further work is required in expanding the tests.
Two current issues
(a) most are not testing when multiple categories are used on the x-axis, but only a single one.
(b) the tests for the box_kws functionality are very slim.

* Fix lint issues

* Fix pinned tests

* Update release notes

* Cleanup boxenplot colors test

Co-authored-by: EitanHemed <37670372+EitanHemed@users.noreply.github.com>
  • Loading branch information
mwaskom and EitanHemed authored Aug 13, 2022
1 parent e9ad419 commit b1db0f7
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 43 deletions.
2 changes: 2 additions & 0 deletions doc/whatsnew/v0.12.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ Other updates

- |Enhancement| It is now possible to plot a discrete :func:`histplot` as a step function or polygon (:pr:`2859`).

- |Enhancement| It is now possible to customize the appearance of elements in a :func:`boxenplot` with `box_kws`/`line_kws`/`flier_kws` (:pr:`2909`).

- |Fix| Improved integration with the matplotlib color cycle in most axes-level functions (:pr:`2449`).

- |Fix| Fixed a regression in 0.11.2 that caused some functions to stall indefinitely or raise when the input data had a duplicate index (:pr:`2776`).
Expand Down
99 changes: 68 additions & 31 deletions seaborn/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1840,24 +1840,52 @@ def _width_functions(self, width_func):

def _lvplot(self, box_data, positions,
color=[255. / 256., 185. / 256., 0.],
widths=1, ax=None, **kws):
widths=1, ax=None, box_kws=None,
flier_kws=None,
line_kws=None):

# -- Default keyword dicts - based on
# distributions.plot_univariate_histogram
box_kws = {} if box_kws is None else box_kws.copy()
flier_kws = {} if flier_kws is None else flier_kws.copy()
line_kws = {} if line_kws is None else line_kws.copy()

# Set the default kwargs for the boxes
box_default_kws = dict(edgecolor=self.gray,
linewidth=self.linewidth)
for k, v in box_default_kws.items():
box_kws.setdefault(k, v)

# Set the default kwargs for the lines denoting medians
line_default_kws = dict(
color=".15", alpha=0.45, solid_capstyle="butt", linewidth=self.linewidth
)
for k, v in line_default_kws.items():
line_kws.setdefault(k, v)

# Set the default kwargs for the outliers scatterplot
flier_default_kws = dict(marker='d', color=self.gray)
for k, v in flier_default_kws.items():
flier_kws.setdefault(k, v)

vert = self.orient == "v"
x = positions[0]
box_data = np.asarray(box_data)

# If we only have one data point, plot a line
if len(box_data) == 1:
kws.update({
'color': self.gray, 'linestyle': '-', 'linewidth': self.linewidth
line_kws.update({
'color': box_kws['edgecolor'],
'linestyle': box_kws.get('linestyle', '-'),
'linewidth': max(box_kws["linewidth"], line_kws["linewidth"])
})
ys = [box_data[0], box_data[0]]
xs = [x - widths / 2, x + widths / 2]
if vert:
xx, yy = xs, ys
else:
xx, yy = ys, xs
ax.plot(xx, yy, **kws)
ax.plot(xx, yy, **line_kws)
else:
# Get the number of data points and calculate "depth" of
# letter-value plot
Expand Down Expand Up @@ -1912,43 +1940,44 @@ def horz_perc_box(x, b, i, k, w):
xs_outliers = outliers
ys_outliers = np.full(len(outliers), x)

boxes = [box_func(x, b[0], i, k, b[1])
for i, b in enumerate(zip(box_ends, w_area))]

# Plot the medians
ax.plot(
xs_median,
ys_median,
c=".15",
alpha=0.45,
solid_capstyle="butt",
linewidth=self.linewidth,
**kws
**line_kws
)

# Plot outliers (if any)
if len(outliers) > 0:
ax.scatter(xs_outliers, ys_outliers, marker='d',
c=self.gray, **kws)
ax.scatter(xs_outliers, ys_outliers,
**flier_kws
)

# Construct a color map from the input color
rgb = [hex_color, (1, 1, 1)]
cmap = mpl.colors.LinearSegmentedColormap.from_list('new_map', rgb)
# Make sure that the last boxes contain hue and are not pure white
rgb = [hex_color, cmap(.85)]
cmap = mpl.colors.LinearSegmentedColormap.from_list('new_map', rgb)
collection = PatchCollection(
boxes, cmap=cmap, edgecolor=self.gray, linewidth=self.linewidth
)

# Update box_kws with `cmap` if not defined in dict until now
box_kws.setdefault('cmap', cmap)

boxes = [box_func(x, b[0], i, k, b[1])
for i, b in enumerate(zip(box_ends, w_area))]

collection = PatchCollection(boxes, **box_kws)

# Set the color gradation, first box will have color=hex_color
collection.set_array(np.array(np.linspace(1, 0, len(boxes))))

# Plot the boxes
ax.add_collection(collection)

def draw_letter_value_plot(self, ax, kws):
def draw_letter_value_plot(self, ax, box_kws=None, flier_kws=None,
line_kws=None):
"""Use matplotlib to draw a letter value plot on an Axes."""

for i, group_data in enumerate(self.plot_data):

if self.plot_hues is None:
Expand All @@ -1972,7 +2001,9 @@ def draw_letter_value_plot(self, ax, kws):
color=color,
widths=self.width,
ax=ax,
**kws)
box_kws=box_kws,
flier_kws=flier_kws,
line_kws=line_kws)

else:
# Draw nested groups of boxes
Expand Down Expand Up @@ -2001,14 +2032,16 @@ def draw_letter_value_plot(self, ax, kws):
color=color,
widths=self.nested_width,
ax=ax,
**kws)
box_kws=box_kws,
flier_kws=flier_kws,
line_kws=line_kws)

# Autoscale the values axis to make sure all patches are visible
ax.autoscale_view(scalex=self.orient == "h", scaley=self.orient == "v")

def plot(self, ax, boxplot_kws):
def plot(self, ax, box_kws, flier_kws, line_kws):
"""Make the plot."""
self.draw_letter_value_plot(ax, boxplot_kws)
self.draw_letter_value_plot(ax, box_kws, flier_kws, line_kws)
self.annotate_axes(ax)
if self.orient == "h":
ax.invert_yaxis()
Expand Down Expand Up @@ -2366,10 +2399,10 @@ def boxenplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
orient=None, color=None, palette=None, saturation=.75,
width=.8, dodge=True, k_depth='tukey', linewidth=None,
scale='exponential', outlier_prop=0.007, trust_alpha=0.05, showfliers=True,
ax=None, **kwargs
scale='exponential', outlier_prop=0.007, trust_alpha=0.05,
showfliers=True,
ax=None, box_kws=None, flier_kws=None, line_kws=None,
):

plotter = _LVPlotter(x, y, hue, data, order, hue_order,
orient, color, palette, saturation,
width, dodge, k_depth, linewidth, scale,
Expand All @@ -2378,7 +2411,7 @@ def boxenplot(
if ax is None:
ax = plt.gca()

plotter.plot(ax, kwargs)
plotter.plot(ax, box_kws, flier_kws, line_kws)
return ax


Expand All @@ -2392,7 +2425,6 @@ def boxenplot(
plotting more quantiles, it provides more information about the shape of
the distribution, particularly in the tails. For a more extensive
explanation, you can read the paper that introduced the plot:
https://vita.had.co.nz/papers/letter-value-plot.html
{categorical_narrative}
Expand Down Expand Up @@ -2431,10 +2463,15 @@ def boxenplot(
showfliers : bool, optional
If False, suppress the plotting of outliers.
{ax_in}
kwargs : key, value mappings
Other keyword arguments are passed through to
:meth:`matplotlib.axes.Axes.plot` and
:meth:`matplotlib.axes.Axes.scatter`.
box_kws: dict, optional
Keyword arguments for the box artists; passed to
:class:`matplotlib.collections.PatchCollection`.
line_kws: dict, optional
Keyword arguments for the line denoting the median; passed to
:meth:`matplotlib.axes.Axes.plot`.
flier_kws: dict, optional
Keyword arguments for the scatter denoting the outlier observations;
passed to :meth:`matplotlib.axes.Axes.scatter`.
Returns
-------
Expand Down
82 changes: 70 additions & 12 deletions tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -3193,19 +3193,23 @@ def test_axes_data(self):

def test_box_colors(self):

fig = plt.figure()
ax = cat.boxenplot(x="g", y="y", data=self.df, saturation=1)
fig.canvas.draw()
pal = palettes.color_palette(n_colors=3)
patches = filter(self.ispatch, ax.collections)
assert same_color([patch.get_facecolor()[0] for patch in patches], pal)
pal = palettes.color_palette()

fig = plt.figure()
ax = cat.boxenplot(x="g", y="y", hue="h", data=self.df, saturation=1)
fig.canvas.draw()
pal = palettes.color_palette(n_colors=2)
patches = filter(self.ispatch, ax.collections)
assert same_color([patch.get_facecolor()[0] for patch in patches], pal * 3)
ax = cat.boxenplot(
x="g", y="y", data=self.df, saturation=1, showfliers=False
)
ax.figure.canvas.draw()
for i, box in enumerate(ax.collections):
assert same_color(box.get_facecolor()[0], pal[i])

plt.close("all")

ax = cat.boxenplot(
x="g", y="y", hue="h", data=self.df, saturation=1, showfliers=False
)
ax.figure.canvas.draw()
for i, box in enumerate(ax.collections):
assert same_color(box.get_facecolor()[0], pal[i % 2])

plt.close("all")

Expand Down Expand Up @@ -3357,6 +3361,60 @@ def test_Float64_input(self):

plt.close("all")

def test_line_kws(self):
    """Check that ``line_kws`` styling is applied to the first line on the axes."""
    expected = dict(linewidth=5, color='purple', linestyle='-.')

    ax = cat.boxenplot(data=self.df, y='y', line_kws=expected)
    first_line = ax.lines[0]

    # Every property passed through line_kws should be reported back as given
    for prop in ('linewidth', 'linestyle', 'color'):
        getter = getattr(first_line, 'get_' + prop)
        assert getter() == expected[prop]

    plt.close("all")

def test_flier_kws(self):
    """Check that ``flier_kws`` sets the marker, color, and size of the outliers."""
    flier_kws = dict(marker='v', color=np.array([[1, 0, 0, 1]]), s=5)

    ax = cat.boxenplot(data=self.df, y='y', x='g', flier_kws=flier_kws)

    # The outliers are drawn as a scatter, i.e. a PathCollection
    fliers = ax.findobj(mpl.collections.PathCollection)[0]

    # A 'v' marker is a triangle (3 vertices); the marker path is expected
    # to hold n + 1 vertices.
    marker_paths = fliers.get_paths()
    for marker_path in (marker_paths[0], marker_paths[-1]):
        assert len(marker_path) == 4

    facecolor_matches = fliers.get_facecolor() == flier_kws['color']
    assert facecolor_matches.all()

    unique_sizes = np.unique(fliers.get_sizes())
    assert unique_sizes == flier_kws['s']

    plt.close("all")

def test_box_kws(self):
    """Check that ``box_kws`` styling is applied to the collection of boxes."""
    box_kws = {'linewidth': 5, 'edgecolor': np.array([[0, 1, 0, 1]])}

    ax = cat.boxenplot(data=self.df, y='y', x='g',
                       box_kws=box_kws)

    boxes = ax.findobj(mpl.collections.PatchCollection)[0]

    # Each box is a rectangle (4 vertices); the path of a patch is expected
    # to hold n + 1 vertices.
    assert len(boxes.get_paths()[0]) == 5
    assert len(boxes.get_paths()[-1]) == 5

    # Compare the linewidths themselves; the comparison must sit outside of
    # any reduction, otherwise only the uniqueness of booleans is tested.
    linewidths = np.asarray(boxes.get_linewidth())
    assert (linewidths == box_kws['linewidth']).all()

    # The edge color passed in box_kws should be used for the box outlines
    assert (boxes.get_edgecolor() == box_kws['edgecolor']).all()

    plt.close("all")


class TestBeeswarm:

Expand Down

0 comments on commit b1db0f7

Please sign in to comment.