Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Dash mark #3074

Merged
merged 3 commits into from
Oct 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions doc/_docstrings/objects.Dash.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3227e585-7166-44e7-b0c2-8570e098102d",
"metadata": {
"tags": [
"hide"
]
},
"outputs": [],
"source": [
"import seaborn.objects as so\n",
"from seaborn import load_dataset\n",
"penguins = load_dataset(\"penguins\")"
]
},
{
"cell_type": "raw",
"id": "1b424322-eaa4-45c7-8007-a671ef2afbde",
"metadata": {},
"source": [
"A line segment is drawn for each datapoint, centered on the value along the orientation axis:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fc835356-2dc2-4583-a9f9-c1fe0a6cc9ea",
"metadata": {},
"outputs": [],
"source": [
"p = so.Plot(penguins, \"species\", \"body_mass_g\", color=\"sex\")\n",
"p.add(so.Dash())"
]
},
{
"cell_type": "raw",
"id": "ad9b94de-f19f-4e60-8275-686e749da39c",
"metadata": {},
"source": [
"A number of properties can be mapped or set directly:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6070a665-ab19-43a6-9eba-e206193d9422",
"metadata": {},
"outputs": [],
"source": [
"p.add(so.Dash(alpha=.5), linewidth=\"flipper_length_mm\")"
]
},
{
"cell_type": "raw",
"id": "2c4a8291-0a84-4e70-a992-756850933791",
"metadata": {},
"source": [
"The mark has a `width` property, which is relative to the spacing between orientation values:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "315327da-421e-46c8-8a1b-8b87355d0439",
"metadata": {},
"outputs": [],
"source": [
"p.add(so.Dash(width=.5))"
]
},
{
"cell_type": "raw",
"id": "224bf51a-b8d8-4d8e-b0ab-b63ec6788584",
"metadata": {},
"source": [
"When dodged, the width will automatically adapt:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "227e889c-7ce7-49fc-b985-f7746393930e",
"metadata": {},
"outputs": [],
"source": [
"p.add(so.Dash(), so.Dodge())"
]
},
{
"cell_type": "raw",
"id": "aa807f57-5d37-4faa-8fd2-1e5378115f9f",
"metadata": {},
"source": [
"This mark works well to show aggregate values when paired with a strip plot:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5141e0b8-ea1a-4178-adde-21b4bc2e705f",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" p\n",
" .add(so.Dash(), so.Agg(), so.Dodge())\n",
" .add(so.Dots(), so.Dodge(), so.Jitter())\n",
")"
]
},
{
"cell_type": "raw",
"id": "f2abd4b7-5afb-4661-95f3-b51bfa101273",
"metadata": {},
"source": [
"When both coordinate variables are numeric, you can control the orientation explicitly:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6d7e236-327f-460f-b12e-46d7444ac348",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(\n",
" penguins[\"body_mass_g\"],\n",
" penguins[\"flipper_length_mm\"].round(-1),\n",
" )\n",
" .add(so.Dash(), orient=\"y\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6811d776-93e5-49ce-88a6-14786a67841d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "py310",
"language": "python",
"name": "py310"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Mark objects
Lines
Path
Paths
Dash
Range

.. rubric:: Bar marks
Expand Down
2 changes: 2 additions & 0 deletions doc/whatsnew/v0.12.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ v0.12.1 (Unreleased)

- |Feature| Added the :class:`objects.Text` mark (:pr:`3051`).

- |Feature| Added the :class:`objects.Dash` mark (:pr:`3074`).

- |Feature| Added the :class:`objects.Perc` stat (:pr:`3063`).

- |Feature| The :class:`Band` and :class:`Range` marks will now cover the full extent of the data if `min` / `max` variables are not explicitly assigned or added in a transform (:pr:`3056`).
Expand Down
93 changes: 46 additions & 47 deletions seaborn/_marks/line.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,9 @@ def __post_init__(self):
# even when they are dashed. It's a slight inconsistency, but looks fine IMO.
self.artist_kws.setdefault("capstyle", mpl.rcParams["lines.solid_capstyle"])

def _setup_lines(self, split_gen, scales, orient):
def _plot(self, split_gen, scales, orient):

line_data = {}

for keys, data, ax in split_gen(keep_na=not self._sort):

if ax not in line_data:
Expand All @@ -180,24 +179,16 @@ def _setup_lines(self, split_gen, scales, orient):
"linestyles": [],
}

segments = self._setup_segments(data, orient)
line_data[ax]["segments"].extend(segments)
n = len(segments)

vals = resolve_properties(self, keys, scales)
vals["color"] = resolve_color(self, keys, scales=scales)

if self._sort:
data = data.sort_values(orient, kind="mergesort")

# Column stack to avoid block consolidation
xy = np.column_stack([data["x"], data["y"]])
line_data[ax]["segments"].append(xy)
line_data[ax]["colors"].append(vals["color"])
line_data[ax]["linewidths"].append(vals["linewidth"])
line_data[ax]["linestyles"].append(vals["linestyle"])

return line_data

def _plot(self, split_gen, scales, orient):

line_data = self._setup_lines(split_gen, scales, orient)
line_data[ax]["colors"].extend([vals["color"]] * n)
line_data[ax]["linewidths"].extend([vals["linewidth"]] * n)
line_data[ax]["linestyles"].extend([vals["linestyle"]] * n)

for ax, ax_data in line_data.items():
lines = mpl.collections.LineCollection(**ax_data, **self.artist_kws)
Expand Down Expand Up @@ -225,6 +216,16 @@ def _legend_artist(self, variables, value, scales):
**artist_kws,
)

def _setup_segments(self, data, orient):
    """
    Build the list of line segments for one subset of the data.

    When ``self._sort`` is set, the rows are first ordered along the
    ``orient`` column. The "x" and "y" columns are then paired into a
    single two-column array.

    Returns a one-element list holding that array.
    """
    # mergesort is stable, so rows that tie on `orient` keep their input order
    frame = data.sort_values(orient, kind="mergesort") if self._sort else data

    # Column stack to avoid block consolidation
    coords = np.column_stack((frame["x"], frame["y"]))

    return [coords]


@document_properties
@dataclass
Expand Down Expand Up @@ -255,41 +256,39 @@ class Range(Paths):
.. include:: ../docstrings/objects.Range.rst

"""
def _setup_lines(self, split_gen, scales, orient):

line_data = {}

other = {"x": "y", "y": "x"}[orient]
def _setup_segments(self, data, orient):

for keys, data, ax in split_gen(keep_na=not self._sort):
# TODO better checks on what variables we have
# TODO what if only one exists?
val = {"x": "y", "y": "x"}[orient]
if not set(data.columns) & {f"{val}min", f"{val}max"}:
agg = {f"{val}min": (val, "min"), f"{val}max": (val, "max")}
data = data.groupby(orient).agg(**agg).reset_index()

if ax not in line_data:
line_data[ax] = {
"segments": [],
"colors": [],
"linewidths": [],
"linestyles": [],
}

# TODO better checks on what variables we have
cols = [orient, f"{val}min", f"{val}max"]
data = data[cols].melt(orient, value_name=val)[["x", "y"]]
segments = [d.to_numpy() for _, d in data.groupby(orient)]
return segments

vals = resolve_properties(self, keys, scales)
vals["color"] = resolve_color(self, keys, scales=scales)

# TODO what if only one exist?
if not set(data.columns) & {f"{other}min", f"{other}max"}:
agg = {f"{other}min": (other, "min"), f"{other}max": (other, "max")}
data = data.groupby(orient).agg(**agg).reset_index()
@document_properties
@dataclass
class Dash(Paths):
"""
A line mark drawn as an oriented segment for each datapoint.

cols = [orient, f"{other}min", f"{other}max"]
data = data[cols].melt(orient, value_name=other)[["x", "y"]]
segments = [d.to_numpy() for _, d in data.groupby(orient)]
Examples
--------
.. include:: ../docstrings/objects.Dash.rst

line_data[ax]["segments"].extend(segments)
"""
width: MappableFloat = Mappable(.8, grouping=False)

n = len(segments)
line_data[ax]["colors"].extend([vals["color"]] * n)
line_data[ax]["linewidths"].extend([vals["linewidth"]] * n)
line_data[ax]["linestyles"].extend([vals["linestyle"]] * n)
def _setup_segments(self, data, orient):

return line_data
ori = ["x", "y"].index(orient)
xys = data[["x", "y"]].to_numpy().astype(float)
segments = np.stack([xys, xys], axis=1)
segments[:, 0, ori] -= data["width"] / 2
segments[:, 1, ori] += data["width"] / 2
return segments
2 changes: 1 addition & 1 deletion seaborn/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from seaborn._marks.area import Area, Band # noqa: F401
from seaborn._marks.bar import Bar, Bars # noqa: F401
from seaborn._marks.dot import Dot, Dots # noqa: F401
from seaborn._marks.line import Line, Lines, Path, Paths, Range # noqa: F401
from seaborn._marks.line import Dash, Line, Lines, Path, Paths, Range # noqa: F401
from seaborn._marks.text import Text # noqa: F401

from seaborn._stats.base import Stat # noqa: F401
Expand Down
Loading