-
-
Notifications
You must be signed in to change notification settings - Fork 18.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds custom plot formatting for TimedeltaIndex. #15067
Changes from 13 commits
efe5636
b967d24
5ec65fa
f021cbd
41ebc85
91954bd
3abc310
7d28842
c7851e3
b6e6a81
d588c2c
f5f32bc
4eff697
232efe6
7db61ec
945ec14
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
from pandas import Index, Series, DataFrame | ||
|
||
from pandas.tseries.index import date_range, bdate_range | ||
from pandas.tseries.tdi import timedelta_range | ||
from pandas.tseries.offsets import DateOffset | ||
from pandas.tseries.period import period_range, Period, PeriodIndex | ||
from pandas.tseries.resample import DatetimeIndex | ||
|
@@ -1271,6 +1272,63 @@ def test_plot_outofbounds_datetime(self): | |
values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] | ||
self.plt.plot(values) | ||
|
||
def test_format_timedelta_ticks_narrow(self):
    """Check x tick labels for a TimedeltaIndex spanning ten nanoseconds.

    Plots a three-column random DataFrame indexed by ten consecutive
    nanosecond timedeltas starting at zero and asserts that the x-axis
    has exactly ten tick labels reading '00:00:00.000000000' through
    '00:00:00.000000009' (no leading day component).
    """
    index = timedelta_range('0', periods=10, freq='ns')
    frame = DataFrame(np.random.randn(len(index), 3), index)
    axes = frame.plot(fontsize=2)

    # Draw the canvas before reading the tick labels.
    # NOTE(review): presumably the label text is only populated once the
    # figure has been drawn -- confirm before removing this call.
    axes.get_figure().canvas.draw()
    tick_labels = axes.get_xticklabels()

    expected_labels = []
    for nanos in range(10):
        expected_labels.append('00:00:00.00000000{:d}'.format(nanos))

    self.assertEqual(len(tick_labels), len(expected_labels))
    for tick, wanted in zip(tick_labels, expected_labels):
        self.assertEqual(tick.get_text(), wanted)
|
||
def test_format_timedelta_ticks_wide(self): | ||
|
||
expected_labels = [ | ||
'00:00:00', | ||
'1 days 03:46:40', | ||
'2 days 07:33:20', | ||
'3 days 11:20:00', | ||
'4 days 15:06:40', | ||
'5 days 18:53:20', | ||
'6 days 22:40:00', | ||
'8 days 02:26:40', | ||
'' | ||
] | ||
|
||
rng = timedelta_range('0', periods=10, freq='1 d') | ||
df = DataFrame(np.random.randn(len(rng), 3), rng) | ||
ax = df.plot(fontsize=2) | ||
fig = ax.get_figure() | ||
fig.canvas.draw() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think those two lines above are needed? (and the same for the test above) |
||
labels = ax.get_xticklabels() | ||
self.assertEqual(len(labels), len(expected_labels)) | ||
for l, l_expected in zip(labels, expected_labels): | ||
self.assertEqual(l.get_text(), l_expected) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test needs more work. For this test, we basically want to check that for a narrow window (as shown below) we get nanosecond precision and, when the range starts at 0, that the labels are shown without days. rng = timedelta_range('0', periods=3, freq='ns')
# rough code to follow
expected_labels = [
'00:00:00.000000001',
'00:00:00.000000002',
'00:00:00.000000001']
for label, expected_label in zip(ax.get_xtick_labels(), expected_labels):
# check that there are no days
self.assertEqual(label, expected_label) We also want to check for a wide window that we have second precision.
|
||
def test_timedelta_plot(self): | ||
# test issue #8711 | ||
s = Series(range(5), timedelta_range('1day', periods=5)) | ||
_check_plot_works(s.plot) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add here the three examples you showed in the top post of the PR? (just with the same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed, just added the 2 easy ones above. |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have no idea if _check_plot_works will actually do anything useful here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This basically just tests that the plot call does not error. So, for example, it will test that the call does not raise a MemoryError, as it currently does in some cases. |
||
# test long period | ||
index = timedelta_range('1 day 2 hr 30 min 10 s', | ||
periods=10, freq='1 d') | ||
s = Series(np.random.randn(len(index)), index) | ||
_check_plot_works(s.plot) | ||
|
||
# test short period | ||
index = timedelta_range('1 day 2 hr 30 min 10 s', | ||
periods=10, freq='1 ns') | ||
s = Series(np.random.randn(len(index)), index) | ||
_check_plot_works(s.plot) | ||
|
||
|
||
def _check_plot_works(f, freq=None, series=None, *args, **kwargs): | ||
import matplotlib.pyplot as plt | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ | |
from pandas.tseries.offsets import DateOffset | ||
import pandas.tseries.frequencies as frequencies | ||
from pandas.tseries.index import DatetimeIndex | ||
from pandas.tseries.period import PeriodIndex | ||
from pandas.tseries.tdi import TimedeltaIndex | ||
from pandas.formats.printing import pprint_thing | ||
import pandas.compat as compat | ||
|
||
|
@@ -49,7 +51,7 @@ def tsplot(series, plotf, ax=None, **kwargs): | |
lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) | ||
|
||
# set date formatter, locators and rescale limits | ||
format_dateaxis(ax, ax.freq) | ||
format_dateaxis(ax, ax.freq, series.index) | ||
return lines | ||
|
||
|
||
|
@@ -278,8 +280,22 @@ def _maybe_convert_index(ax, data): | |
# Patch methods for subplot. Only format_dateaxis is currently used. | ||
# Do we need the rest for convenience? | ||
|
||
|
||
def format_dateaxis(subplot, freq): | ||
def format_timedelta_ticks(x, pos, n_decimals): | ||
''' Convert seconds to 'D days HH:MM:SS.F' ''' | ||
s, ns = divmod(x, 1e9) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use triple double quotes not single quotes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
m, s = divmod(s, 60) | ||
h, m = divmod(m, 60) | ||
d, h = divmod(h, 24) | ||
decimals = int(ns * 10**(n_decimals - 9)) | ||
s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) | ||
if n_decimals > 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are pretty much reinventing the wheel here.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can simply call
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately, python timedelta doesn't support nanoseconds. In [4]: pd.Timedelta(1e-9, unit='s')
Out[4]: Timedelta('0 days 00:00:00')
In [5]: pd.Timedelta(1e-3, unit='s')
Out[5]: Timedelta('0 days 00:00:00.001000') There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Of course it does. This is a pandas Timedelta (you are looking at the printing, which by default suppresses it).
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok great, got confused by the output. So how do I say I want n decimals, because that would be a drop-in for what I've already written. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, suppressing days when they are 0 would be nice. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe the
could compress the code a bit, but I'm not sure it would save significant space over what I've already got. |
||
s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) | ||
if d != 0: | ||
s = '{:d} days '.format(int(d)) + s | ||
return s | ||
|
||
|
||
def format_dateaxis(subplot, freq, index): | ||
""" | ||
Pretty-formats the date axis (x-axis). | ||
|
||
|
@@ -288,26 +304,44 @@ def format_dateaxis(subplot, freq): | |
default, changing the limits of the x axis will intelligently change | ||
the positions of the ticks. | ||
""" | ||
majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, | ||
minor_locator=False, | ||
plot_obj=subplot) | ||
minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, | ||
minor_locator=True, | ||
plot_obj=subplot) | ||
subplot.xaxis.set_major_locator(majlocator) | ||
subplot.xaxis.set_minor_locator(minlocator) | ||
|
||
majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, | ||
|
||
# handle index specific formatting | ||
# Note: DatetimeIndex does not use this | ||
# interface. DatetimeIndex uses matplotlib.date directly | ||
if isinstance(index, PeriodIndex): | ||
|
||
majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, | ||
minor_locator=False, | ||
plot_obj=subplot) | ||
minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, | ||
minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, | ||
minor_locator=True, | ||
plot_obj=subplot) | ||
subplot.xaxis.set_major_formatter(majformatter) | ||
subplot.xaxis.set_minor_formatter(minformatter) | ||
|
||
# x and y coord info | ||
subplot.format_coord = lambda t, y: ( | ||
"t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) | ||
subplot.xaxis.set_major_locator(majlocator) | ||
subplot.xaxis.set_minor_locator(minlocator) | ||
|
||
majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, | ||
minor_locator=False, | ||
plot_obj=subplot) | ||
minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, | ||
minor_locator=True, | ||
plot_obj=subplot) | ||
subplot.xaxis.set_major_formatter(majformatter) | ||
subplot.xaxis.set_minor_formatter(minformatter) | ||
|
||
# x and y coord info | ||
subplot.format_coord = lambda t, y: ( | ||
"t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) | ||
|
||
elif isinstance(index, TimedeltaIndex): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add tests for this in You should just need a couple of datasets
In the tests, if you could check that the formatting is correct, and maybe check the number of ticklabels. |
||
from matplotlib import ticker | ||
(vmin, vmax) = tuple(subplot.xaxis.get_view_interval()) | ||
n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a reason not to make this interface similar to the above? e.g. why not create a TimeSeries_TimedeltaFormatter ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed in latest commit. |
||
if n_decimals > 9: | ||
n_decimals = 9 | ||
formatter = ticker.FuncFormatter( | ||
lambda x, pos: format_timedelta_ticks(x, pos, n_decimals)) | ||
subplot.xaxis.set_major_formatter(formatter) | ||
else: | ||
raise IOError('index type not supported') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use TypeError There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
|
||
pylab.draw_if_interactive() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we add a link to visualization.rst?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is certainly possible; we just don't really have a section about timedelta plotting currently.