Skip to content

Commit

Permalink
feat: new gap analysis visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbarros committed Aug 8, 2023
1 parent 7287349 commit 7502f52
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
19 changes: 12 additions & 7 deletions src/ydata_profiling/model/pandas/describe_timeseries_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ def compute_gap_stats(series: pd.Series) -> pd.Series:
gap = series.dropna()
index_name = gap.index.name if gap.index.name else "index"
gap = gap.reset_index()[index_name]
gap.index = gap
gap.index.name = None

if isinstance(series.index, pd.DatetimeIndex):
Expand All @@ -166,14 +165,20 @@ def compute_gap_stats(series: pd.Series) -> pd.Series:
period = np.abs(np.diff(series.index)).mean()
base_frequency = 1

gap = gap.diff()
diff = gap.diff()
anchors = gap[diff > period].index
gaps = []
for i in anchors:
gaps.append(gap.loc[gap.index[[i - 1, i]]].values)

stats = {
"period": period / base_frequency,
"min": gap.min() / base_frequency,
"max": gap.max() / base_frequency,
"mean": gap.mean() / base_frequency,
"std": gap.std() / base_frequency,
"normalized_diff": gap / period,
"min": diff.min() / base_frequency,
"max": diff.max() / base_frequency,
"mean": diff.mean() / base_frequency,
"std": diff.std() / base_frequency,
"series": series,
"gaps": gaps,
}
if isinstance(series.index, pd.DatetimeIndex):
stats["frequency"] = frequency
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _render_gap_tab(config: Settings, summary: dict) -> Container:

gap_plot = Image(
plot_timeseries_gap_analysis(
config, summary["gap_stats"]["normalized_diff"]
config, summary["gap_stats"]["series"], summary["gap_stats"]["gaps"]
),
image_format=config.plot.image_format,
alt="Gap plot",
Expand Down
14 changes: 11 additions & 3 deletions src/ydata_profiling/visualisation/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ def _format_ts_date_axis(
def plot_timeseries_gap_analysis(
config: Settings,
series: Union[pd.Series, List[pd.Series]],
gaps: Union[pd.Series, List[pd.Series]],
figsize: tuple = (6, 3),
) -> matplotlib.figure.Figure:
"""Plot a line plot from the data and return the AxesSubplot object.
Expand All @@ -573,10 +574,12 @@ def plot_timeseries_gap_analysis(
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(111)

colors = create_comparison_color_list(config)
if isinstance(series, list):
colors = create_comparison_color_list(config)
min_ = min(s.min() for s in series)
max_ = max(s.max() for s in series)
labels = config.html.style._labels
for serie, color, label in zip(series, colors, labels):
for serie, gaps_, color, label in zip(series, gaps, colors, labels):
serie.plot(
ax=ax,
label=label,
Expand All @@ -585,12 +588,17 @@ def plot_timeseries_gap_analysis(
)
_format_ts_date_axis(serie, ax)
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
for gap in gaps_:
ax.fill_between(x=gap, y1=min_, y2=max_, color=color, alpha=0.25)
else:
series.plot(ax=ax)
_format_ts_date_axis(series, ax)
ax.yaxis.set_major_locator(MaxNLocator(integer=True))

plt.ylabel("gap (#periods)")
for gap in gaps:
ax.fill_between(
x=gap, y1=series.min(), y2=series.max(), color=colors[0], alpha=0.25
)

return plot_360_n0sc0pe(config)

Expand Down

0 comments on commit 7502f52

Please sign in to comment.