Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorizing Indicators #32

Merged
merged 26 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
700032d
Initial attempt at adding vectorized vspans
hoxbro Sep 22, 2023
b719ada
Update merging in annotation table
hoxbro Sep 22, 2023
1b10352
Implement ranges_2d
hoxbro Sep 22, 2023
9d923f9
Update tests
hoxbro Sep 22, 2023
4bc1532
Update pyproject.toml
hoxbro Sep 22, 2023
c5bb305
Update minimum version to 3.9
hoxbro Sep 22, 2023
a113163
Add initial support for 1d_points
hoxbro Sep 22, 2023
4d49002
Add simple algorithm for point detection
hoxbro Sep 22, 2023
108e36c
Improve loading of load_annotation_table if not a range
hoxbro Sep 22, 2023
2bacb19
Fix failing tests
hoxbro Sep 23, 2023
ccd0543
Have _collapse_region_df output the correct columns and dtypes
hoxbro Sep 27, 2023
865d052
Misc minor changes
hoxbro Sep 27, 2023
1d6a0fb
skip show_regions if value is not Iterable
hoxbro Sep 27, 2023
4790df7
Simplify logic in Indicators as they don't need special logic for emp…
hoxbro Sep 27, 2023
0b80509
Update pre-commit
hoxbro Sep 27, 2023
d2cf493
Move get_indices_by_position into Annotation Table
hoxbro Sep 27, 2023
b2b47da
Remove if statement
hoxbro Sep 27, 2023
7550472
Update points_2d to new way (though not working)
hoxbro Sep 27, 2023
24cf1a5
Remove code no longer needed
hoxbro Sep 27, 2023
75b2758
Always have _expanded_region_df return the correct column and types
hoxbro Sep 27, 2023
b416b3f
Add more table tests
hoxbro Sep 27, 2023
63d6ef3
Align signatures for create dataframe functions
hoxbro Sep 28, 2023
3ec0861
Update table tests
hoxbro Sep 28, 2023
883264b
Add check for invalid dims
hoxbro Sep 28, 2023
1596105
Remove test no longer needed
hoxbro Sep 28, 2023
567489d
sort invalid spec
hoxbro Sep 28, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@ concurrency:
cancel-in-progress: true

jobs:
pre-commit:
runs-on: ubuntu-latest
pre_commit:
name: Run pre-commit
runs-on: 'ubuntu-latest'
steps:
- uses: actions/checkout@v3
- uses: pre-commit/action@v3.0.0
- uses: holoviz-dev/holoviz_tasks/pre-commit@v0.1a17
unit_test_suite:
name: Unit tests on ${{ matrix.os }} with Python ${{ matrix.python-version }}
needs: [pre-commit]
needs: [pre_commit]
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ['3.9', '3.10', '3.11']
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
Expand Down
301 changes: 147 additions & 154 deletions holonote/annotate/annotator.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions holonote/annotate/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class Connector(param.Parameterized):
np.dtype('datetime64[ns]'): 'TIMESTAMP',
np.dtype('<M8'):'TIMESTAMP',
np.float64: 'REAL',
np.int64: 'INTEGER',
}

@classmethod
Expand Down
86 changes: 48 additions & 38 deletions holonote/annotate/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import weakref
from typing import TYPE_CHECKING, Any

import numpy as np
import pandas as pd
import param

Expand Down Expand Up @@ -342,56 +341,62 @@ def define_ranges(self, dims, startx, endx, starty=None, endy=None):
self._region_df = pd.concat((self._region_df, additions), ignore_index=True)
self._update_index()

def _collapse_region_df(self, columns: list[str] | None=None) -> pd.DataFrame:
# TODO: Move columns filtering to the top!
regions = self._region_df.groupby("dim")["region"].first()
def _empty_expanded_region_df(self, *, spec: SpecDict, dims: list[str] | None) -> pd.DataFrame:
invalid_dims = set(dims) - spec.keys()
if invalid_dims:
invalid_dims_str = ", ".join([f"{dim!r}" for dim in sorted(invalid_dims)])
msg = f"Dimension(s) {invalid_dims_str} not in the spec"
raise ValueError(msg)

columns, types = [], []
for dim in dims:
region = spec[dim]["region"]
dtype = spec[dim]["type"]()
if region == "range":
columns.extend([f"start[{dim}]", f"end[{dim}]"])
types.extend([dtype, dtype])
else:
columns.append(f"{region}[{dim}]")
types.append(dtype)

return pd.DataFrame([types], columns=columns).drop(index=0)

def _expand_region_df(self, *, spec: SpecDict, dims: list[str] | None=None) -> pd.DataFrame:
data = self._region_df.pivot(index="_id", columns="dim", values="value")
dims = list(dims or spec)

expanded = self._empty_expanded_region_df(spec=spec, dims=dims)
if data.empty:
return data
return expanded

all_columns = list(data.columns)
dims = columns or all_columns
set_index = True
for dim in dims:
region = regions.get(dim)
if region is None:
region = spec[dim]["region"]
if dim not in data.columns:
continue
elif region == "range":
na_mask = data[dim].isnull()
data.loc[na_mask, dim] = data.loc[na_mask, dim].apply(lambda *x: (None, None))
data[[f"start[{dim}]", f"end[{dim}]"]] = list(data[dim])
expanded[[f"start[{dim}]", f"end[{dim}]"]] = list(data[dim])
else:
data[f"{region}[{dim}]"] = data[dim].infer_objects()
dtype = expanded.dtypes[f"{region}[{dim}]"]
expanded[f"{region}[{dim}]"] = data[dim].astype(dtype)

# Clean up
data = data.drop(all_columns, axis=1)
data.index.name = None
data.columns.name = None
return data
if set_index:
expanded.index = data.index
set_index = False

@property
def dataframe(self) -> pd.DataFrame:
return expanded

def get_dataframe(self, *, spec: SpecDict | None=None, dims: list[str] | None=None) -> pd.DataFrame:
field_df = self._field_df
region_df = self._collapse_region_df()
region_df = self._expand_region_df(spec=spec, dims=dims)

df = pd.merge(region_df, field_df, left_index=True, right_index=True)
df = region_df.merge(field_df, left_index=True, right_index=True, how="left")
df.index.name = self._field_df.index.name
df = df.reindex(field_df.index)
return df

def _filter(self, dim_mask, region_type):
region_mask = self._region_df["region"] == region_type
return self._region_df[region_mask & dim_mask]

def _mask1D(self, kdims):
return self._region_df["dim"] == str(kdims[0])

def _mask2D(self, kdims):
dim1_name, dim2_name = str(kdims[0]), str(kdims[1])
return np.logical_and(
self._region_df["dim1"] == dim1_name, self._region_df["dim2"] == dim2_name
)

def load_annotation_table(self, conn: Connector, fields: list[str], spec: SpecDict) -> None:
"""Load the AnnotationTable region and field DataFrame from a connector.

Expand Down Expand Up @@ -424,12 +429,17 @@ def load_annotation_table(self, conn: Connector, fields: list[str], spec: SpecDi
else:
value = df[f"{region}_{kdim}"]

d = {"region": region, "dim": kdim, "value": value, "_id": list(df.index)}
data.append(pd.DataFrame(d))
subdata = pd.DataFrame(
{"region": region, "dim": kdim, "value": value, "_id": list(df.index)}
)
if region == "range":
empty_mask = subdata["value"] == (None, None)
else:
empty_mask = subdata["value"].isnull()

data.append(subdata[~empty_mask])

rdf = pd.concat(data, ignore_index=True)
empty_mask = (rdf.value == (None, None)) | rdf.value.isnull()
self._region_df = rdf[~empty_mask].copy()
self._region_df = pd.concat(data, ignore_index=True)

self._update_index()
self.clear_edits()
Expand Down
2 changes: 1 addition & 1 deletion holonote/annotate/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class SpecItem(TypedDict):
The region specification is either "range", "point", or "geometry".
"""
type: Callable
region: Literal["range"] | Literal["point"] | Literal["geometry"]
region: Literal["range", "point", "geometry"]


SpecDict = dict[str, SpecItem]
2 changes: 1 addition & 1 deletion holonote/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import contextlib
from typing import Iterator
from collections.abc import Iterator

import holoviews as hv
import numpy as np
Expand Down
Loading