Skip to content

Commit

Permalink
Merge pull request #143 from PixelgenTechnologies/chore/fix-performan…
Browse files Browse the repository at this point in the history
…ce-for-many-small-layouts

Fix write performance for many small layouts
  • Loading branch information
johandahlberg authored May 24, 2024
2 parents 593252e + d1ae712 commit dea41fe
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 6 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [UNRELEASED] - 2024-??-??

### Added


### Changed


### Fixed

* Poor performance when writing many small layouts to a pxl file (~45x speed-up). This should almost
exclusively affect test scenarios, since most real components should be large enough for this not to be an issue.


### Removed



## [0.17.0] - 2024-05-23

### Added
Expand Down
38 changes: 32 additions & 6 deletions src/pixelator/pixeldataset/datastores.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,13 @@ def read_precomputed_layouts(
def write_precomputed_layouts(
    self,
    layouts: PreComputedLayouts,
    collapse_to_single_dataframe: bool = False,
) -> None:
    """Write pre-computed layouts to the data store.

    :param layouts: The pre-computed layouts to write.
    :param collapse_to_single_dataframe: Whether to collapse the layouts into
        a single dataframe before writing. Defaults to False.
    """
    ...

Expand Down Expand Up @@ -454,6 +457,7 @@ def write_colocalization(self, colocalization: pd.DataFrame) -> None:
def write_precomputed_layouts(
self,
layouts: Optional[PreComputedLayouts],
collapse_to_single_dataframe: bool = False,
) -> None:
"""Write pre-computed layouts to the data store."""
if layouts is None:
Expand All @@ -463,15 +467,27 @@ def write_precomputed_layouts(
self._check_if_writeable(self.LAYOUTS_KEY)

logger.debug("Starting to write layouts...")

for idx, layouts_to_write in enumerate(layouts.component_iterator()):
if idx % 100 == 0:
logger.debug("Writing layouts...")
# This option allows collecting all the layouts into a single
# dataframe before writing (they will still be written into
# partitions), which is much faster than writing them one by one
# for scenarios with many very small layouts.
if collapse_to_single_dataframe:
logger.debug("Writing from a single dataframe...")
self.write_dataframe(
layouts_to_write,
layouts.to_df(),
self.LAYOUTS_KEY,
partitioning=PreComputedLayouts.DEFAULT_PARTITIONING,
)
else:
logger.debug("Writing by iterating components...")
for idx, layouts_to_write in enumerate(layouts.component_iterator()):
if idx % 100 == 0:
logger.debug("Writing layouts...")
self.write_dataframe(
layouts_to_write,
self.LAYOUTS_KEY,
partitioning=PreComputedLayouts.DEFAULT_PARTITIONING,
)

logger.debug("Completed writing layouts...")

Expand Down Expand Up @@ -506,7 +522,16 @@ def save(self, dataset: PixelDataset, force_overwrite: bool = False) -> None:

if dataset.precomputed_layouts is not None:
logger.debug("Writing precomputed layouts")
self.write_precomputed_layouts(dataset.precomputed_layouts)
# This speeds things up massively when you have many, very small
# layouts, like we do in some test data.
try:
write_layouts_in_one_go = dataset.adata.obs["vertices"].sum() < 100_000
except KeyError:
write_layouts_in_one_go = False
self.write_precomputed_layouts(
dataset.precomputed_layouts,
collapse_to_single_dataframe=write_layouts_in_one_go,
)

logger.debug("PixelDataset saved to %s", self.path)

Expand Down Expand Up @@ -560,6 +585,7 @@ def read_dataframe_lazy(self, key: str) -> Optional[pl.LazyFrame]:
def write_precomputed_layouts(
self,
layouts: Optional[PreComputedLayouts],
collapse_to_single_dataframe: bool = False,
) -> None:
"""Write pre-computed layouts to the data store (NB: Not implemented!)."""
raise NotImplementedError(
Expand Down

0 comments on commit dea41fe

Please sign in to comment.