Skip to content

Commit

Permalink
Merge pull request #198 from PixelgenTechnologies/feature/exe-2061-re…
Browse files Browse the repository at this point in the history
…solve-k-value-bug

Better error message when there are fewer nodes than the number of dimensions in pmds layout
  • Loading branch information
johandahlberg authored Nov 13, 2024
2 parents 9834f48 + b25a7ec commit 433e234
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 4 deletions.
9 changes: 7 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

_ Add `depth` column to `discarded_edgelist.parquet` output of the GRAPH stage that indicates at which refinement iteration the edge is removed.
_ Add `edges_removed_in_multiplet_recovery_first_iteration`, `edges_removed_in_multiplet_recovery_refinement` and `fraction_edges_removed_in_refinement` to graph report.json.
- Add `depth` column to `discarded_edgelist.parquet` output of the GRAPH stage that indicates at which refinement iteration the edge is removed.
- Add `edges_removed_in_multiplet_recovery_first_iteration`, `edges_removed_in_multiplet_recovery_refinement` and `fraction_edges_removed_in_refinement` to graph report.json.
- Add `is_potential_doublet` and `n_edges_to_split_doublet` columns to adata.obs.
- Add `fraction_potential_doublets` and `n_edges_to_split_potential_doublets` to annotate report.json.
- Add `--max-edges-to-split` option to `graph` to specify the maximum number of edges that can be removed between two sub-components during multiplet recovery.
Expand All @@ -30,6 +30,11 @@ _ Add `edges_removed_in_multiplet_recovery_first_iteration`, `edges_removed_in

- Remove the `components_recovered.csv` output from the GRAPH stage.


### Fixed

- Better error message when the number of nodes is lower than or equal to the number of requested dimensions in `pmds_layout`.

## [0.18.3] - 2024-09-26

### Fixed
Expand Down
11 changes: 9 additions & 2 deletions src/pixelator/graph/backends/implementations/_networkx.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,8 +788,9 @@ def pmds_layout(
if not nx.is_connected(g):
raise ValueError("Only connected graphs are supported.")

if pivots >= len(g.nodes):
total_nodes = len(g.nodes)
n_nodes = len(g.nodes)
if pivots >= n_nodes:
total_nodes = n_nodes
warnings.warn(
f"'pivots' ({pivots}) should be less than the number of "
f"nodes ({total_nodes}) in the graph. Using all nodes as 'pivots'."
Expand All @@ -802,6 +803,11 @@ def pmds_layout(
if pivots < dim:
raise ValueError("'pivots' must be greater than or equal to dim.")

# Reject graphs that have no more nodes than requested dimensions. Equality is
# rejected as well, so the message must ask for strictly more nodes than 'dim'.
# Failing here gives a clear error before the truncated SVD further down,
# which is called with k=dim.
if n_nodes <= dim:
    raise ValueError(
        f"Number of nodes in the graph ({n_nodes}) must be greater than "
        f"'dim' ({dim})."
    )

pivot_lower_bound = np.min([np.floor(0.2 * len(g.nodes)), 50])
if pivots < pivot_lower_bound:
raise ValueError(
Expand Down Expand Up @@ -862,6 +868,7 @@ def pmds_layout(
# Compute SVD and use distances to compute coordinates for all nodes
# in an abstract cartesian space
_, _, Vh = sp.sparse.linalg.svds(D_pivs_centered, k=dim, random_state=seed)

coordinates = D_pivs_centered @ np.transpose(Vh)
# Flip the coordinates here to make sure that we get the correct coordinate ordering
# i.e. iqr(x) > iqr(y) > iqr(z)
Expand Down
41 changes: 41 additions & 0 deletions tests/pixeldataset/test_precomputed_layouts.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest
from pandas.testing import assert_frame_equal

from pixelator.pixeldataset import PixelDataset
from pixelator.pixeldataset.precomputed_layouts import (
PreComputedLayouts,
aggregate_precomputed_layouts,
Expand Down Expand Up @@ -526,6 +527,46 @@ def test_generate_precomputed_layouts_for_components_with_single_layout_algorith
df = precomputed_layouts.to_df()
assert set(df["layout"]) == {"pmds_3d"}

def test_generate_precomputed_layouts_on_to_small_components(self):
    """Check that layout generation fails loudly on a too-small component.

    A single component made of only three edges is wrapped in a
    ``PixelDataset`` and passed to
    ``generate_precomputed_layouts_for_components`` with the ``"wpmds_3d"``
    layout algorithm. The call is expected to raise ``ValueError``.
    """
    # Minimal edgelist: three edges that all belong to the same component.
    edgelist = pd.DataFrame.from_dict(
        {
            "upia": ["A", "B", "C"],
            "upib": ["B", "C", "A"],
            "umi": ["G", "H", "I"],
            "sequence": ["J", "K", "L"],
            "component": [
                "2ac2ca983a4b82dd",
                "2ac2ca983a4b82dd",
                "2ac2ca983a4b82dd",
            ],
            "marker": ["CD3", "CD3", "CD3"],
            "count": [1, 1, 1],
        }
    )

    class MockAnnData:
        """Bare-bones stand-in exposing only the attributes defined below."""

        def __init__(self):
            self.n_obs = 10

        def copy(self):
            # Returning self is sufficient here; no independent copy is made.
            return self

        @property
        def obs(self):
            # One row per unique component id found in the edgelist.
            return pd.DataFrame(index=edgelist["component"].unique())

        @property
        def var(self):
            # One row per unique marker found in the edgelist.
            return pd.DataFrame(index=edgelist["marker"].unique())

    pixel_dataset = PixelDataset.from_data(MockAnnData(), edgelist=edgelist)
    layout_algorithm = "wpmds_3d"
    with pytest.raises(ValueError):
        generate_precomputed_layouts_for_components(
            pixel_dataset, layout_algorithms=layout_algorithm
        )


@pytest.mark.integration_test
class TestGeneratePrecomputedLayoutsForComponentsIntegrationTest:
Expand Down

0 comments on commit 433e234

Please sign in to comment.