Skip to content

Commit

Permalink
Merge pull request #167 from PixelgenTechnologies/dev
Browse files Browse the repository at this point in the history
Release 0.18.0
  • Loading branch information
ambarrio authored Jul 11, 2024
2 parents 6c1b0c9 + 853b710 commit b3a862d
Show file tree
Hide file tree
Showing 77 changed files with 3,772 additions and 2,028 deletions.
10 changes: 6 additions & 4 deletions .github/actions/build-dev-image/action.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
name: "build-dev-image"
description: "Build a dev image and push it to the Pixelgen container registries"
inputs:
tag:
description: "The tag to use for the image"
required: true
cache-from:
description: "The cache to use for the build"
default: ""
Expand Down Expand Up @@ -61,7 +58,12 @@ runs:
bakeSetCommands.push(...cacheToArray)
bakeSetCommands.push(...platformArray)
bakeSetCommandsText = bakeSetCommands.join('\n')
// Override the version in the container if we are run in a version tag push event
if ( context.eventName === "push" && context.ref.startsWith('refs/tags/v') ) {
bakeSetCommands.push("*.args.VERSION_OVERRIDE=" + "${{ steps.meta.outputs.version }}" )
}
const bakeSetCommandsText = bakeSetCommands.join('\n')
core.setOutput('set', bakeSetCommandsText)
- name: Build and push to ghcr and ecr
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ jobs:
fail-fast: false
matrix:
case:
- D21
- mpx_v1
- mpx_v2

if: needs.pre_job.outputs.should_skip == '{}' || !fromJSON(needs.pre_job.outputs.paths_result).python.should_skip
steps:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ repos:
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v1.3.0" # Use the sha / tag you want to point at
rev: "v1.10.1" # Use the sha / tag you want to point at
hooks:
- id: mypy
exclude: "(^cue.mod/)|(^docs/)"
Expand Down
275 changes: 151 additions & 124 deletions CHANGELOG.md

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@
- [local G](https://doi.org/10.1007/s11749-018-0599-x)

> Bivand, R.S., Wong, D.W.S. Comparing implementations of global and local indicators of spatial association. TEST 27, 716–748 (2018). https://doi.org/10.1007/s11749-018-0599-x
- [dsb](https://doi.org/10.1038/s41467-022-29356-8)

> Mulè, M.P., Martins, A.J. & Tsang, J.S. Normalizing and denoising protein expression data from droplet-based single cell profiling. Nat Commun 13, 2099 (2022). https://doi.org/10.1038/s41467-022-29356-8
1,957 changes: 983 additions & 974 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/pixelator/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class AnalysisParameters:
colocalization_neighbourhood_size: int
colocalization_n_permutations: int
colocalization_min_region_count: int
colocalization_min_marker_count: int


def analyse_pixels(
Expand Down
31 changes: 27 additions & 4 deletions src/pixelator/analysis/colocalization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def colocalization_from_component_edgelist(
n_permutations: int = 50,
use_full_bipartite: bool = True,
min_region_count: int = 5,
min_marker_count: int = 5,
random_seed: Optional[int] = None,
) -> pd.DataFrame:
"""Get the colocalization scores for the component in the given `edgelist`.
Expand All @@ -65,6 +66,8 @@ def colocalization_from_component_edgelist(
projection, defaults to True
:param min_region_count: minimum number of counts in region to consider, defaults
to 5
:param min_marker_count: the minimum number of counts of a marker to calculate
colocalization
:param random_seed: Set the random seed for the permutation tests, defaults to None
:return: a dataframe with computed colocalization scores
:rtype: pd.DataFrame
Expand All @@ -84,6 +87,7 @@ def colocalization_from_component_edgelist(
neighbourhood_size=neighbourhood_size,
n_permutations=n_permutations,
min_region_count=min_region_count,
min_marker_count=min_marker_count,
random_seed=random_seed,
)

Expand All @@ -109,6 +113,7 @@ def colocalization_from_component_graph(
neighbourhood_size: int = 1,
n_permutations: int = 50,
min_region_count: int = 5,
min_marker_count: int = 5,
random_seed: Optional[int] = None,
) -> pd.DataFrame:
"""Compute the colocalization scores for this component graph.
Expand All @@ -121,20 +126,29 @@ def colocalization_from_component_graph(
p-values and z-scores, defaults to 50
:param min_region_count: minimum number of counts in region to consider, defaults
to 5
:param min_marker_count: the minimum number of counts of a marker to calculate
colocalization
:param random_seed: Set the random seed for the permutation tests, defaults to None
:return: a dataframe containing colocalization scores for this component
:rtype: pd.DataFrame
"""
logger.debug("Computing colocalization for component: %s", component_id)
logger.debug("Prepare the graph data for computing colocalization")
marker_counts_by_region = prepare_from_graph(graph, n_neighbours=neighbourhood_size)

raw_marker_counts = graph.node_marker_counts
# Record markers to keep
# Remove markers with zero variance and markers below minimum marker count
markers_to_keep = raw_marker_counts.columns[
(raw_marker_counts != 0).any(axis=0)
& (raw_marker_counts.nunique() > 1)
& (raw_marker_counts.sum() >= min_marker_count)
]

marker_counts_by_region = prepare_from_graph(graph, n_neighbours=neighbourhood_size)
marker_counts_by_region = marker_counts_by_region[markers_to_keep]
marker_counts_by_region = filter_by_region_counts(
marker_counts_by_region, min_region_counts=min_region_count
)
marker_counts_by_region = filter_by_unique_values(
marker_counts_by_region, at_least_n_unique=2
)

nrow, ncols = marker_counts_by_region.shape
if nrow == 0:
Expand Down Expand Up @@ -187,6 +201,7 @@ def colocalization_scores(
neighbourhood_size: int = 1,
n_permutations: int = 50,
min_region_count: int = 5,
min_marker_count: int = 5,
random_seed: Optional[int] = None,
) -> pd.DataFrame:
"""Compute colocalization scores for antibody pairs.
Expand Down Expand Up @@ -225,6 +240,8 @@ def colocalization_scores(
:param min_region_count: The minimum size of the region (e.g. number
of counts in the neighbourhood) required
for it to be considered
:param min_marker_count: the minimum number of counts of a marker to calculate
colocalization
:param random_seed: Set a random seed for the permutation function
:returns: a pd.DataFrame of scores
:rtype: pd.DataFrame
Expand Down Expand Up @@ -273,6 +290,7 @@ def data():
neighbourhood_size=neighbourhood_size,
n_permutations=n_permutations,
min_region_count=min_region_count,
min_marker_count=min_marker_count,
random_seed=random_seed,
)

Expand Down Expand Up @@ -402,6 +420,7 @@ def __init__(
neighbourhood_size: int,
n_permutations: int,
min_region_count: int,
min_marker_count: int,
):
"""Initialize the ColocalizationAnalysis.
Expand All @@ -413,11 +432,14 @@ def __init__(
:param min_region_count: The minimum size of the region (e.g. number
of counts in the neighbourhood) required
for it to be considered for colocalization analysis
:param min_marker_count: the minimum number of counts of a marker to calculate
colocalization
"""
self.transformation_type = transformation_type
self.neighbourhood_size = neighbourhood_size
self.n_permutations = n_permutations
self.min_region_count = min_region_count
self.min_marker_count = min_marker_count

def run_on_component(self, component: Graph, component_id: str) -> pd.DataFrame:
"""Run colocalization analysis on the component."""
Expand All @@ -429,6 +451,7 @@ def run_on_component(self, component: Graph, component_id: str) -> pd.DataFrame:
neighbourhood_size=self.neighbourhood_size,
n_permutations=self.n_permutations,
min_region_count=self.min_region_count,
min_marker_count=self.min_marker_count,
)

def post_process_data(self, data: pd.DataFrame) -> pd.DataFrame:
Expand Down
66 changes: 66 additions & 0 deletions src/pixelator/analysis/normalization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Functions for the colocalization analysis in pixelator.
Copyright © 2024 Pixelgen Technologies AB.
"""

from typing import List, Union

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler


def _regress_out_confounder(pheno, exprs, rcond=1e-8):
"""Linear regression to remove confounding factors from abundance data."""
design_matrix = np.column_stack((np.ones((len(pheno), 1)), pheno))
coefficients, res, rank, s = np.linalg.lstsq(design_matrix, exprs, rcond=rcond)
beta = coefficients[1:] # remove intercept term
return exprs - design_matrix[:, 1:].dot(beta)


def _get_background_abundance(dataframe: pd.DataFrame, axis=0):
"""Fit a double gaussian distribution to the abundance data and return the mean of the first gaussian as an estimation of the background level."""
background = pd.Series(index=dataframe.index if axis == 0 else dataframe.columns)
scores = pd.Series(index=dataframe.index if axis == 0 else dataframe.columns)
gmm = GaussianMixture(n_components=2, max_iter=1000, random_state=0)
if axis not in {0, 1}:
raise ValueError(f"Axis was {axis}. Must be 0 or 1")
ax_iter = dataframe.index if axis == 0 else dataframe.columns
for i in ax_iter:
current_axis = dataframe.loc[i, :] if axis == 0 else dataframe.loc[:, i]
gmm = gmm.fit(current_axis.to_frame())
background[i] = np.min(gmm.means_)
scores[i] = np.abs(gmm.means_[1] - gmm.means_[0]) / np.sum(gmm.covariances_)
return background, scores


def dsb_normalize(
raw_abundance: pd.DataFrame, isotype_controls: Union[List, None] = None
):
"""empty-droplet-free method as implemented in Mulè et. al. dsb package.
The normalization steps are: 1- log1p transformation, 2- remove background
abundance per marker, 3- regularize abundance per component.
:param raw_abundance: the raw abundance count data.
:param isotype_controls: list of isotype controls.
:return: normalized abundance data.
"""
log_abundance = np.log1p(raw_abundance)
marker_background, _ = _get_background_abundance(log_abundance, axis=1)
log_abundance = log_abundance - marker_background
component_background, _ = _get_background_abundance(log_abundance, axis=0)

if isotype_controls is not None:
control_signals = log_abundance.loc[:, isotype_controls]
control_signals["component_background"] = component_background
control_signals = StandardScaler().fit_transform(control_signals)
pheno = PCA(n_components=1).fit_transform(control_signals)
else:
raise ValueError(f"At least one isotype control must be provided.")

normalized_abundance = _regress_out_confounder(pheno, log_abundance)

return normalized_abundance
13 changes: 12 additions & 1 deletion src/pixelator/cli/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
)
@click.option(
"--colocalization-transformation",
default="log1p",
default="rate-diff",
required=False,
type=click.Choice(get_args(TransformationTypes)),
show_default=True,
Expand Down Expand Up @@ -134,6 +134,14 @@
"valid for computing colocalization"
),
)
@click.option(
"--colocalization-min-marker-count",
default=5,
required=False,
type=click.IntRange(min=0),
show_default=True,
help=("The minimum number of marker counts in component for colocalization"),
)
@output_option
@click.pass_context
@timer
Expand All @@ -150,6 +158,7 @@ def analysis(
colocalization_neighbourhood_size,
colocalization_n_permutations,
colocalization_min_region_count,
colocalization_min_marker_count,
output,
):
"""
Expand All @@ -171,6 +180,7 @@ def analysis(
colocalization_neighbourhood_size=colocalization_neighbourhood_size,
colocalization_n_permutations=colocalization_n_permutations,
colocalization_min_region_count=colocalization_min_region_count,
colocalization_min_marker_count=colocalization_min_marker_count,
)

# some basic sanity check on the input files
Expand Down Expand Up @@ -214,6 +224,7 @@ def analysis(
neighbourhood_size=colocalization_neighbourhood_size,
n_permutations=colocalization_n_permutations,
min_region_count=colocalization_min_region_count,
min_marker_count=colocalization_min_marker_count,
)
)

Expand Down
39 changes: 36 additions & 3 deletions src/pixelator/cli/collapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Copyright © 2022 Pixelgen Technologies AB.
"""

import sys
from collections import defaultdict
from concurrent import futures
from pathlib import Path
Expand All @@ -25,6 +26,38 @@
)


def _handle_errors(jobs, executor):
for job in jobs:
exception = job.exception()
if exception is None:
continue

logger.error(
"Found an issue in the process pool. Trying to determine what went wrong and set the correct exit code. Exception was: %s",
exception,
)
process_map = executor._processes
for pid in process_map.keys():
exit_code = process_map[pid].exitcode
if exit_code is not None and exit_code != 0:
logger.error(
"The child process in the process pool returned a non-zero exit code: %s.",
exit_code,
)
# If we have an out of memory exception, make sure we exit with that.
if abs(exit_code) == 9:
logger.error(
"One of the child processes was killed (exit code: 9). "
"Usually this is caused by the out-of-memory killer terminating the process. "
"The parent process will return an exit code of 137 to indicate that it terminated because of a kill signal in the child process."
)
sys.exit(137)
logger.error(
"Was unable to determine what when wrong in process pool. Will raise original exception."
)
raise exception


@click.command(
"collapse",
short_help=(
Expand Down Expand Up @@ -238,12 +271,12 @@ def collapse(
min_count=min_count,
)
)
jobs = list(futures.as_completed(jobs))
_handle_errors(jobs, executor)

total_input_reads = 0
tmp_files = []
for job in futures.as_completed(jobs):
if job.exception() is not None:
raise job.exception()
for job in jobs:
# the worker returns a path to a file (temp antibody edge list)
tmp_file, input_reads_count = job.result()
if tmp_file is not None:
Expand Down
2 changes: 1 addition & 1 deletion src/pixelator/cli/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"--layout-algorithm",
required=False,
multiple=True,
default=["pmds_3d"],
default=["wpmds_3d"],
help="Select a layout algorithm to use. This can be specified multiple times to compute multiple layouts. Default: pmds_3d",
type=click.Choice(get_args(SupportedLayoutAlgorithm)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/pixelator/cli/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def list_single_cell_panels(ctx: click.Context, param: Any, value: Any) -> None:
if not value or ctx.resilient_parsing:
return

options = list(config.panels.keys())
options = config.list_panel_names(include_aliases=True)
for option in options:
click_echo(option)

Expand Down
Loading

0 comments on commit b3a862d

Please sign in to comment.