From b3fe03e3a22ff5ff2454037be7d872d2cffb7173 Mon Sep 17 00:00:00 2001 From: "Watal M. Iwasaki" Date: Mon, 3 Jun 2024 06:43:30 +0900 Subject: [PATCH] :construction: Update and export graph-related functions - Cache edge_ages() for repeated sampling - Why distance("unweighted") runs slower than "dijkstra"? --- NAMESPACE | 5 ++++ R/graph.R | 73 +++++++++++++++++++++++++++++++++++++--------- R/ms.R | 37 ++++++++++++++++------- R/plot-genealogy.R | 4 ++- man/graph.Rd | 33 ++++++++++++++++++++- man/ms.Rd | 7 +++++ 6 files changed, 133 insertions(+), 26 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3041e23..9e2de28 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,7 +15,10 @@ export(count_extant) export(dist_euclidean) export(dist_genealogy) export(dist_vaf) +export(distances_from_origin) export(distances_mrs) +export(distances_upstream) +export(edge_lengths) export(evaluate_mrs) export(extract_history) export(filter_common_ancestors) @@ -51,9 +54,11 @@ export(sample_bulk) export(sample_random_regions) export(sample_uniform_regions) export(save_serial_section) +export(sink_cells) export(snapshot_surface) export(sort_vaf) export(stats_dispersion) +export(subgraph_upstream) export(subtree) export(summarize_capture_rate) export(summarize_demography) diff --git a/R/graph.R b/R/graph.R index 33e45f2..38fcd64 100644 --- a/R/graph.R +++ b/R/graph.R @@ -19,14 +19,21 @@ as_symbolic_edgelist = function(population) { } #' @details -#' `subtree` extracts subgraph among terminal nodes. +#' `subtree()` and `subgraph_upstream()` extracts subgraph among terminal nodes. #' @param graph igraph #' @param nodes integer cell IDs +#' @param vids vertex IDs #' @param trim whether to remove common ancestors older than MRCA. #' @rdname graph #' @export subtree = function(graph, nodes = integer(0L), trim = FALSE) { vids = igraphlite::as_vids(graph, nodes) + subgraph_upstream(graph, vids, trim = trim) +} + +#' @rdname graph +#' @export +subgraph_upstream = function(graph, vids = integer(0L), trim = FALSE) { vids = upstream_vertices(graph, vids, trim = trim) igraphlite::induced_subgraph(graph, vids) } @@ -59,6 +66,29 @@ internal_nodes = function(graph, nodes, sensitivity) { as.integer(names(counts)[(counts / n) > sensitivity]) } +#' @details +#' `distances_from_origin()` and `distances_upstream()` are a shorthand of +#' `distances(..., mode = 2L)`. +#' @param weights A numeric or logical. Edge attribute "weight" is used if TRUE. +#' @rdname graph +#' @export +distances_from_origin = function(graph, nodes = sink_cells(graph), weights = numeric(0L), trim = FALSE) { + vids = igraphlite::as_vids(graph, nodes) + distances_upstream(graph, vids, weights = weights, trim = trim) +} + +#' @rdname graph +#' @export +distances_upstream = function(graph, vids = numeric(0), weights = numeric(0L), trim = FALSE) { + src = if (trim) { + common_ancestors(graph, vids)[1L] + } else { + igraphlite::Vsource(graph) + } + res = igraphlite::distances(graph, vids, src, weights = weights, mode = 2L) + as.vector(res) +} + upstream_vertices = function(graph, vids, trim = FALSE) { vlist = neighborhood_in(graph, vids) vids = unique(unlist(vlist, use.names = FALSE)) @@ -69,6 +99,17 @@ upstream_vertices = function(graph, vids, trim = FALSE) { vids } +get_mrca = function(graph, nodes) { + ca = common_ancestors(graph, igraphlite::as_vids(graph, nodes)) + igraphlite::as_vnames(graph, ca[1L]) +} + +common_ancestors = function(graph, vids) { + vlist = neighborhood_in(graph, vids) + vids = unique(unlist(vlist, use.names = FALSE)) + Reduce(intersect, vlist) +} + neighborhood_out = function(graph, vids) { igraphlite::neighborhood(graph, vids, order = 1073741824L, mode = 1L) } @@ -91,22 +132,16 @@ paths_to_source = function(graph, nodes = integer(0L)) { lapply(res, function(x) vnames[x]) } -shortest_dist_from_source = function(graph, vids = numeric(0)) { - res = igraphlite::distances(graph, vids, igraphlite::Vsource(graph), mode = 2L, algorithm = "unweighted") - as.integer(res) -} - -distances_from_origin = function(graph, nodes = integer(0L)) { - vids = igraphlite::as_vids(graph, nodes) - shortest_dist_from_source(graph, vids) -} - -sink_nodes = function(graph) { +#' @details +#' `sink_cells()` returns the cell IDs of the sink nodes. +#' @rdname graph +#' @export +sink_cells = function(graph) { igraphlite::Vnames(graph)[igraphlite::is_sink(graph)] } sinks = function(graph, nodes) { - .sink = sink_nodes(graph) + .sink = sink_cells(graph) paths = paths_to_sink(graph, nodes) lapply(paths, function(x) x[x %in% .sink]) } @@ -133,3 +168,15 @@ count_sink = function(graph, nodes = integer(0L)) { sum(igraphlite::is_sink(graph)) } } + +copy_edge_attr = function(subgraph, graph) { + if (ncol(graph$Eattr) > 0L) { + eattr_names = names(igraphlite::edge_attr(graph)) + lhs = as.data.frame(subgraph) |> + dplyr::select(!dplyr::any_of(eattr_names)) + rhs = as.data.frame(graph) + subgraph$Eattr = dplyr::left_join(lhs, rhs, by = c("from", "to")) |> + dplyr::select(!c("from", "to")) + } + invisible(subgraph) +} diff --git a/R/ms.R b/R/ms.R index 9fd7748..202ee0a 100644 --- a/R/ms.R +++ b/R/ms.R @@ -38,18 +38,33 @@ mutate_clades = function(graph, mu = NULL, segsites = NULL) { dplyr::mutate(carriers = paths_to_sink(graph, .data$origin)) } -genetic_distance = function(graph, vids = igraphlite::Vsink(graph), mu = 0, accel = 0) { - segments = if (accel > 0) { - age = distances_from_origin(graph) - (1 + accel)**age - } else { - rep_len(1.0, graph$vcount) +#' @details +#' `edge_lengths()` calculates lengths of edges on a given genealogy tree. +#' @param accel assumption undocumented yet +#' @rdname ms +#' @export +edge_lengths = function(graph, mu = 0, accel = 0) { + # distances(): "unweighted" is slower than "dijkstra" + lens = rep_len(1, graph$ecount) + if (accel > 0) { + lens = (1 + accel)**edge_ages(graph) } if (mu > 0) { - segments = stats::rpois(length(segments), mu * segments) + lens = stats::rpois(graph$ecount, mu * lens) } - segments[igraphlite::is_source(graph)] = 0 - ancestors = neighborhood_in(graph, vids) - names(ancestors) = igraphlite::Vnames(graph)[vids] - purrr::map_dbl(ancestors, \(v) sum(segments[v])) + lens +} + +edge_ages = function(graph) { + age = igraphlite::edge_attr(graph, "age") + if (is.null(age)) { + age = distances_upstream(graph, graph$to, trim = FALSE) + igraphlite::edge_attr(graph, "age") = age + } + age +} + +mutate_edges = function(graph, mu = 0, accel = 0) { + igraphlite::edge_attr(graph, "weight") = edge_lengths(graph, mu = mu, accel = accel) + graph } diff --git a/R/plot-genealogy.R b/R/plot-genealogy.R index 71a6e1a..87496b4 100644 --- a/R/plot-genealogy.R +++ b/R/plot-genealogy.R @@ -22,7 +22,9 @@ augment_genealogy = function(graph, mu = 0, accel = 0) { TRUE ~ "internal" )) if (mu > 0 || accel > 0) { - named_dist = genetic_distance(graph, igraphlite::V(graph), mu = mu, accel = accel) + elengths = edge_lengths(graph, mu = mu, accel = accel) + udist = distances_upstream(graph, weights = elengths) + named_dist = stats::setNames(udist, vnames) res$d = named_dist[as.character(res$node)] res$d_parent = named_dist[as.character(res$parent)] } diff --git a/man/graph.Rd b/man/graph.Rd index 0e0ef73..6765133 100644 --- a/man/graph.Rd +++ b/man/graph.Rd @@ -3,17 +3,39 @@ \name{make_igraph} \alias{make_igraph} \alias{subtree} +\alias{subgraph_upstream} \alias{trim_root} \alias{internal_nodes} +\alias{distances_from_origin} +\alias{distances_upstream} +\alias{sink_cells} \title{Functions depending on igraph} \usage{ make_igraph(population) subtree(graph, nodes = integer(0L), trim = FALSE) +subgraph_upstream(graph, vids = integer(0L), trim = FALSE) + trim_root(graph) internal_nodes(graph, nodes, sensitivity) + +distances_from_origin( + graph, + nodes = sink_cells(graph), + weights = numeric(0L), + trim = FALSE +) + +distances_upstream( + graph, + vids = numeric(0), + weights = numeric(0L), + trim = FALSE +) + +sink_cells(graph) } \arguments{ \item{population}{tbl} @@ -24,7 +46,11 @@ internal_nodes(graph, nodes, sensitivity) \item{trim}{whether to remove common ancestors older than MRCA.} +\item{vids}{vertex IDs} + \item{sensitivity}{minimum allele frequency} + +\item{weights}{A numeric or logical. Edge attribute "weight" is used if TRUE.} } \description{ Functions depending on igraph @@ -32,9 +58,14 @@ Functions depending on igraph \details{ \code{make_igraph} converts raw population tbl into graph. -\code{subtree} extracts subgraph among terminal nodes. +\code{subtree()} and \code{subgraph_upstream()} extracts subgraph among terminal nodes. \code{trim_root()} removes ancestral nodes older than MRCA. \code{internal_nodes} selects major common ancestors above threshold. + +\code{distances_from_origin()} and \code{distances_upstream()} are a shorthand of +\code{distances(..., mode = 2L)}. + +\code{sink_cells()} returns the cell IDs of the sink nodes. } diff --git a/man/ms.Rd b/man/ms.Rd index 8126e6c..fcb58f8 100644 --- a/man/ms.Rd +++ b/man/ms.Rd @@ -2,9 +2,12 @@ % Please edit documentation in R/ms.R \name{make_sample} \alias{make_sample} +\alias{edge_lengths} \title{Sprinkle mutations on genealogy} \usage{ make_sample(graph, nsam = 0L, mu = NULL, segsites = NULL) + +edge_lengths(graph, mu = 0, accel = 0) } \arguments{ \item{graph}{igraph} @@ -14,10 +17,14 @@ make_sample(graph, nsam = 0L, mu = NULL, segsites = NULL) \item{mu}{mutation rate per cell division (ignored if segsites is given)} \item{segsites}{number of segregating sites} + +\item{accel}{assumption undocumented yet} } \description{ Sprinkle mutations on genealogy } \details{ \code{make_sample} creates a genotype matrix from a given genealogy tree. + +\code{edge_lengths()} calculates lengths of edges on a given genealogy tree. }