Skip to content

Commit

Permalink
Merge pull request #34 from monarch-initiative/checks_pass
Browse files Browse the repository at this point in the history
Passing more checks
  • Loading branch information
oneilsh authored Aug 16, 2024
2 parents 6650a30 + f4c4e4f commit 3656ffe
Show file tree
Hide file tree
Showing 59 changed files with 523 additions and 3,872 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Type: Package
Title: Monarch Knowledge Graph Queries
Description: R package for easy access, manipulation, and analysis of
Monarch KG data Resources.
Version: 1.1.0
Version: 1.2.1
URL: https://github.com/monarch-initiative/monarchr
BugReports: https://github.com/monarch-initiative/monarchr/issues
Authors@R:
Expand Down Expand Up @@ -45,6 +45,5 @@ Imports:
ggraph,
ggplot2
Suggests:
ggraph,
testthat (>= 3.0.0),
rworkflows
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ export(file_engine)
export(file_engine_check)
export(get_engine)
export(kg_join)
export(load_kgx)
export(monarch_engine)
export(monarch_engine_check)
export(monarch_search)
export(monarch_semsim)
export(neo4j_engine)
export(neo4j_engine_check)
export(nodes)
export(save_kgx)
export(summarize_neighborhood)
export(tbl_kgx)
import(dplyr)
Expand All @@ -40,6 +42,7 @@ import(knitr)
import(stringr)
import(tidygraph)
importFrom(archive,archive_read)
importFrom(archive,archive_write_files)
importFrom(assertthat,assert_that)
importFrom(dplyr,slice_head)
importFrom(httr,GET)
Expand All @@ -55,6 +58,7 @@ importFrom(purrr,map_chr)
importFrom(readr,col_character)
importFrom(readr,cols)
importFrom(readr,read_tsv)
importFrom(readr,write_tsv)
importFrom(rlang,enquos)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
Expand Down
13 changes: 13 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@



# monarchr 1.2.1

## New features

* New `save_kgx()` and `load_kgx()` for saving/loading graphs.

## Bugfixes

* Reduced size of package with smaller example data
* Fixed a bug in `expand()` where the engine was not carried through for file-engine backed graphs
* Unit tests dependent on live-hosted Monarch KG now allow for minor changes in result counts

# monarchr 1.1.0

## New features
Expand Down
2 changes: 1 addition & 1 deletion R/cypher_query.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' result <- cypher_query(engine, query, parameters)
#' print(result)
#' @importFrom neo2R cypher
cypher_query <- function(engine, ...) {
cypher_query <- function(engine, query, parameters = NULL, ...) {
UseMethod("cypher_query")
}

2 changes: 1 addition & 1 deletion R/cypher_query_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
#' result <- cypher_query_df(engine, query, parameters)
#' print(result)
#' @importFrom neo2R cypher
cypher_query_df <- function(engine, ...) {
cypher_query_df <- function(engine, query, parameters = list(), ...) {
UseMethod("cypher_query_df")
}
9 changes: 5 additions & 4 deletions R/expand.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
#'
#'
#' @examples
#' ## Using local MONDO KGX file (packaged with monarchr)
#' phenos <- file_engine(system.file("extdata", "mondo_kgx_tsv.tar.gz",
#' package = "monarchr")) |>
#' ## Using example KGX file packaged with monarchr
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#' phenos <- file_engine(filename) |>
#' fetch_nodes(query_ids = "MONDO:0007525") |>
#' expand(predicates = "biolink:has_phenotype",
#' categories = "biolink:PhenotypicFeature")
Expand Down Expand Up @@ -60,6 +60,7 @@ expand <- function(graph,
categories = NULL,
transitive = FALSE,
drop_unused_query_nodes = FALSE,
...) {

...) {
UseMethod("expand")
}
27 changes: 14 additions & 13 deletions R/expand_file_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ transitive_query_internal <- function(engine,
categories = NULL,
drop_unused_query_nodes = FALSE) {

# TODO: this block will never trigger; the check is done in the main function, remove
if(length(predicates) > 1) {
# we call recusively on each predicate
for(predicate in predicates) {
g2 <- transitive_query_internal(engine,
g,
direction = direction,
predicates = predicate,
categories = categories,
drop_unused_query_nodes = TRUE)
suppressMessages(g <- tidygraph::graph_join(g, g2), classes = "message") # suppress joining info
}
}
# # TODO: this block will never trigger; the check is done in the main function, remove
# if(length(predicates) > 1) {
# # we call recusively on each predicate
# for(predicate in predicates) {
# g2 <- transitive_query_internal(engine,
# g,
# direction = direction,
# predicates = predicate,
# categories = categories,
# drop_unused_query_nodes = TRUE)
# suppressMessages(g <- tidygraph::graph_join(g, g2), classes = "message") # suppress joining info
# }
# }

# assert that direction is "out" or "in"
assert_that(direction == "out" | direction == "in", msg = "Direction must be 'out' or 'in' when using transitive closure.")
Expand Down Expand Up @@ -74,6 +74,7 @@ transitive_query_internal <- function(engine,
filter(id %in% bfs_nodes)
}

attr(bfs_result, "last_engine") <- engine
return(bfs_result)
}

Expand Down
8 changes: 4 additions & 4 deletions R/fetch_nodes.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@
#'
#' @examples
#' # file_engine supports the same features as neo4j_engine
#' # (using the MONDO KGX file packaged with monarchr)
#' filename <- system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr")
#' # (using the example KGX file packaged with monarchr)
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#'
#' file_engine(filename) |>
#' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
#'
#' # grab all rare diseases
#' # grab all Homo sapiens genes
#' file_engine(filename) |>
#' fetch_nodes("rare" %in_list% subsets & "biolink:Disease" %in_list% category)
#' fetch_nodes(in_taxon_label == "Homo sapiens" & "biolink:Gene" %in_list% category)
#'
#' @export
fetch_nodes <- function(engine, ..., query_ids = NULL, limit = NULL) {
Expand Down
4 changes: 2 additions & 2 deletions R/file_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#' library(tidygraph)
#' library(dplyr)
#'
#' # Using a local MONDO KGX file (packaged with monarchr)
#' filename <- system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr")
#' # Using example KGX file packaged with monarchr
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#' engine <- file_engine(filename)
#'
#' res <- engine |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947"))
Expand Down
4 changes: 2 additions & 2 deletions R/get_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
#' @param fail_if_missing If TRUE, fail if there is no engine associated with the graph.
#' @return A graph engine object.
#' @examples
#' # (using the MONDO KGX file packaged with monarchr)
#' filename <- system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr")
#' # Using example KGX file packaged with monarchr
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#'
#' g <- file_engine(filename) |>
#' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
Expand Down
12 changes: 6 additions & 6 deletions R/kg_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,21 @@
#' @return A `tbl_kgx()` graph
#' @export
#' @examples
#' ## Using local MONDO KGX file (packaged with monarchr)
#' monarch <- file_engine(system.file("extdata", "mondo_kgx_tsv.tar.gz",
#' package = "monarchr"))
#' ## Using example KGX file packaged with monarchr
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#' engine <- file_engine(filename)
#'
#' eds_and_phenos <- monarch |>
#' eds_and_phenos <- engine |>
#' fetch_nodes(query_ids = "MONDO:0007525") |>
#' expand(predicates = "biolink:has_phenotype",
#' categories = "biolink:PhenotypicFeature")
#'
#' marfan_and_phenos <- monarch |>
#' marfan_and_phenos <- engine |>
#' fetch_nodes(query_ids = "MONDO:0007947") |>
#' expand(predicates = "biolink:has_phenotype",
#' categories = "biolink:PhenotypicFeature")
#'
#' combined <- graph_join(eds_and_phenos, marfan_and_phenos)
#' combined <- kg_join(eds_and_phenos, marfan_and_phenos)
#' print(combined)
kg_join <- function(graph1, graph2, ...) {
UseMethod("kg_join")
Expand Down
12 changes: 6 additions & 6 deletions R/kg_join.tbl_kgx.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' @import tidygraph
#' @import dplyr
#' @export
kg_join.tbl_kgx <- function(g1, g2) {
nodes_g1 <- nodes(g1)
nodes_g2 <- nodes(g2)
kg_join.tbl_kgx <- function(graph1, graph2, ...) {
nodes_g1 <- nodes(graph1)
nodes_g2 <- nodes(graph2)

# we don't want to keep the to and from columns in the edges, since we'll be rebuilding edges from scratch,
# and be running them through unique()
edges_g1 <- edges(g1) |> select(-to, -from)
edges_g2 <- edges(g2) |> select(-to, -from)
edges_g1 <- edges(graph1) |> select(-to, -from)
edges_g2 <- edges(graph2) |> select(-to, -from)

all_nodes <- unique(nodes_g1 |>
full_join(nodes_g2)) |>
Expand Down Expand Up @@ -53,6 +53,6 @@ kg_join.tbl_kgx <- function(g1, g2) {
all_nodes <- all_nodes |>
select(-idx) # remove the idx column

res <- tbl_kgx(nodes = all_nodes, edges = filled_edges, attach_engine = get_engine(g1, fail_if_missing = FALSE))
res <- tbl_kgx(nodes = all_nodes, edges = filled_edges, attach_engine = get_engine(graph1, fail_if_missing = FALSE))
return(res)
}
8 changes: 6 additions & 2 deletions R/knit_print.tbl_kgx.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,18 @@ clean_df <- function(df) {


#' Specialized print function for KGX graphs in knitted documents
#'
#' @param x A `tbl_kgx` graph to display.
#' @param ... Other arguments (unused).
#' @param show The maximum number of nodes and edges to display.
#' @export
#' @import knitr
#' @importFrom kableExtra kable
#' @importFrom kableExtra kable_styling
#' @importFrom kableExtra column_spec
#' @importFrom dplyr slice_head
knit_print.tbl_kgx <- function(graph, show = 100, ...) {
knit_print.tbl_kgx <- function(x, ..., show = 100) {
graph <- x

g <- order_cols(graph)

nodes_colnames <- colnames(nodes(g))
Expand Down
35 changes: 35 additions & 0 deletions R/load_kgx.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

#' Load a graph from a KGX-formatted .tar.gz file.
#'
#' Given a KGX-formatted tabular KG
#' (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md)
#' loads it as a graph.
#'
#' @param filename File to the graph from. Must end in .tar.gz and conform to KGX specification (see description).
#' @param attach_engine An engine to attach to the graph (optional).
#' @param ... Other parameters (unused)
#' @return A `tbl_kgx` graph.
#' @export
#' @examplesIf monarch_engine_check()
#' phenos <- monarch_engine() |>
#' fetch_nodes(query_ids = "MONDO:0007525") |>
#' expand(predicates = "biolink:has_phenotype",
#' categories = "biolink:PhenotypicFeature")
#'
#' save_kgx(phenos, "phenos.tar.gz")
#'
#' # when loading the graph, we can optionally attach an engine
#' loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine())
#'
#' loaded_phenos
#'
#' # cleanup saved file
#' file.remove("phenos.tar.gz")
load_kgx <- function(filename, attach_engine = NULL, ...) {
e <- file_engine(filename)
g <- e$graph

attr(g, "last_engine") <- attach_engine
g <- order_cols(g)
return(g)
}
32 changes: 16 additions & 16 deletions R/monarch_semsim.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
#' Semantic similarity mapping between two graphs
#'
#'
#' This function calls the Monarch-hosted semantic similarity API to compare two
#' graphs, via the same endpoints as the Monarch Phenotype Explorer:
#' https://monarchinitiative.org/explore#phenotype-explorer.
#'
#'
#' The API returns the best matches between the nodes of the two graphs, based on
#' a specified knowledge-graph-boased metric: the default is `"ancestor_information_content"`,
#' a specified knowledge-graph-boased metric: the default is `"ancestor_information_content"`,
#' also available are `"jaccard_similarity"` and `"phenodigm_score"`. The result is
#' returned as a graph, with `"computed:best_matches"` edges between the nodes of the two input graphs.
#'
#'
#' By default, the function only returns the best matches from the first graph to the second graph, and
#' removes any nodes that do not have a match. If `include_reverse = TRUE`, the function also returns
#' the best matches from the second graph to the first graph.
#'
#'
#' The engine attached to the return graph is that of the query.
#'
#'
#' @param query_graph A tbl_kgx graph.
#' @param target_graph A tbl_kgx graph.
#' @param metric The semantic similarity metric to use. Default is `"ancestor_information_content"`. Also available are `"jaccard_similarity"` and `"phenodigm_score"`.
#' @param include_reverse Whether to include the best matches from the target graph to the query graph. Default is `FALSE`.
#' @param keep_unmatched_targets Whether to keep nodes in the target graph that do not have a match. Default is `FALSE`.
#' @param keep_unmatched Whether to keep nodes in the target graph that do not have a match. Default is `FALSE`.
#' @return A tbl_kgx graph with `"computed:best_matches"` edges between the nodes of the two input graphs.
#' @export
#'
#'
#' @importFrom httr POST content http_status
#' @import tidygraph
#' @import dplyr
Expand All @@ -42,23 +42,23 @@
#' # also inclue the unmatched targets
#' sim <- monarch_semsim(g1, g2, keep_unmatched = TRUE)
#' print(sim)
#'
#'
#' # inclue reverse matches
#' sim <- monarch_semsim(g1, g2, include_reverse = TRUE)
#' print(sim)
#'
monarch_semsim <- function(query_graph,
target_graph,
metric = "ancestor_information_content",
include_reverse = FALSE,
#'
monarch_semsim <- function(query_graph,
target_graph,
metric = "ancestor_information_content",
include_reverse = FALSE,
keep_unmatched = FALSE) {
# check that the metric is valid
assert_that(metric %in% c("ancestor_information_content", "jaccard_similarity", "phenodigm_score"), msg = "metric must be one of 'ancestor_information_content', 'jaccard_similarity', or 'phenodigm_score'")

engine <- monarch_engine()
api_url <- paste0(engine$preferences$monarch_api_url, "/semsim/compare")

# these are called subject_ids and object_ids in the API, but
# these are called subject_ids and object_ids in the API, but
# these don't relate to "subject" and "object" of a predicate
# we use the APIs terminology here
subject_ids <- nodes(query_graph)$id
Expand Down Expand Up @@ -140,4 +140,4 @@ monarch_semsim <- function(query_graph,
g <- tbl_kgx(nodes = nodes_df, edges = keep_edges_df, attach_engine = get_engine(query_graph, fail_if_missing = FALSE))

return(g)
}
}
11 changes: 8 additions & 3 deletions R/order_cols.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
#' Set edge/row data column order according to most recent engine preferences
#'
#' @param g A `tbl_kgx` graph.
#' @import dplyr
#' @import tidygraph
order_cols <- function(g) {
e <- attr(g, "last_engine")
node_prefs <- e$preferences$node_property_priority
edge_prefs <- e$preferences$edge_property_priority
node_prefs <- c("id", "pcategory", "name")
edge_prefs <- c("subject", "predicate", "object")

if(!is.null(e)) {
node_prefs <- e$preferences$node_property_priority
edge_prefs <- e$preferences$edge_property_priority
}

current_node_names <- colnames(nodes(g))
used_prefs_node_names <- node_prefs[node_prefs %in% current_node_names]
Expand Down
Loading

0 comments on commit 3656ffe

Please sign in to comment.