Skip to content

Commit

Permalink
feat: adding /genes/search to annonars
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Aug 30, 2023
1 parent be2c3ca commit 8565256
Show file tree
Hide file tree
Showing 3 changed files with 264 additions and 7 deletions.
176 changes: 176 additions & 0 deletions src/server/actix_server/genes_search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
//! Implementation of `/genes/search`, which allows searching for genes by symbol, name, alias, or identifier.
use actix_web::{
get,
web::{self, Data, Json, Path},
Responder,
};

use crate::server::GeneNames;

use super::error::CustomError;
use serde_with::{formats::CommaSeparator, StringWithSeparator};

/// Gene attributes that a `/genes/search` query can be restricted to.
///
/// Variants are (de)serialized in `snake_case`, e.g. `HgncId` is written as `hgnc_id`.
#[derive(
    Debug,
    Clone,
    Copy,
    PartialEq,
    Eq,
    serde::Serialize,
    serde::Deserialize,
    strum::Display,
    strum::EnumString,
)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
enum Fields {
    /// The HGNC identifier of the gene.
    HgncId,
    /// The gene symbol.
    Symbol,
    /// The gene name.
    Name,
    /// Any of the alias symbols of the gene.
    AliasSymbol,
    /// Any of the alias names of the gene.
    AliasName,
    /// The ENSEMBL gene identifier.
    EnsemblGeneId,
    /// The NCBI gene identifier.
    NcbiGeneId,
}

/// Query string parameters accepted by `handle`.
// NB: the `serde_with` attribute macros have to stay in front of `#[derive]`.
#[serde_with::skip_serializing_none]
#[serde_with::serde_as]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
struct Request {
    /// The term to look for.
    pub q: String,
    /// Optional comma-separated list of fields that the search is restricted to.
    #[serde_as(as = "Option<StringWithSeparator::<CommaSeparator, Fields>>")]
    pub fields: Option<Vec<Fields>>,
}

/// A payload of type `T` annotated with a match score.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct Scored<T> {
    /// Score of the match.
    pub score: f32,
    /// The payload that was scored.
    pub data: T,
}

/// Result for `handle`.
// NB: `skip_serializing_none` is an attribute macro that rewrites the field attributes and
// therefore has to be placed *before* `#[derive]` (as on `Request`); otherwise it breaks as soon
// as an `Option` field is added to this struct.
#[serde_with::skip_serializing_none]
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
struct Container {
    // TODO: add data version
    /// The resulting gene information.
    pub genes: Vec<Scored<GeneNames>>,
}

/// Query for annotations for one variant.
#[allow(clippy::option_map_unit_fn)]
#[get("/genes/search")]
async fn handle(

Check warning on line 78 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L77-L78

Added lines #L77 - L78 were not covered by tests
data: Data<crate::server::WebServerData>,
_path: Path<()>,
query: web::Query<Request>,
) -> actix_web::Result<impl Responder, CustomError> {
if query.q.len() < 2 {
return Ok(Json(Container {

Check warning on line 84 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L83-L84

Added lines #L83 - L84 were not covered by tests
// server_version: VERSION.to_string(),
// builder_version,
genes: Vec::new(),

Check warning on line 87 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L87

Added line #L87 was not covered by tests
}));
}

let genes_db = data.genes.as_ref().ok_or(CustomError::new(anyhow::anyhow!(

Check warning on line 91 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L91

Added line #L91 was not covered by tests
"genes database not available"
)))?;

let max_items = 100;

Check warning on line 95 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L95

Added line #L95 was not covered by tests

let q = &query.q;
let fields: Vec<Fields> = if let Some(fields) = query.fields.as_ref() {
fields.clone()

Check warning on line 99 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L97-L99

Added lines #L97 - L99 were not covered by tests
} else {
Vec::new()

Check warning on line 101 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L101

Added line #L101 was not covered by tests
};

// The fields contain the given field or are empty.
let fields_contains = |field: &Fields| -> bool { fields.is_empty() || fields.contains(field) };

Check warning on line 105 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L105

Added line #L105 was not covered by tests

let mut genes = genes_db

Check warning on line 107 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L107

Added line #L107 was not covered by tests
.gene_strings
.iter()
.map(|gn| -> Scored<GeneNames> {
let score = if (fields_contains(&Fields::HgncId) && &gn.hgnc_id == q)
|| (fields_contains(&Fields::Symbol) && &gn.symbol == q)
|| (fields_contains(&Fields::Symbol) && &gn.symbol == q)
|| (fields_contains(&Fields::Name) && &gn.name == q)
|| (fields_contains(&Fields::EnsemblGeneId)
&& gn.ensembl_gene_id.iter().any(|s| s == q))
|| (fields_contains(&Fields::NcbiGeneId)
&& gn.ensembl_gene_id.iter().any(|s| s == q))

Check warning on line 118 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L110-L118

Added lines #L110 - L118 were not covered by tests
{
1f32
} else if fields_contains(&Fields::Symbol) && gn.symbol.contains(q) {
q.len() as f32 / gn.symbol.len() as f32
} else if fields_contains(&Fields::Name) && gn.name.contains(q) {
q.len() as f32 / gn.name.len() as f32
} else if fields_contains(&Fields::AliasSymbol)
&& gn.alias_symbol.iter().any(|s| s.contains(q))

Check warning on line 126 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L120-L126

Added lines #L120 - L126 were not covered by tests
{
gn.alias_symbol

Check warning on line 128 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L128

Added line #L128 was not covered by tests
.iter()
.map(|s| {
if s.contains(q) {
q.len() as f32 / s.len() as f32

Check warning on line 132 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L130-L132

Added lines #L130 - L132 were not covered by tests
} else {
0f32

Check warning on line 134 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L134

Added line #L134 was not covered by tests
}
})
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))

Check warning on line 137 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L137

Added line #L137 was not covered by tests
.unwrap_or(0f32)
} else if fields_contains(&Fields::AliasName)
&& gn.alias_name.iter().any(|s| s.contains(q))

Check warning on line 140 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L139-L140

Added lines #L139 - L140 were not covered by tests
{
gn.alias_name

Check warning on line 142 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L142

Added line #L142 was not covered by tests
.iter()
.map(|s| {
if s.contains(q) {
q.len() as f32 / s.len() as f32

Check warning on line 146 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L144-L146

Added lines #L144 - L146 were not covered by tests
} else {
0f32

Check warning on line 148 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L148

Added line #L148 was not covered by tests
}
})
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))

Check warning on line 151 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L151

Added line #L151 was not covered by tests
.unwrap_or(0f32)
} else {
0f32

Check warning on line 154 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L154

Added line #L154 was not covered by tests
};
Scored {
score,
data: gn.clone(),

Check warning on line 158 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L156-L158

Added lines #L156 - L158 were not covered by tests
}
})
.filter(|s| s.score > 0.0)

Check warning on line 161 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L161

Added line #L161 was not covered by tests
.take(max_items)
.collect::<Vec<_>>();

genes.sort_by(|a, b| {
(b.score, &b.data.symbol)
.partial_cmp(&(a.score, &b.data.symbol))
.unwrap_or(std::cmp::Ordering::Equal)

Check warning on line 168 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L165-L168

Added lines #L165 - L168 were not covered by tests
});

Ok(Json(Container {

Check warning on line 171 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L171

Added line #L171 was not covered by tests
// server_version: VERSION.to_string(),
// builder_version,
genes,

Check warning on line 174 in src/server/actix_server/genes_search.rs

View check run for this annotation

Codecov / codecov/patch

src/server/actix_server/genes_search.rs#L174

Added line #L174 was not covered by tests
}))
}
4 changes: 3 additions & 1 deletion src/server/actix_server/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod annos_variant;
pub mod error;
pub mod fetch;
pub mod genes_info;
pub mod genes_search;

use actix_web::{middleware::Logger, web::Data, App, HttpServer};

Expand All @@ -24,7 +25,8 @@ pub async fn main(args: &Args, dbs: Data<WebServerData>) -> std::io::Result<()>
.service(annos_variant::handle)
.service(annos_range::handle)
.service(annos_db_info::handle)
.service(genes_info::handle);
.service(genes_info::handle)
.service(genes_search::handle);
app.wrap(Logger::default())
})
.bind((args.listen_host.as_str(), args.listen_port))?
Expand Down
91 changes: 85 additions & 6 deletions src/server/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@ use std::time::Instant;

use clap::Parser;
use indicatif::ParallelProgressIterator;
use prost::Message;
use rayon::prelude::*;

use crate::common::{self, cli::GenomeRelease};
use crate::{
common::{self, cli::GenomeRelease},
genes::pbs,
};

/// Encode annotation database.
#[derive(
Expand Down Expand Up @@ -92,11 +96,32 @@ impl AnnoDb {
}
}

/// The identifiers and names known for a single gene.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
pub struct GeneNames {
    /// Gene ID assigned by HGNC.
    pub hgnc_id: String,
    /// Gene symbol assigned by HGNC.
    pub symbol: String,
    /// Name of the gene as given by HGNC.
    pub name: String,
    /// Alias symbols listed by HGNC.
    pub alias_symbol: Vec<String>,
    /// Alias names listed by HGNC.
    pub alias_name: Vec<String>,
    /// Gene ID from ENSEMBL, if any.
    pub ensembl_gene_id: Option<String>,
    /// Gene ID from NCBI, if any.
    pub ncbi_gene_id: Option<String>,
}

/// Database with gene information.
#[derive(Debug)]
pub struct GeneInfoDb {
    /// Handle to the underlying RocksDB database.
    pub db: rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>,
    /// Per-gene name information held in memory (used by `/genes/search`).
    pub gene_strings: Vec<GeneNames>,
}

/// Genome-release specific annotation for each database.
Expand Down Expand Up @@ -228,6 +253,47 @@ fn open_db(
res
}

/// Obtain gene names from the genes RocksDB.
fn extract_gene_names(

Check warning on line 257 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L257

Added line #L257 was not covered by tests
genes_db: &rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>,
) -> Result<Vec<GeneNames>, anyhow::Error> {
let mut result = Vec::new();

Check warning on line 260 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L260

Added line #L260 was not covered by tests

let cf_read = genes_db.cf_handle("genes").unwrap();
let mut iter = genes_db.raw_iterator_cf(&cf_read);
iter.seek(b"");
while iter.valid() {
if let Some(iter_value) = iter.value() {
let record = pbs::Record::decode(std::io::Cursor::new(iter_value))?;
let pbs::Record { hgnc, .. } = record;
if let Some(hgnc) = hgnc {

Check warning on line 269 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L262-L269

Added lines #L262 - L269 were not covered by tests
let pbs::HgncRecord {
hgnc_id,
symbol,
name,
alias_symbol,
alias_name,
ensembl_gene_id,
entrez_id,

Check warning on line 277 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L271-L277

Added lines #L271 - L277 were not covered by tests
..
} = hgnc;
result.push(GeneNames {

Check warning on line 280 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L280

Added line #L280 was not covered by tests
hgnc_id,
symbol,
name,
alias_symbol,
alias_name,
ensembl_gene_id,
ncbi_gene_id: entrez_id,
})
}
}
iter.next();

Check warning on line 291 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L291

Added line #L291 was not covered by tests
}

Ok(result)

Check warning on line 294 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L294

Added line #L294 was not covered by tests
}

/// Main entry point for `server rest` sub command.
pub fn run(args_common: &common::cli::Args, args: &Args) -> Result<(), anyhow::Error> {
tracing::info!("args_common = {:?}", &args_common);
Expand Down Expand Up @@ -255,11 +321,14 @@ pub fn run(args_common: &common::cli::Args, args: &Args) -> Result<(), anyhow::E
"...done opening genes database in {:?}",
before_open.elapsed()
);
data.genes = Some(GeneInfoDb { db: genes });
tracing::info!(
"...done opening genes database in {:?}",
before_opening.elapsed()
);
tracing::info!("Building gene names...");
let before_open = Instant::now();
let gene_names = extract_gene_names(&genes)?;
tracing::info!("...done building genes names {:?}", before_open.elapsed());
data.genes = Some(GeneInfoDb {
db: genes,
gene_strings: gene_names,

Check warning on line 330 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L324-L330

Added lines #L324 - L330 were not covered by tests
});
}
// Argument lists from the command line with the corresponding database enum value.
let paths_db_pairs = vec![
Expand Down Expand Up @@ -315,6 +384,16 @@ pub fn run(args_common: &common::cli::Args, args: &Args) -> Result<(), anyhow::E
args.listen_host.as_str(),
args.listen_port
);
tracing::info!(

Check warning on line 387 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L387

Added line #L387 was not covered by tests
" try: http://{}:{}/genes/search?q=BRCA",
args.listen_host.as_str(),

Check warning on line 389 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L389

Added line #L389 was not covered by tests
args.listen_port
);
tracing::info!(

Check warning on line 392 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L392

Added line #L392 was not covered by tests
" try: http://{}:{}/genes/search?q=BRCA&fields=hgnc_id,ensembl_gene_id,ncbi_gene_id,symbol",
args.listen_host.as_str(),

Check warning on line 394 in src/server/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/server/mod.rs#L394

Added line #L394 was not covered by tests
args.listen_port
);
tracing::info!(
" try: http://{}:{}/genes/info?hgnc_id=HGNC:12403",
args.listen_host.as_str(),
Expand Down

0 comments on commit 8565256

Please sign in to comment.