Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Improve geosearch error messages and accept string as coord #523

Merged
merged 2 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 37 additions & 12 deletions milli/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub enum UserError {
DocumentLimitReached,
InvalidDocumentId { document_id: Value },
InvalidFacetsDistribution { invalid_facets_name: BTreeSet<String> },
InvalidGeoField { document_id: Value },
InvalidGeoField(GeoError),
InvalidFilter(String),
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
SortRankingRuleMissing,
Expand All @@ -76,6 +76,14 @@ pub enum UserError {
InvalidMinTypoWordLenSetting(u8, u8),
}

#[derive(Debug)]
pub enum GeoError {
MissingLatitude { document_id: Value },
MissingLongitude { document_id: Value },
BadLatitude { document_id: Value, value: Value },
BadLongitude { document_id: Value, value: Value },
}

impl From<io::Error> for Error {
fn from(error: io::Error) -> Error {
// TODO must be improved and more precise
Expand Down Expand Up @@ -230,17 +238,7 @@ impl fmt::Display for UserError {
name_list
)
}
Self::InvalidGeoField { document_id } => {
let document_id = match document_id {
Value::String(id) => id.clone(),
_ => document_id.to_string(),
};
write!(
f,
"The document with the id: `{}` contains an invalid `_geo` field.",
document_id
)
},
Self::InvalidGeoField(error) => write!(f, "{error}"),
Self::InvalidDocumentId { document_id } => {
let document_id = match document_id {
Value::String(id) => id.clone(),
Expand Down Expand Up @@ -314,6 +312,33 @@ impl fmt::Display for FieldIdMapMissingEntry {

impl StdError for FieldIdMapMissingEntry {}

impl From<GeoError> for UserError {
fn from(geo_error: GeoError) -> Self {
UserError::InvalidGeoField(geo_error)
}
}

impl fmt::Display for GeoError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
GeoError::MissingLatitude { document_id } => {
write!(f, "Could not find latitude in the document with the id: `{document_id}`. Was expecting a `_geo.lat` field.")
}
GeoError::MissingLongitude { document_id } => {
write!(f, "Could not find longitude in the document with the id: `{document_id}`. Was expecting a `_geo.lng` field.")
}
GeoError::BadLatitude { document_id, value } => {
write!(f, "Could not parse latitude in the document with the id: `{document_id}`. Was expecting a number but instead got `{value}`.")
}
GeoError::BadLongitude { document_id, value } => {
write!(f, "Could not parse longitude in the document with the id: `{document_id}`. Was expecting a number but instead got `{value}`.")
}
}
}
}
irevoire marked this conversation as resolved.
Show resolved Hide resolved

impl StdError for GeoError {}

impl fmt::Display for SerializationError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expand Down
41 changes: 34 additions & 7 deletions milli/src/update/index_documents/extract/extract_geo_points.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use std::fs::File;
use std::io;
use std::result::Result as StdResult;

use concat_arrays::concat_arrays;
use serde_json::Value;

use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::{FieldId, InternalError, Result, UserError};

/// Extracts the geographical coordinates contained in each document under the `_geo` field.
Expand All @@ -24,19 +27,43 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
let (lat, lng) = obkv.get(lat_fid).zip(obkv.get(lng_fid)).ok_or_else(|| {
// since we only needs the primary key when we throw an error we create this getter to
// lazily get it when needed
let primary_key = || -> Value {
let primary_key = obkv.get(primary_key_id).unwrap();
let primary_key = serde_json::from_slice(primary_key).unwrap();
UserError::InvalidGeoField { document_id: primary_key }
serde_json::from_slice(primary_key).unwrap()
};

// first we get the two fields
let lat = obkv.get(lat_fid).ok_or_else(|| -> UserError {
GeoError::MissingLatitude { document_id: primary_key() }.into()
})?;
let lng = obkv.get(lng_fid).ok_or_else(|| -> UserError {
GeoError::MissingLongitude { document_id: primary_key() }.into()
})?;
let (lat, lng): (f64, f64) = (
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
);

// then we extract the values
let lat = extract_value(serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?)
.map_err(|lat| -> UserError {
GeoError::BadLatitude { document_id: primary_key(), value: lat }.into()
})?;

let lng = extract_value(serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?)
.map_err(|lng| -> UserError {
GeoError::BadLongitude { document_id: primary_key(), value: lng }.into()
})?;

let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
writer.insert(docid_bytes, bytes)?;
}

Ok(writer_into_reader(writer)?)
}

fn extract_value(value: Value) -> StdResult<f64, Value> {
irevoire marked this conversation as resolved.
Show resolved Hide resolved
match value {
Value::Number(ref n) => n.as_f64().ok_or(value),
Value::String(ref s) => s.parse::<f64>().map_err(|_| value),
value => Err(value),
}
}
129 changes: 129 additions & 0 deletions milli/src/update/index_documents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1005,6 +1005,135 @@ mod tests {
wtxn.commit().unwrap();
}

#[test]
fn index_all_flavour_of_geo() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();

let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
let mut builder = update::Settings::new(&mut wtxn, &index, &config);

builder.set_filterable_fields(hashset!(S("_geo")));
builder.execute(|_| ()).unwrap();
wtxn.commit().unwrap();

let indexing_config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut wtxn = index.write_txn().unwrap();

let documents = documents!([
{ "id": 0, "_geo": { "lat": 31, "lng": [42] } },
{ "id": 1, "_geo": { "lat": "31" }, "_geo.lng": 42 },
{ "id": 2, "_geo": { "lng": "42" }, "_geo.lat": "31" },
{ "id": 3, "_geo.lat": 31, "_geo.lng": "42" },
]);
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(documents).unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();

let rtxn = index.read_txn().unwrap();

let mut search = crate::Search::new(&rtxn, &index);
search.filter(crate::Filter::from_str("_geoRadius(31, 42, 0.000001)").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![0, 1, 2, 3]);
}

#[test]
fn geo_error() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();

let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
let mut builder = update::Settings::new(&mut wtxn, &index, &config);

builder.set_filterable_fields(hashset!(S("_geo")));
builder.execute(|_| ()).unwrap();
wtxn.commit().unwrap();

let indexing_config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut wtxn = index.write_txn().unwrap();

let documents = documents!([
{ "id": 0, "_geo": { "lng": 42 } }
]);
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(documents).unwrap();
let error = builder.execute().unwrap_err();
assert_eq!(
&error.to_string(),
r#"Could not find latitude in the document with the id: `0`. Was expecting a `_geo.lat` field."#
);

let documents = documents!([
{ "id": 0, "_geo": { "lat": 42 } }
]);
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(documents).unwrap();
let error = builder.execute().unwrap_err();
assert_eq!(
&error.to_string(),
r#"Could not find longitude in the document with the id: `0`. Was expecting a `_geo.lng` field."#
);

let documents = documents!([
{ "id": 0, "_geo": { "lat": "lol", "lng": 42 } }
]);
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(documents).unwrap();
let error = builder.execute().unwrap_err();
assert_eq!(
&error.to_string(),
r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `"lol"`."#
);

let documents = documents!([
{ "id": 0, "_geo": { "lat": [12, 13], "lng": 42 } }
]);
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(documents).unwrap();
let error = builder.execute().unwrap_err();
assert_eq!(
&error.to_string(),
r#"Could not parse latitude in the document with the id: `0`. Was expecting a number but instead got `[12,13]`."#
);

let documents = documents!([
{ "id": 0, "_geo": { "lat": 12, "lng": "hello" } }
]);
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config.clone(), |_| ())
.unwrap();
builder.add_documents(documents).unwrap();
let error = builder.execute().unwrap_err();
assert_eq!(
&error.to_string(),
r#"Could not parse longitude in the document with the id: `0`. Was expecting a number but instead got `"hello"`."#
);
}

#[test]
fn delete_documents_then_insert() {
let path = tempfile::tempdir().unwrap();
Expand Down