From 06556d887dc0971e62d1ebd3dd2b7fdb5d7bd0d6 Mon Sep 17 00:00:00 2001 From: Michael Cooper Date: Thu, 2 Sep 2021 14:55:17 +0000 Subject: [PATCH] feat(suggest): Add score field to suggestion (#119) Score indiciates the relative preference between suggestions, with higher scores being preferred. This can allow Firefox and other clients of Merino to make more intelligent decisions about what suggestions to include when many are available. Fixes #118 --- Cargo.lock | 4 +- merino-adm/src/remote_settings.rs | 6 +- merino-cache/Cargo.toml | 1 + merino-cache/src/redis/domain.rs | 4 +- merino-cache/src/redis/mod.rs | 7 +- merino-integration-tests/src/suggest.rs | 1 + merino-suggest/src/debug.rs | 4 +- merino-suggest/src/domain.rs | 140 ++++++++++++++++++++++++ merino-suggest/src/lib.rs | 13 ++- merino-suggest/src/wikifruit.rs | 6 +- merino-web/src/suggest.rs | 2 + merino/src/docs/api.rs | 4 + 12 files changed, 182 insertions(+), 10 deletions(-) create mode 100644 merino-suggest/src/domain.rs diff --git a/Cargo.lock b/Cargo.lock index 4da73f7c13..729f5d7112 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4290,9 +4290,9 @@ dependencies = [ [[package]] name = "tracing-bunyan-formatter" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce1eae70720bd6bb3944f7cf501761aeae658bd1f9293aa373c71a195064910" +checksum = "63053c8ba268268d59c06b919d6c94fd968b3bd9995c5f59a03dea42fc351e70" dependencies = [ "chrono", "gethostname", diff --git a/merino-adm/src/remote_settings.rs b/merino-adm/src/remote_settings.rs index 2f0ae5e0cc..ab3dda3802 100644 --- a/merino-adm/src/remote_settings.rs +++ b/merino-adm/src/remote_settings.rs @@ -7,7 +7,8 @@ use http::Uri; use lazy_static::lazy_static; use merino_settings::Settings; use merino_suggest::{ - SetupError, SuggestError, Suggestion, SuggestionProvider, SuggestionRequest, SuggestionResponse, + Proportion, SetupError, SuggestError, Suggestion, SuggestionProvider, SuggestionRequest, + SuggestionResponse, }; use remote_settings_client::client::FileStorage; use serde::Deserialize; @@ -195,6 +196,7 @@ impl RemoteSettingsSuggester { is_sponsored: !NON_SPONSORED_IAB_CATEGORIES .contains(&adm_suggestion.iab_category.as_str()), icon: icon_url, + score: Proportion::from(0.2), }); for keyword in adm_suggestion.keywords { suggestions.insert(keyword, merino_suggestion.clone()); @@ -365,6 +367,7 @@ mod tests { provider: "test".to_string(), is_sponsored: false, icon: Uri::from_static("https://en.wikipedia.org/favicon.ico"), + score: Proportion::zero(), }), ); let rs_suggester = RemoteSettingsSuggester { suggestions }; @@ -404,6 +407,7 @@ mod tests { provider: "test".to_string(), is_sponsored: false, icon: Uri::from_static("https://en.wikipedia.org/favicon.ico"), + score: Proportion::zero(), }), ); let rs_suggester = RemoteSettingsSuggester { suggestions }; diff --git a/merino-cache/Cargo.toml b/merino-cache/Cargo.toml index 94eb71b9bf..f0b9cc40f2 100644 --- a/merino-cache/Cargo.toml +++ b/merino-cache/Cargo.toml @@ -22,3 +22,4 @@ uuid = "0.8" http = "^0.2" proptest = "^1" fake = "2.4" +tokio = { version = "1", features = ["macros"] } diff --git a/merino-cache/src/redis/domain.rs b/merino-cache/src/redis/domain.rs index 0a733f4b9f..9ec8ca2184 100644 --- a/merino-cache/src/redis/domain.rs +++ b/merino-cache/src/redis/domain.rs @@ -102,7 +102,7 @@ impl FromRedisValue for RedisTtl { mod tests { use anyhow::{anyhow, Result}; use http::Uri; - use merino_suggest::Suggestion; + use merino_suggest::{Proportion, Suggestion}; use proptest::prelude::*; use redis::{FromRedisValue, ToRedisArgs}; @@ -120,6 +120,7 @@ mod tests { provider: "One Inc.".to_string(), is_sponsored: true, icon: Uri::from_static("https://example.com/icon/one.png"), + score: Proportion::zero(), }]; let mut serialized = SERIALIZATION_VERSION.to_vec(); @@ -187,6 +188,7 @@ mod tests { provider: "One Inc.".to_string(), is_sponsored: true, icon: Uri::from_static("https://example.com/icon/one.png"), + score: Proportion::zero(), }]; let val = RedisSuggestions(original_suggestions.clone()).to_redis_args(); diff --git a/merino-cache/src/redis/mod.rs b/merino-cache/src/redis/mod.rs index 615f8d5d09..e039d17557 100644 --- a/merino-cache/src/redis/mod.rs +++ b/merino-cache/src/redis/mod.rs @@ -400,7 +400,7 @@ where #[cfg(test)] mod test { use http::Uri; - use merino_suggest::Suggestion; + use merino_suggest::{Proportion, Suggestion}; use super::*; @@ -434,6 +434,7 @@ mod test { provider: text.clone(), is_sponsored: false, icon: uri.clone(), + score: Proportion::zero(), }; let suggestion2 = Suggestion { id: 5678, @@ -476,7 +477,7 @@ mod test { .query_async::(&mut redis_connection) .await .unwrap(); - assert_eq!(res1, res2); + assert_eq!(res1, res2, "cached values should match"); // trying to write with an new lock should work and release the lock. assert!(rlock @@ -491,7 +492,7 @@ mod test { .query_async::(&mut redis_connection) .await .unwrap(); - assert_ne!(res1, res2); + assert_ne!(res1, res2, "cached values should not match"); Ok(()) } diff --git a/merino-integration-tests/src/suggest.rs b/merino-integration-tests/src/suggest.rs index 3f89a1cb0f..1f3f158d41 100644 --- a/merino-integration-tests/src/suggest.rs +++ b/merino-integration-tests/src/suggest.rs @@ -50,6 +50,7 @@ async fn test_expected_fields(TestingTools { test_client, .. }: TestingTools) -> "advertiser", "is_sponsored", "icon", + "score", ]; dbg!(&keys); for expected_key in expected_keys { diff --git a/merino-suggest/src/debug.rs b/merino-suggest/src/debug.rs index eab19588b8..a766812573 100644 --- a/merino-suggest/src/debug.rs +++ b/merino-suggest/src/debug.rs @@ -10,7 +10,8 @@ use fake::{Fake, Faker}; use merino_settings::Settings; use crate::{ - SetupError, SuggestError, Suggestion, SuggestionProvider, SuggestionRequest, SuggestionResponse, + Proportion, SetupError, SuggestError, Suggestion, SuggestionProvider, SuggestionRequest, + SuggestionResponse, }; /// A toy suggester to test the system. @@ -49,6 +50,7 @@ impl<'a> SuggestionProvider<'a> for DebugProvider { Ok(SuggestionResponse::new(vec![Suggestion { title: json, provider: "Merino::Debug".into(), + score: Proportion::zero(), ..Faker.fake() }])) } diff --git a/merino-suggest/src/domain.rs b/merino-suggest/src/domain.rs new file mode 100644 index 0000000000..2331313b63 --- /dev/null +++ b/merino-suggest/src/domain.rs @@ -0,0 +1,140 @@ +//! Datatypes to better represent the domain of Merino. + +use anyhow::ensure; +use rand::distributions::{Distribution, Standard}; +use serde::{de, Deserialize, Serialize}; +use std::convert::{TryFrom, TryInto}; +use std::fmt::Debug; + +/// Represents a value from 0.0 to 1.0, inclusive. That is: a portion of +/// something that cannot be negative or exceed 100%. +/// +/// Stored internally as a u32. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Proportion(u32); + +impl Proportion { + /// The lowest value for a portion, corresponding to 0%. + pub fn zero() -> Self { + Proportion(0) + } + + /// The highest value for a portion, corresponding to 100%. + pub fn one() -> Self { + Proportion(u32::MAX) + } + + /// Converts a float value into a Proportion. Panics if the value is not + /// between zero and one. + /// + /// This is not implemented using [`std::config::From`] because you cannot + /// implement both Try and TryFrom for the same pair of types, due to a + /// blanket `impl TryFor for U where U: Try`. + pub fn from(v: T) -> Self + where + T: TryInto, + >::Error: Debug, + { + v.try_into().unwrap() + } +} + +macro_rules! impl_for_float { + ($type: ty) => { + impl TryFrom<$type> for Proportion { + type Error = anyhow::Error; + + fn try_from(v: $type) -> Result { + ensure!(!v.is_infinite(), "v cannot be infinite"); + ensure!(v >= 0.0, "v must be positive"); + ensure!(v <= 1.0, "v cannot be greater than 1"); + + Ok(Self((v * (u32::MAX as $type)) as u32)) + } + } + + impl From for $type { + fn from(portion: Proportion) -> $type { + (portion.0 as $type) / (u32::MAX as $type) + } + } + + impl From<&Proportion> for $type { + fn from(portion: &Proportion) -> $type { + (portion.0 as $type) / (u32::MAX as $type) + } + } + }; +} + +impl_for_float!(f32); +impl_for_float!(f64); + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Proportion { + Proportion(rng.gen()) + } +} + +impl Serialize for Proportion { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_f64(self.into()) + } +} + +impl<'de> Deserialize<'de> for Proportion { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + /// Visitor for deserializing a Proportion + struct Visitor; + + impl<'de> de::Visitor<'de> for Visitor { + type Value = Proportion; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "value between 0.0 and 1.0") + } + + fn visit_i64(self, v: i64) -> Result + where + E: de::Error, + { + if v >= 0 { + self.visit_u64(v as u64) + } else { + Err(de::Error::invalid_value(de::Unexpected::Signed(v), &self)) + } + } + + // u8, u16, and u32 delegate to this + fn visit_u64(self, v: u64) -> Result + where + E: de::Error, + { + if v == 0 { + Ok(Proportion::zero()) + } else if v == 1 { + Ok(Proportion::one()) + } else { + Err(de::Error::invalid_value(de::Unexpected::Unsigned(v), &self)) + } + } + + // f32 delegates to this + fn visit_f64(self, v: f64) -> Result + where + E: de::Error, + { + v.try_into() + .map_err(|_err| de::Error::invalid_value(de::Unexpected::Float(v), &self)) + } + } + + deserializer.deserialize_any(Visitor) + } +} diff --git a/merino-suggest/src/lib.rs b/merino-suggest/src/lib.rs index 767108fe5a..c97e345eab 100644 --- a/merino-suggest/src/lib.rs +++ b/merino-suggest/src/lib.rs @@ -4,6 +4,7 @@ mod debug; pub mod device_info; +mod domain; mod multi; mod wikifruit; @@ -13,6 +14,7 @@ use std::hash::Hash; use std::ops::Range; use std::time::Duration; +use crate::device_info::DeviceInfo; use async_trait::async_trait; use fake::{ faker::{ @@ -27,7 +29,7 @@ use serde_with::{serde_as, DisplayFromStr}; use thiserror::Error; pub use crate::debug::DebugProvider; -use crate::device_info::DeviceInfo; +pub use crate::domain::Proportion; pub use crate::multi::Multi; pub use crate::wikifruit::WikiFruit; @@ -195,6 +197,14 @@ pub struct Suggestion { /// The URL of the icon to show along side this suggestion. #[serde_as(as = "DisplayFromStr")] pub icon: Uri, + + /// A value used to compare suggestions. When choosing a suggestion to show + /// the user, higher scored suggestions are preferred. Should range from 0.0 + /// to 1.0. + /// + /// Note that Firefox uses a static value of 0.2 for Remote Settings + /// provided suggestions. + pub score: Proportion, } impl<'a, F> fake::Dummy for Suggestion { @@ -209,6 +219,7 @@ impl<'a, F> fake::Dummy for Suggestion { provider: Words(2..4).fake_with_rng::, R>(rng).join(" "), is_sponsored: rng.gen(), icon: fake_example_url(rng), + score: rng.gen(), } } } diff --git a/merino-suggest/src/wikifruit.rs b/merino-suggest/src/wikifruit.rs index a346d2e05f..e43a0c3e65 100644 --- a/merino-suggest/src/wikifruit.rs +++ b/merino-suggest/src/wikifruit.rs @@ -11,7 +11,8 @@ use http::Uri; use merino_settings::Settings; use crate::{ - SetupError, SuggestError, Suggestion, SuggestionProvider, SuggestionRequest, SuggestionResponse, + domain::Proportion, SetupError, SuggestError, Suggestion, SuggestionProvider, + SuggestionRequest, SuggestionResponse, }; /// A toy suggester to test the system. @@ -56,6 +57,7 @@ impl<'a> SuggestionProvider<'a> for WikiFruit { provider: "Merino::WikiFruit".to_string(), is_sponsored: false, icon: Uri::from_static("https://en.wikipedia.org/favicon.ico"), + score: Proportion::zero(), }), "banana" => Some(Suggestion { id: 1, @@ -67,6 +69,7 @@ impl<'a> SuggestionProvider<'a> for WikiFruit { provider: "Merino::WikiFruit".to_string(), is_sponsored: false, icon: Uri::from_static("https://en.wikipedia.org/favicon.ico"), + score: Proportion::zero(), }), "cherry" => Some(Suggestion { id: 1, @@ -78,6 +81,7 @@ impl<'a> SuggestionProvider<'a> for WikiFruit { provider: "Merino::WikiFruit".to_string(), is_sponsored: false, icon: Uri::from_static("https://en.wikipedia.org/favicon.ico"), + score: Proportion::zero(), }), _ => None, }; diff --git a/merino-web/src/suggest.rs b/merino-web/src/suggest.rs index 374a4251c5..86c1e46fc0 100644 --- a/merino-web/src/suggest.rs +++ b/merino-web/src/suggest.rs @@ -162,6 +162,7 @@ impl<'a> Serialize for SuggestionWrapper<'a> { is_sponsored: bool, icon: String, advertiser: &'a str, + score: f32, } let provider = &self.0.provider; @@ -176,6 +177,7 @@ impl<'a> Serialize for SuggestionWrapper<'a> { is_sponsored: self.0.is_sponsored, icon: self.0.icon.to_string(), advertiser: provider, + score: self.0.score.into(), }; generated.serialize(serializer) diff --git a/merino/src/docs/api.rs b/merino/src/docs/api.rs index ee45c5e87c..311a5aabe3 100644 --- a/merino/src/docs/api.rs +++ b/merino/src/docs/api.rs @@ -107,6 +107,10 @@ following keys: small square image, suitable to be included inline with the text, such as a site's favicon. +- `score` - A value between 0.0 and 1.0 used to compare suggestions. When + choosing a suggestion to show the user, higher scored suggestions are + preferred. + ### Response Headers Responses will carry standard HTTP caching headers that indicate the validity of