diff --git a/pagefind/features/fragments.feature b/pagefind/features/fragments.feature
index d6abae43..09908c63 100644
--- a/pagefind/features/fragments.feature
+++ b/pagefind/features/fragments.feature
@@ -3,18 +3,21 @@ Feature: Fragments
         Given I have a "public/index.html" file with the body:
             """
             <p data-result>Nothing</p>
+            <p data-result-two>Nothing</p>
             """
         Given I have a "public/cat/index.html" file with the content:
             """
-
+
+
             <h1>Cat Post.</h1>
             cats
+
             A post about the 'felines'
             This post has some gnarly things to test the fragment formatting.
@@ -35,11 +38,13 @@ Feature: Fragments
                 let search = await pagefind.search("cat");

                 let data = await search.results[0].data();
-                document.querySelector('[data-result]').innerText = data.title;
+                document.querySelector('[data-result]').innerText = data.meta.title;
+                document.querySelector('[data-result-two]').innerText = data.meta.image;
             }
             """
         Then There should be no logs
         Then The selector "[data-result]" should contain "Cat Post."
+        Then The selector "[data-result-two]" should contain "/cat.png"

     Scenario: Search results return nicely formatted content
         When I evaluate:
@@ -96,7 +101,7 @@ Feature: Fragments
                 let search = await pagefind.search("cat");

                 let data = await search.results[0].data();
-                document.querySelector('[data-result]').innerText = data.meta.image + " — " + data.meta.adjective;
+                document.querySelector('[data-result]').innerText = data.meta["social-image"] + " — " + data.meta.adjective;
             }
             """
         Then There should be no logs
diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs
index 8c7cb46e..8563b7ef 100644
--- a/pagefind/src/fossick/mod.rs
+++ b/pagefind/src/fossick/mod.rs
@@ -114,7 +114,6 @@ impl Fossicker {
             page_number: 0,
             data: PageFragmentData {
                 url: build_url(&self.file_path, options),
-                title: data.title.clone(),
                 content: data.digest.clone(),
                 filters: data.filters.clone(),
                 meta: data.meta.clone(),
diff --git a/pagefind/src/fossick/parser.rs b/pagefind/src/fossick/parser.rs
index 2aad749f..bfa1cbae 100644
--- a/pagefind/src/fossick/parser.rs
+++ b/pagefind/src/fossick/parser.rs
@@ -43,7 +43,6 @@ pub struct DomParser<'a> {
 #[derive(Default, Debug)]
 struct DomParserData {
     current_node: Rc<RefCell<DomParsingNode>>,
-    title: Option<String>,
     filters: HashMap<String, Vec<String>>,
     meta: HashMap<String, String>,
 }
@@ -65,7 +64,6 @@ struct DomParsingNode {
 #[derive(Debug)]
 pub struct DomParserResult {
     pub digest: String,
-    pub title: String,
     pub filters: HashMap<String, Vec<String>>,
     pub meta: HashMap<String, String>,
 }
@@ -92,6 +90,7 @@ impl<'a> DomParser<'a> {
                     let should_ignore_el = el.has_attribute("data-pagefind-ignore") || REMOVE_SELECTORS.contains(&el.tag_name().as_str());
                     let filter = el.get_attribute("data-pagefind-filter").map(|attr| parse_attr_string(attr, el));
                     let meta = el.get_attribute("data-pagefind-meta").map(|attr| parse_attr_string(attr, el));
+                    let tag_name = el.tag_name();

                     let node = Rc::new(RefCell::new(DomParsingNode{
                         parent: Some(Rc::clone(&data.borrow().current_node)),
@@ -106,7 +105,7 @@ impl<'a> DomParser<'a> {
                         data.current_node = Rc::clone(&node);
                     }

-                    let can_have_content = el.on_end_tag(enclose! { (data, node) move |end| {
+                    let can_have_content = el.on_end_tag(enclose! { (data, node, tag_name) move |end| {
                         let mut data = data.borrow_mut();
                         let mut node = node.borrow_mut();

@@ -131,6 +130,10 @@ impl<'a> DomParser<'a> {
                             if let Some((meta, value)) = node.get_attribute_pair(&node.meta) {
                                 data.meta.insert(meta, value);
                             }
+                            // Try to capture the first title on the page (if unset)
+                            if tag_name == "h1" && !data.meta.contains_key("title") {
+                                data.meta.insert("title".into(), normalize_content(&node.current_value));
+                            }

                             // If we bail out now, the content won't be persisted anywhere
                             // and the node + children will be dropped.
@@ -196,6 +199,12 @@ impl<'a> DomParser<'a> {
                         if let Some((meta, value)) = node.get_attribute_pair(&node.meta) {
                             data.meta.insert(meta, value);
                         }
+                        // Try to capture the first image _after_ a title (if unset)
+                        if tag_name == "img" && data.meta.contains_key("title") && !data.meta.contains_key("image") {
+                            if let Some(src) = el.get_attribute("src") {
+                                data.meta.insert("image".into(), src);
+                            }
+                        }
                     }
                     Ok(())
                 })},
@@ -206,17 +215,6 @@ impl<'a> DomParser<'a> {
                     node.current_value.push_str(el.as_str());
                     Ok(())
                 })},
-                // Track the first h1 on the page as the title to return in search
-                // TODO: This doesn't handle a chunk boundary,
-                // we can instead handle this by marking the node as a title and handling it in end_node
-                enclose! { (data) text!("h1", move |el| {
-                    let mut data = data.borrow_mut();
-                    let text = normalize_content(el.as_str());
-                    if data.title.is_none() && !text.is_empty() {
-                        data.title = Some(text);
-                    }
-                    Ok(())
-                })},
             ],
             ..Settings::default()
         },
@@ -254,7 +252,6 @@ impl<'a> DomParser<'a> {
         let node = node.borrow();
         DomParserResult {
             digest: normalize_content(&node.current_value),
-            title: data.title.unwrap_or_default(),
             filters: data.filters,
             meta: data.meta,
         }
diff --git a/pagefind/src/fragments/mod.rs b/pagefind/src/fragments/mod.rs
index f6548218..8a5446b4 100644
--- a/pagefind/src/fragments/mod.rs
+++ b/pagefind/src/fragments/mod.rs
@@ -5,7 +5,6 @@ use serde::Serialize;
 #[derive(Serialize, Debug)]
 pub struct PageFragmentData {
     pub url: String,
-    pub title: String,
     pub content: String,
     pub word_count: usize,
     pub filters: HashMap<String, Vec<String>>,
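
A minimal sketch of the fallback behaviour this diff introduces, under a deliberately simplified model of the parser: the first h1 on the page fills meta.title when nothing has claimed that key, the first img src seen after a title fills meta.image, and explicit data-pagefind-meta values share the same map. The ClosedElement type and apply_default_meta function are illustrative stand-ins, not Pagefind's actual lol_html handlers.

use std::collections::HashMap;

// Illustrative stand-in for an element whose end tag has been reached,
// assuming we only care about its tag name, text content, and src attribute.
struct ClosedElement<'a> {
    tag: &'a str,
    text: &'a str,
    src: Option<&'a str>,
}

// Mirrors the precedence rules added in the handlers above: the first h1
// fills "title" if unset; the first img with a src fills "image", but only
// once a title already exists.
fn apply_default_meta(meta: &mut HashMap<String, String>, el: &ClosedElement) {
    if el.tag == "h1" && !meta.contains_key("title") {
        meta.insert("title".into(), el.text.to_string());
    }
    if el.tag == "img" && meta.contains_key("title") && !meta.contains_key("image") {
        if let Some(src) = el.src {
            meta.insert("image".into(), src.to_string());
        }
    }
}

fn main() {
    let mut meta = HashMap::new();
    let page = [
        ClosedElement { tag: "img", text: "", src: Some("/header.png") }, // ignored: no title yet
        ClosedElement { tag: "h1", text: "Cat Post.", src: None },        // becomes meta.title
        ClosedElement { tag: "img", text: "", src: Some("/cat.png") },    // becomes meta.image
        ClosedElement { tag: "h1", text: "Second heading", src: None },   // ignored: title already set
    ];
    for el in &page {
        apply_default_meta(&mut meta, el);
    }
    assert_eq!(meta.get("title").map(String::as_str), Some("Cat Post."));
    assert_eq!(meta.get("image").map(String::as_str), Some("/cat.png"));
}

Walking the elements in document order is what makes the "first h1" and "first image after a title" rules line up with the new feature assertions above, which expect meta.title to be "Cat Post." and meta.image to be "/cat.png".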