diff --git a/pagefind/features/fragments.feature b/pagefind/features/fragments.feature
index d6abae43..09908c63 100644
--- a/pagefind/features/fragments.feature
+++ b/pagefind/features/fragments.feature
@@ -3,18 +3,21 @@ Feature: Fragments
Given I have a "public/index.html" file with the body:
"""
Nothing
+ Nothing
"""
Given I have a "public/cat/index.html" file with the content:
"""
-
+
+
Cat Post.
cats
+
A post about the 'felines'
This post has some gnarly things to test the fragment formatting.
@@ -35,11 +38,13 @@ Feature: Fragments
let search = await pagefind.search("cat");
let data = await search.results[0].data();
- document.querySelector('[data-result]').innerText = data.title;
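+            // The title and image are now surfaced through the page's metadata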
+ document.querySelector('[data-result]').innerText = data.meta.title;
+ document.querySelector('[data-result-two]').innerText = data.meta.image;
}
"""
Then There should be no logs
Then The selector "[data-result]" should contain "Cat Post."
+ Then The selector "[data-result-two]" should contain "/cat.png"
Scenario: Search results return nicely formatted content
When I evaluate:
@@ -96,7 +101,7 @@ Feature: Fragments
let search = await pagefind.search("cat");
let data = await search.results[0].data();
- document.querySelector('[data-result]').innerText = data.meta.image + " — " + data.meta.adjective;
+ document.querySelector('[data-result]').innerText = data.meta["social-image"] + " — " + data.meta.adjective;
}
"""
Then There should be no logs
diff --git a/pagefind/src/fossick/mod.rs b/pagefind/src/fossick/mod.rs
index 8c7cb46e..8563b7ef 100644
--- a/pagefind/src/fossick/mod.rs
+++ b/pagefind/src/fossick/mod.rs
@@ -114,7 +114,6 @@ impl Fossicker {
page_number: 0,
data: PageFragmentData {
url: build_url(&self.file_path, options),
- title: data.title.clone(),
content: data.digest.clone(),
filters: data.filters.clone(),
meta: data.meta.clone(),
diff --git a/pagefind/src/fossick/parser.rs b/pagefind/src/fossick/parser.rs
index 2aad749f..bfa1cbae 100644
--- a/pagefind/src/fossick/parser.rs
+++ b/pagefind/src/fossick/parser.rs
@@ -43,7 +43,6 @@ pub struct DomParser<'a> {
#[derive(Default, Debug)]
struct DomParserData {
    current_node: Rc<RefCell<DomParsingNode>>,
-    title: Option<String>,
    filters: HashMap<String, Vec<String>>,
    meta: HashMap<String, String>,
}
@@ -65,7 +64,6 @@ struct DomParsingNode {
#[derive(Debug)]
pub struct DomParserResult {
pub digest: String,
- pub title: String,
    pub filters: HashMap<String, Vec<String>>,
    pub meta: HashMap<String, String>,
}
@@ -92,6 +90,7 @@ impl<'a> DomParser<'a> {
let should_ignore_el = el.has_attribute("data-pagefind-ignore") || REMOVE_SELECTORS.contains(&el.tag_name().as_str());
let filter = el.get_attribute("data-pagefind-filter").map(|attr| parse_attr_string(attr, el));
let meta = el.get_attribute("data-pagefind-meta").map(|attr| parse_attr_string(attr, el));
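+                // Capture the tag name now so the handlers below can check for h1 and img elements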
+ let tag_name = el.tag_name();
let node = Rc::new(RefCell::new(DomParsingNode{
parent: Some(Rc::clone(&data.borrow().current_node)),
@@ -106,7 +105,7 @@ impl<'a> DomParser<'a> {
data.current_node = Rc::clone(&node);
}
- let can_have_content = el.on_end_tag(enclose! { (data, node) move |end| {
+ let can_have_content = el.on_end_tag(enclose! { (data, node, tag_name) move |end| {
let mut data = data.borrow_mut();
let mut node = node.borrow_mut();
@@ -131,6 +130,10 @@ impl<'a> DomParser<'a> {
if let Some((meta, value)) = node.get_attribute_pair(&node.meta) {
data.meta.insert(meta, value);
}
+ // Try to capture the first title on the page (if unset)
+ if tag_name == "h1" && !data.meta.contains_key("title") {
+ data.meta.insert("title".into(), normalize_content(&node.current_value));
+ }
// If we bail out now, the content won't be persisted anywhere
// and the node + children will be dropped.
@@ -196,6 +199,12 @@ impl<'a> DomParser<'a> {
if let Some((meta, value)) = node.get_attribute_pair(&node.meta) {
data.meta.insert(meta, value);
}
+ // Try to capture the first image _after_ a title (if unset)
+ if tag_name == "img" && data.meta.contains_key("title") && !data.meta.contains_key("image") {
+ if let Some(src) = el.get_attribute("src") {
+ data.meta.insert("image".into(), src);
+ }
+ }
}
Ok(())
})},
@@ -206,17 +215,6 @@ impl<'a> DomParser<'a> {
node.current_value.push_str(el.as_str());
Ok(())
})},
- // Track the first h1 on the page as the title to return in search
- // TODO: This doesn't handle a chunk boundary,
- // we can instead handle this by marking the node as a title and handling it in end_node
- enclose! { (data) text!("h1", move |el| {
- let mut data = data.borrow_mut();
- let text = normalize_content(el.as_str());
- if data.title.is_none() && !text.is_empty() {
- data.title = Some(text);
- }
- Ok(())
- })},
],
..Settings::default()
},
@@ -254,7 +252,6 @@ impl<'a> DomParser<'a> {
let node = node.borrow();
DomParserResult {
digest: normalize_content(&node.current_value),
- title: data.title.unwrap_or_default(),
filters: data.filters,
meta: data.meta,
}
diff --git a/pagefind/src/fragments/mod.rs b/pagefind/src/fragments/mod.rs
index f6548218..8a5446b4 100644
--- a/pagefind/src/fragments/mod.rs
+++ b/pagefind/src/fragments/mod.rs
@@ -5,7 +5,6 @@ use serde::Serialize;
#[derive(Serialize, Debug)]
pub struct PageFragmentData {
pub url: String,
- pub title: String,
pub content: String,
pub word_count: usize,
    pub filters: HashMap<String, Vec<String>>,