Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search: fix anchor ids for duplicate headers #1749

Merged
merged 1 commit into from
Mar 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions src/renderer/html_handlebars/hbs_renderer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -768,16 +768,7 @@ fn insert_link_into_header(
content: &str,
id_counter: &mut HashMap<String, usize>,
) -> String {
let raw_id = utils::id_from_content(content);

let id_count = id_counter.entry(raw_id.clone()).or_insert(0);

let id = match *id_count {
0 => raw_id,
other => format!("{}-{}", raw_id, other),
};

*id_count += 1;
let id = utils::unique_id_from_content(content, id_counter);

format!(
r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,
Expand Down
3 changes: 2 additions & 1 deletion src/renderer/html_handlebars/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ fn render_item(

breadcrumbs.push(chapter.name.clone());

let mut id_counter = HashMap::new();
while let Some(event) = p.next() {
match event {
Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
Expand All @@ -120,7 +121,7 @@ fn render_item(
}
Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
in_heading = false;
section_id = Some(utils::id_from_content(&heading));
section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
breadcrumbs.push(heading.clone());
}
Event::Start(Tag::FootnoteDefinition(name)) => {
Expand Down
57 changes: 54 additions & 3 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use regex::Regex;
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};

use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt::Write;
use std::path::Path;

Expand Down Expand Up @@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {

/// Generate an ID for use with anchors which is derived from a "normalised"
/// string.
// This function should be made private when the deprecation expires.
#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
pub fn id_from_content(content: &str) -> String {
let mut content = content.to_string();

Expand All @@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {

// Remove spaces and hashes indicating a header
let trimmed = content.trim().trim_start_matches('#').trim();

normalize_id(trimmed)
}

/// Generate an ID for use with anchors which is derived from a "normalised"
/// string.
///
/// Each ID returned will be unique, if the same `id_counter` is provided on
/// each call.
pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
let id = {
#[allow(deprecated)]
id_from_content(content)
};

// If we have headers with the same normalized id, append an incrementing counter
let id_count = id_counter.entry(id.clone()).or_insert(0);
let unique_id = match *id_count {
0 => id,
id_count => format!("{}-{}", id, id_count),
};
*id_count += 1;
unique_id
}

/// Fix links to the correct location.
///
/// This adjusts links, such as turning `.md` extensions to `.html`.
Expand Down Expand Up @@ -332,8 +355,9 @@ more text with spaces
}
}

mod html_munging {
use super::super::{id_from_content, normalize_id};
#[allow(deprecated)]
mod id_from_content {
use super::super::id_from_content;

#[test]
fn it_generates_anchors() {
Expand Down Expand Up @@ -361,6 +385,10 @@ more text with spaces
);
assert_eq!(id_from_content("## Über"), "Über");
}
}

mod html_munging {
use super::super::{normalize_id, unique_id_from_content};

#[test]
fn it_normalizes_ids() {
Expand All @@ -379,5 +407,28 @@ more text with spaces
assert_eq!(normalize_id("한국어"), "한국어");
assert_eq!(normalize_id(""), "");
}

#[test]
fn it_generates_unique_ids_from_content() {
// Same id if not given shared state
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
"中文標題-cjk-title"
);
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
"中文標題-cjk-title"
);

// Different id if given shared state
let mut id_counter = Default::default();
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
assert_eq!(
unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
"中文標題-cjk-title"
);
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
}
}
}
1 change: 1 addition & 0 deletions tests/dummy_book/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- [Markdown](first/markdown.md)
- [Unicode](first/unicode.md)
- [No Headers](first/no-headers.md)
- [Duplicate Headers](first/duplicate-headers.md)
- [Second Chapter](second.md)
- [Nested Chapter](second/nested.md)

Expand Down
9 changes: 9 additions & 0 deletions tests/dummy_book/src/first/duplicate-headers.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Duplicate headers

This page validates behaviour of duplicate headers.

# Header Text

# Header Text

# header-text
10 changes: 8 additions & 2 deletions tests/rendered_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
"1.4. Markdown",
"1.5. Unicode",
"1.6. No Headers",
"1.7. Duplicate Headers",
"2.1. Nested Chapter",
];

Expand Down Expand Up @@ -633,11 +634,12 @@ mod search {
let some_section = get_doc_ref("first/index.html#some-section");
let summary = get_doc_ref("first/includes.html#summary");
let no_headers = get_doc_ref("first/no-headers.html");
let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
let conclusion = get_doc_ref("conclusion.html#conclusion");

let bodyidx = &index["index"]["index"]["body"]["root"];
let textidx = &bodyidx["t"]["e"]["x"]["t"];
assert_eq!(textidx["df"], 2);
assert_eq!(textidx["df"], 5);
assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);

Expand All @@ -646,7 +648,7 @@ mod search {
assert_eq!(docs[&some_section]["body"], "");
assert_eq!(
docs[&summary]["body"],
"Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
"Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
);
assert_eq!(
docs[&summary]["breadcrumbs"],
Expand All @@ -657,6 +659,10 @@ mod search {
docs[&no_headers]["breadcrumbs"],
"First Chapter » No Headers"
);
assert_eq!(
docs[&duplicate_headers_1]["breadcrumbs"],
"First Chapter » Duplicate Headers » Header Text"
);
assert_eq!(
docs[&no_headers]["body"],
"Capybara capybara capybara. Capybara capybara capybara."
Expand Down
Loading