rust-lang · ehuss · Mar 28, 2022 · Feb 18, 2022
diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs
@@ -768,16 +768,7 @@ fn insert_link_into_header(
     content: &str,
     id_counter: &mut HashMap<String, usize>,
 ) -> String {
-    let raw_id = utils::id_from_content(content);
-
-    let id_count = id_counter.entry(raw_id.clone()).or_insert(0);
-
-    let id = match *id_count {
-        0 => raw_id,
-        other => format!("{}-{}", raw_id, other),
-    };
-
-    *id_count += 1;
+    let id = utils::unique_id_from_content(content, id_counter);
 
     format!(
         r##"<h{level} id="{id}"><a class="header" href="#{id}">{text}</a></h{level}>"##,

diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs
@@ -97,6 +97,7 @@ fn render_item(
 
     breadcrumbs.push(chapter.name.clone());
 
+    let mut id_counter = HashMap::new();
     while let Some(event) = p.next() {
         match event {
             Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
@@ -120,7 +121,7 @@ fn render_item(
             }
             Event::End(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
                 in_heading = false;
-                section_id = Some(utils::id_from_content(&heading));
+                section_id = Some(utils::unique_id_from_content(&heading, &mut id_counter));
                 breadcrumbs.push(heading.clone());
             }
             Event::Start(Tag::FootnoteDefinition(name)) => {

diff --git a/src/utils/mod.rs b/src/utils/mod.rs
@@ -9,6 +9,7 @@ use regex::Regex;
 use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
 
 use std::borrow::Cow;
+use std::collections::HashMap;
 use std::fmt::Write;
 use std::path::Path;
 
@@ -44,6 +45,8 @@ pub fn normalize_id(content: &str) -> String {
 
 /// Generate an ID for use with anchors which is derived from a "normalised"
 /// string.
+// This function should be made private when the deprecation expires.
+#[deprecated(since = "0.4.16", note = "use unique_id_from_content instead")]
 pub fn id_from_content(content: &str) -> String {
     let mut content = content.to_string();
 
@@ -59,10 +62,30 @@ pub fn id_from_content(content: &str) -> String {
 
     // Remove spaces and hashes indicating a header
     let trimmed = content.trim().trim_start_matches('#').trim();
-
     normalize_id(trimmed)
 }
 
+/// Generate an ID for use with anchors which is derived from a "normalised"
+/// string.
+///
+/// Each ID returned will be unique, if the same `id_counter` is provided on
+/// each call.
+pub fn unique_id_from_content(content: &str, id_counter: &mut HashMap<String, usize>) -> String {
+    let id = {
+        #[allow(deprecated)]
+        id_from_content(content)
+    };
+
+    // If we have headers with the same normalized id, append an incrementing counter
+    let id_count = id_counter.entry(id.clone()).or_insert(0);
+    let unique_id = match *id_count {
+        0 => id,
+        id_count => format!("{}-{}", id, id_count),
+    };
+    *id_count += 1;
+    unique_id
+}
+
 /// Fix links to the correct location.
 ///
 /// This adjusts links, such as turning `.md` extensions to `.html`.
@@ -332,8 +355,9 @@ more text with spaces
         }
     }
 
-    mod html_munging {
-        use super::super::{id_from_content, normalize_id};
+    #[allow(deprecated)]
+    mod id_from_content {
+        use super::super::id_from_content;
 
         #[test]
         fn it_generates_anchors() {
@@ -361,6 +385,10 @@ more text with spaces
             );
             assert_eq!(id_from_content("## Über"), "Über");
         }
+    }
+
+    mod html_munging {
+        use super::super::{normalize_id, unique_id_from_content};
 
         #[test]
         fn it_normalizes_ids() {
@@ -379,5 +407,28 @@ more text with spaces
             assert_eq!(normalize_id("한국어"), "한국어");
             assert_eq!(normalize_id(""), "");
         }
+
+        #[test]
+        fn it_generates_unique_ids_from_content() {
+            // Same id if not given shared state
+            assert_eq!(
+                unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
+                "中文標題-cjk-title"
+            );
+            assert_eq!(
+                unique_id_from_content("## 中文標題 CJK title", &mut Default::default()),
+                "中文標題-cjk-title"
+            );
+
+            // Different id if given shared state
+            let mut id_counter = Default::default();
+            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über");
+            assert_eq!(
+                unique_id_from_content("## 中文標題 CJK title", &mut id_counter),
+                "中文標題-cjk-title"
+            );
+            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-1");
+            assert_eq!(unique_id_from_content("## Über", &mut id_counter), "Über-2");
+        }
     }
 }
diff --git a/tests/dummy_book/src/SUMMARY.md b/tests/dummy_book/src/SUMMARY.md
@@ -13,6 +13,7 @@
     - [Markdown](first/markdown.md)
     - [Unicode](first/unicode.md)
     - [No Headers](first/no-headers.md)
+    - [Duplicate Headers](first/duplicate-headers.md)
 - [Second Chapter](second.md)
     - [Nested Chapter](second/nested.md)
 

diff --git a/tests/dummy_book/src/first/duplicate-headers.md b/tests/dummy_book/src/first/duplicate-headers.md
@@ -0,0 +1,9 @@
+# Duplicate headers
+
+This page validates behaviour of duplicate headers.
+
+# Header Text
+
+# Header Text
+
+# header-text
diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs
@@ -35,6 +35,7 @@ const TOC_SECOND_LEVEL: &[&str] = &[
     "1.4. Markdown",
     "1.5. Unicode",
     "1.6. No Headers",
+    "1.7. Duplicate Headers",
     "2.1. Nested Chapter",
 ];
 
@@ -633,11 +634,12 @@ mod search {
         let some_section = get_doc_ref("first/index.html#some-section");
         let summary = get_doc_ref("first/includes.html#summary");
         let no_headers = get_doc_ref("first/no-headers.html");
+        let duplicate_headers_1 = get_doc_ref("first/duplicate-headers.html#header-text-1");
         let conclusion = get_doc_ref("conclusion.html#conclusion");
 
         let bodyidx = &index["index"]["index"]["body"]["root"];
         let textidx = &bodyidx["t"]["e"]["x"]["t"];
-        assert_eq!(textidx["df"], 2);
+        assert_eq!(textidx["df"], 5);
         assert_eq!(textidx["docs"][&first_chapter]["tf"], 1.0);
         assert_eq!(textidx["docs"][&introduction]["tf"], 1.0);
 
@@ -646,7 +648,7 @@ mod search {
         assert_eq!(docs[&some_section]["body"], "");
         assert_eq!(
             docs[&summary]["body"],
-            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Second Chapter Nested Chapter Conclusion"
+            "Dummy Book Introduction First Chapter Nested Chapter Includes Recursive Markdown Unicode No Headers Duplicate Headers Second Chapter Nested Chapter Conclusion"
         );
         assert_eq!(
             docs[&summary]["breadcrumbs"],
@@ -657,6 +659,10 @@ mod search {
             docs[&no_headers]["breadcrumbs"],
             "First Chapter » No Headers"
         );
+        assert_eq!(
+            docs[&duplicate_headers_1]["breadcrumbs"],
+            "First Chapter » Duplicate Headers » Header Text"
+        );
         assert_eq!(
             docs[&no_headers]["body"],
             "Capybara capybara capybara. Capybara capybara capybara."