From f1b2e1dba74d4996118b14a26afc8ee238d3ff7f Mon Sep 17 00:00:00 2001 From: Nico Burns Date: Thu, 19 Dec 2024 00:34:58 +1300 Subject: [PATCH] Eliminate `regex` and `once_cell` dependencies. Replace usage with `unicode_categories` crate which comrak already depends on. --- Cargo.lock | 2 -- Cargo.toml | 2 -- src/html.rs | 19 ++++++++++++++----- src/parser/mod.rs | 3 +-- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3141ab2e..0d0c42f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -165,8 +165,6 @@ dependencies = [ "entities", "memchr", "ntest", - "once_cell", - "regex", "shell-words", "slug", "syntect", diff --git a/Cargo.toml b/Cargo.toml index 5f5ff5f8..e9101dbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,8 +36,6 @@ doc = false [dependencies] typed-arena = "2.0.2" -regex = "1" -once_cell = "1.19.0" entities = "1.0.1" unicode_categories = "0.1.1" memchr = "2" diff --git a/src/html.rs b/src/html.rs index 07ad630d..c46f7c9a 100644 --- a/src/html.rs +++ b/src/html.rs @@ -7,13 +7,12 @@ use crate::nodes::{ }; use crate::parser::{Options, Plugins}; use crate::scanners; -use once_cell::sync::Lazy; -use regex::Regex; use std::borrow::Cow; use std::cell::Cell; use std::collections::{HashMap, HashSet}; use std::io::{self, Write}; use std::str; +use unicode_categories::UnicodeCategories; use crate::adapters::HeadingMeta; @@ -102,11 +101,21 @@ impl Anchorizer { /// assert_eq!("ticks-arent-in".to_string(), anchorizer.anchorize(source.to_string())); /// ``` pub fn anchorize(&mut self, header: String) -> String { - static REJECTED_CHARS: Lazy<Regex> = - Lazy::new(|| Regex::new(r"[^\p{L}\p{M}\p{N}\p{Pc} -]").unwrap()); + fn is_permitted_char(&c: &char) -> bool { + c == ' ' + || c == '-' + || c.is_letter() + || c.is_mark() + || c.is_number() + || c.is_punctuation_connector() + } let mut id = header.to_lowercase(); - id = REJECTED_CHARS.replace_all(&id, "").replace(' ', "-"); + id = id + .chars() + .filter(is_permitted_char) + .map(|c| if c 
== ' ' { '-' } else { c }) + .collect(); let mut uniq = 0; id = loop { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5fba6c93..d3d11c1b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -787,8 +787,7 @@ pub struct RenderOptions { /// /// options.render.full_info_string = true; /// let html = markdown_to_html("``` rust extra info\nfn hello();\n```\n", &options); - /// let re = regex::Regex::new(r#"data-meta="extra info""#).unwrap(); - /// assert!(re.is_match(&html)); + /// assert!(html.contains(r#"data-meta="extra info""#)); /// ``` #[builder(default)] pub full_info_string: bool,