From a9f0d290af23f0fc8d16e89606c29980e413a545 Mon Sep 17 00:00:00 2001
From: tiye
Date: Thu, 27 Jun 2024 02:36:27 +0800
Subject: [PATCH] improve cjk in displaying; tag 0.6.11

---
 Cargo.toml             |  3 ++-
 src/edn.rs             | 21 +++++++++++++++++----
 src/lib.rs             |  4 +++-
 tests/display_tests.rs |  9 ++++++++-
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index d2beff4..425f585 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cirru_edn"
-version = "0.6.10"
+version = "0.6.11"
 authors = ["jiyinyiyong "]
 edition = "2021"
 license = "MIT"
@@ -18,6 +18,7 @@ cirru_parser = "0.1.30"
 hex = "0.4.3"
 lazy_static = "1.4.0"
 bincode = "2.0.0-rc.3"
+cjk = "0.2.5"
 
 [dev-dependencies]
 criterion = "0.5.1"
diff --git a/src/edn.rs b/src/edn.rs
index 785be86..e5d625e 100644
--- a/src/edn.rs
+++ b/src/edn.rs
@@ -12,7 +12,7 @@ use std::{
   },
   collections::{HashMap, HashSet},
   convert::{TryFrom, TryInto},
-  fmt,
+  fmt::{self, Write},
   hash::{Hash, Hasher},
   iter::FromIterator,
   ptr,
@@ -63,7 +63,15 @@ impl fmt::Display for Edn {
         if is_simple_token(s) {
           f.write_fmt(format_args!("|{}", s))
         } else {
-          f.write_fmt(format_args!("\"|{}\"", s))
+          f.write_str("\"|")?;
+          for c in s.chars() {
+            if is_simple_char(c) {
+              f.write_char(c)?;
+            } else {
+              f.write_str(&c.escape_default().to_string())?;
+            }
+          }
+          f.write_char('"')
         }
       }
       Self::Quote(v) => f.write_fmt(format_args!("(quote {})", v)),
@@ -122,9 +130,14 @@ impl fmt::Display for Edn {
   }
 }
 
+/// check if a char is simple enough to be printed without quotes
+pub fn is_simple_char(c: char) -> bool {
+  matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | '-' | '?' | '.' | '$' | ',') || cjk::is_cjk_codepoint(c)
+}
+
 fn is_simple_token(tok: &str) -> bool {
-  for s in tok.bytes() {
-    if !matches!(s, b'0'..=b'9' | b'A'..=b'Z'| b'a'..=b'z'| b'-' | b'?' | b'.'| b'$' | b',') {
+  for s in tok.chars() {
+    if !is_simple_char(s) {
       return false;
     }
   }
diff --git a/src/lib.rs b/src/lib.rs
index 2349dab..f49c821 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,7 +9,9 @@ use std::vec;
 
 use cirru_parser::{Cirru, CirruWriterOptions};
 
-pub use edn::{DynEq, Edn, EdnAnyRef, EdnListView, EdnMapView, EdnRecordView, EdnSetView, EdnTupleView};
+pub use edn::{
+  is_simple_char, DynEq, Edn, EdnAnyRef, EdnListView, EdnMapView, EdnRecordView, EdnSetView, EdnTupleView,
+};
 pub use tag::EdnTag;
 
 /// parse Cirru code into data
diff --git a/tests/display_tests.rs b/tests/display_tests.rs
index 269bc9e..ded630a 100644
--- a/tests/display_tests.rs
+++ b/tests/display_tests.rs
@@ -2,7 +2,7 @@ extern crate cirru_edn;
 
 use std::{sync::Arc, vec};
 
-use cirru_edn::{Edn, EdnRecordView, EdnTag};
+use cirru_edn::{Edn, EdnListView, EdnRecordView, EdnTag};
 
 #[test]
 fn display_data() {
@@ -22,3 +22,10 @@ fn display_data() {
   let t2 = Edn::from((Arc::new(Edn::tag("t")), vec![Edn::Number(1.0), Edn::Number(2.0)]));
   assert_eq!(format!("{t2}"), "(:: :t 1 2)");
 }
+
+#[test]
+fn display_with_cjk() {
+  let r = Edn::List(EdnListView(vec![Edn::str("你好"), Edn::str("世界"), Edn::str("海 洋")]));
+
+  assert_eq!(format!("{r}"), "([] |你好 |世界 \"|海 洋\")");
+}