Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve escaping of byte, byte str, and c str proc-macro literals #123769

Merged
merged 3 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions library/proc_macro/src/escape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/// Flags selecting which escape sequences `escape_bytes` emits, so that one
/// escaper can serve string, char, byte, byte string and C string literals.
#[derive(Copy, Clone)]
pub(crate) struct EscapeOptions {
/// Produce \'. When false, a single quote is copied to the output verbatim.
pub escape_single_quote: bool,
/// Produce \". When false, a double quote is copied to the output verbatim.
pub escape_double_quote: bool,
/// Produce \x escapes for non-ASCII, and use \x rather than \u for ASCII
/// control characters. When true the input is escaped strictly byte by
/// byte; when false, valid UTF-8 runs are escaped per `char` and only bytes
/// that are not valid UTF-8 are escaped individually.
pub escape_nonascii: bool,
}

/// Builds the escaped textual body of a literal from raw `bytes`.
///
/// The returned string contains only the escaped contents; no quotes or
/// literal prefix are added here.
///
/// * With `opt.escape_nonascii` set, every byte is escaped on its own.
/// * Otherwise the input is walked as UTF-8 chunks: each validly encoded run
///   is escaped one `char` at a time, and the bytes that are not valid UTF-8
///   are escaped individually.
pub(crate) fn escape_bytes(bytes: &[u8], opt: EscapeOptions) -> String {
    let mut out = String::new();

    if !opt.escape_nonascii {
        for piece in bytes.utf8_chunks() {
            // Valid text first, then the invalid bytes that ended this chunk,
            // so the output keeps the input order.
            for ch in piece.valid().chars() {
                escape_single_char(ch, opt, &mut out);
            }
            for &raw in piece.invalid() {
                escape_single_byte(raw, opt, &mut out);
            }
        }
        return out;
    }

    // Byte-oriented literal: never interpret the input as text.
    bytes.iter().for_each(|&raw| escape_single_byte(raw, opt, &mut out));
    out
}

/// Appends the escaped form of one raw byte to `repr`.
///
/// NUL is written as `\0`. A quote character is copied verbatim when the
/// matching `opt` flag is off. Everything else goes through
/// `u8::escape_ascii`.
fn escape_single_byte(byte: u8, opt: EscapeOptions, repr: &mut String) {
    match byte {
        // Handled separately so NUL is written as `\0`.
        b'\0' => repr.push_str("\\0"),
        b'\'' if !opt.escape_single_quote => repr.push('\''),
        b'"' if !opt.escape_double_quote => repr.push('"'),
        // Escapes \t, \r, \n, \\, \', \", and uses \x## for non-ASCII and
        // for ASCII control characters.
        _ => repr.extend(byte.escape_ascii().map(char::from)),
    }
}

/// Appends the escaped form of one Unicode scalar value to `repr`.
///
/// A quote character is copied verbatim when the matching `opt` flag is off;
/// every other character goes through `char::escape_debug`.
fn escape_single_char(ch: char, opt: EscapeOptions, repr: &mut String) {
    let keep_verbatim = match ch {
        '\'' => !opt.escape_single_quote,
        '"' => !opt.escape_double_quote,
        _ => false,
    };
    if keep_verbatim {
        repr.push(ch);
        return;
    }

    // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for
    // non-printable characters and for Grapheme_Extend characters, which
    // includes things like U+0300 "Combining Grave Accent".
    repr.extend(ch.escape_debug());
}
51 changes: 37 additions & 14 deletions library/proc_macro/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@
pub mod bridge;

mod diagnostic;
mod escape;

#[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
pub use diagnostic::{Diagnostic, Level, MultiSpan};

use crate::escape::{escape_bytes, EscapeOptions};
use std::ffi::CStr;
use std::ops::{Range, RangeBounds};
use std::path::PathBuf;
Expand Down Expand Up @@ -1356,40 +1358,61 @@ impl Literal {
/// String literal.
///
/// The text handed to `Literal::new` is the escaped contents only, without
/// the surrounding double quotes.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn string(string: &str) -> Literal {
// NOTE(review): this listing is a diff with its +/- markers stripped. The next
// four lines look like the removed implementation (Debug-format the string,
// then slice off the surrounding quotes) — confirm against the PR.
let quoted = format!("{:?}", string);
assert!(quoted.starts_with('"') && quoted.ends_with('"'));
let symbol = &quoted[1..quoted.len() - 1];
Literal::new(bridge::LitKind::Str, symbol, None)
// NOTE(review): the remaining lines look like the added implementation.
// `"` is escaped, `'` is copied verbatim, and valid UTF-8 is escaped per
// character rather than per byte.
let escape = EscapeOptions {
escape_single_quote: false,
escape_double_quote: true,
escape_nonascii: false,
};
let repr = escape_bytes(string.as_bytes(), escape);
Literal::new(bridge::LitKind::Str, &repr, None)
}

/// Character literal.
///
/// The text handed to `Literal::new` is the escaped character only, without
/// the surrounding single quotes.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn character(ch: char) -> Literal {
// NOTE(review): this listing is a diff with its +/- markers stripped. The next
// four lines look like the removed implementation (Debug-format the char,
// then slice off the surrounding quotes) — confirm against the PR.
let quoted = format!("{:?}", ch);
assert!(quoted.starts_with('\'') && quoted.ends_with('\''));
let symbol = &quoted[1..quoted.len() - 1];
Literal::new(bridge::LitKind::Char, symbol, None)
// NOTE(review): the remaining lines look like the added implementation.
// `'` is escaped, `"` is copied verbatim, and the character is escaped as a
// `char` rather than byte by byte.
let escape = EscapeOptions {
escape_single_quote: true,
escape_double_quote: false,
escape_nonascii: false,
};
// The char is UTF-8 encoded into a temporary 4-byte buffer so the byte-slice
// based escaper can be reused.
let repr = escape_bytes(ch.encode_utf8(&mut [0u8; 4]).as_bytes(), escape);
Literal::new(bridge::LitKind::Char, &repr, None)
}

/// Byte character literal.
///
/// The text handed to `Literal::new` is the escaped byte only, without the
/// `b'...'` wrapper.
#[stable(feature = "proc_macro_byte_character", since = "1.79.0")]
pub fn byte_character(byte: u8) -> Literal {
// NOTE(review): this listing is a diff with its +/- markers stripped. The next
// two lines look like the removed implementation (plain `escape_ascii`)
// — confirm against the PR.
let string = [byte].escape_ascii().to_string();
Literal::new(bridge::LitKind::Byte, &string, None)
// NOTE(review): the remaining lines look like the added implementation.
// `'` is escaped, `"` is copied verbatim, and the byte is escaped on its own
// (never decoded as UTF-8).
let escape = EscapeOptions {
escape_single_quote: true,
escape_double_quote: false,
escape_nonascii: true,
};
let repr = escape_bytes(&[byte], escape);
Literal::new(bridge::LitKind::Byte, &repr, None)
}

/// Byte string literal.
///
/// The text handed to `Literal::new` is the escaped contents only, without
/// the `b"..."` wrapper.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn byte_string(bytes: &[u8]) -> Literal {
// NOTE(review): this listing is a diff with its +/- markers stripped. The next
// two lines look like the removed implementation (plain `escape_ascii`)
// — confirm against the PR.
let string = bytes.escape_ascii().to_string();
Literal::new(bridge::LitKind::ByteStr, &string, None)
// NOTE(review): the remaining lines look like the added implementation.
// `"` is escaped, `'` is copied verbatim, and every byte is escaped on its own
// (never decoded as UTF-8).
let escape = EscapeOptions {
escape_single_quote: false,
escape_double_quote: true,
escape_nonascii: true,
};
let repr = escape_bytes(bytes, escape);
Literal::new(bridge::LitKind::ByteStr, &repr, None)
}

/// C string literal.
///
/// The text handed to `Literal::new` is the escaped contents only, without
/// the `c"..."` wrapper.
#[stable(feature = "proc_macro_c_str_literals", since = "1.79.0")]
pub fn c_string(string: &CStr) -> Literal {
// NOTE(review): this listing is a diff with its +/- markers stripped. The next
// two lines look like the removed implementation (plain `escape_ascii`)
// — confirm against the PR.
let string = string.to_bytes().escape_ascii().to_string();
Literal::new(bridge::LitKind::CStr, &string, None)
// NOTE(review): the remaining lines look like the added implementation.
// `"` is escaped and `'` is copied verbatim. Valid UTF-8 runs are escaped per
// character; bytes that are not valid UTF-8 are escaped individually.
let escape = EscapeOptions {
escape_single_quote: false,
escape_double_quote: true,
escape_nonascii: false,
};
let repr = escape_bytes(string.to_bytes(), escape);
Literal::new(bridge::LitKind::CStr, &repr, None)
}

/// Returns the span encompassing this literal.
Expand Down
83 changes: 83 additions & 0 deletions tests/ui/proc-macro/auxiliary/api/literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// ignore-tidy-linelength

use proc_macro::Literal;

/// Entry point for this module's checks: runs the `Literal` display
/// assertions first, then the `Literal` parsing assertions.
pub fn test() {
test_display_literal();
test_parse_literal();
}

/// Checks the `to_string()` output of literals built through each
/// `Literal` constructor: numbers, strings, byte strings, C strings,
/// characters and byte characters.
fn test_display_literal() {
    // Asserts that `literal` displays as `expected` with surrounding
    // whitespace trimmed. Expected values written as raw strings are padded
    // with one space on each side, which the trim removes.
    // `#[track_caller]` makes a failure point at the individual case below.
    #[track_caller]
    fn displays_as(literal: Literal, expected: &str) {
        assert_eq!(literal.to_string(), expected.trim());
    }

    // Numeric literals.
    displays_as(Literal::isize_unsuffixed(-10), "-10");
    displays_as(Literal::isize_suffixed(-10), "-10isize");
    displays_as(Literal::f32_unsuffixed(-10.0), "-10.0");
    displays_as(Literal::f32_suffixed(-10.0), "-10f32");
    displays_as(Literal::f64_unsuffixed(-10.0), "-10.0");
    displays_as(Literal::f64_suffixed(-10.0), "-10f64");
    displays_as(
        Literal::f64_unsuffixed(1e100),
        "10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0",
    );

    // String literals.
    displays_as(Literal::string("aA"), r#" "aA" "#);
    displays_as(Literal::string("\t"), r#" "\t" "#);
    displays_as(Literal::string("❤"), r#" "❤" "#);
    displays_as(Literal::string("'"), r#" "'" "#);
    displays_as(Literal::string("\""), r#" "\"" "#);
    displays_as(Literal::string("\0"), r#" "\0" "#);
    displays_as(Literal::string("\u{1}"), r#" "\u{1}" "#);

    // Byte string literals.
    displays_as(Literal::byte_string(b"aA"), r#" b"aA" "#);
    displays_as(Literal::byte_string(b"\t"), r#" b"\t" "#);
    displays_as(Literal::byte_string(b"'"), r#" b"'" "#);
    displays_as(Literal::byte_string(b"\""), r#" b"\"" "#);
    displays_as(Literal::byte_string(b"\0"), r#" b"\0" "#);
    displays_as(Literal::byte_string(b"\x01"), r#" b"\x01" "#);

    // C string literals.
    displays_as(Literal::c_string(c"aA"), r#" c"aA" "#);
    displays_as(Literal::c_string(c"\t"), r#" c"\t" "#);
    displays_as(Literal::c_string(c"❤"), r#" c"❤" "#);
    displays_as(Literal::c_string(c"\'"), r#" c"'" "#);
    displays_as(Literal::c_string(c"\""), r#" c"\"" "#);
    displays_as(Literal::c_string(c"\x7f\xff\xfe\u{333}"), r#" c"\u{7f}\xff\xfe\u{333}" "#);

    // Character literals.
    displays_as(Literal::character('a'), r#" 'a' "#);
    displays_as(Literal::character('\t'), r#" '\t' "#);
    displays_as(Literal::character('❤'), r#" '❤' "#);
    displays_as(Literal::character('\''), r#" '\'' "#);
    displays_as(Literal::character('"'), r#" '"' "#);
    displays_as(Literal::character('\0'), r#" '\0' "#);
    displays_as(Literal::character('\u{1}'), r#" '\u{1}' "#);

    // Byte character literals.
    displays_as(Literal::byte_character(b'a'), r#" b'a' "#);
    displays_as(Literal::byte_character(b'\t'), r#" b'\t' "#);
    displays_as(Literal::byte_character(b'\''), r#" b'\'' "#);
    displays_as(Literal::byte_character(b'"'), r#" b'"' "#);
    displays_as(Literal::byte_character(0), r#" b'\0' "#);
    displays_as(Literal::byte_character(1), r#" b'\x01' "#);
}

/// Checks `str::parse::<Literal>()`: inputs that must parse and print back
/// unchanged, followed by inputs that must be rejected.
fn test_parse_literal() {
    // Asserts that `source` parses as a literal whose display form is exactly
    // `source` again. `#[track_caller]` keeps failure locations on the case.
    #[track_caller]
    fn round_trips(source: &str) {
        assert_eq!(source.parse::<Literal>().unwrap().to_string(), source);
    }

    // Asserts that `source` is not accepted as a literal.
    #[track_caller]
    fn is_rejected(source: &str) {
        assert!(source.parse::<Literal>().is_err());
    }

    round_trips("1");
    round_trips("1.0");
    round_trips("'a'");
    round_trips("b'a'");
    round_trips("\"\n\"");
    round_trips("b\"\"");
    round_trips("c\"\"");
    round_trips("r##\"\"##");
    round_trips("10ulong");
    round_trips("-10ulong");

    is_rejected("true");
    is_rejected(".8");
    is_rejected("0 1");
    is_rejected("'a");
    is_rejected(" 0");
    is_rejected("0 ");
    is_rejected("/* comment */0");
    is_rejected("0/* comment */");
    is_rejected("0// comment");
    is_rejected("- 10");
    is_rejected("-'x'");
}
4 changes: 2 additions & 2 deletions tests/ui/proc-macro/auxiliary/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
extern crate proc_macro;

mod cmp;
mod parse;
mod literal;

use proc_macro::TokenStream;

Expand All @@ -19,7 +19,7 @@ pub fn run(input: TokenStream) -> TokenStream {
assert!(input.is_empty());

cmp::test();
parse::test();
literal::test();

TokenStream::new()
}
58 changes: 0 additions & 58 deletions tests/ui/proc-macro/auxiliary/api/parse.rs

This file was deleted.

Loading