From a49570fd20a16c0a41b1dfdaf121ef69f60acd7e Mon Sep 17 00:00:00 2001 From: Deadbeef Date: Mon, 6 Mar 2023 07:42:04 +0000 Subject: [PATCH] fix TODO comments --- compiler/rustc_ast/src/util/literal.rs | 10 +- compiler/rustc_ast_pretty/src/pprust/state.rs | 6 +- .../rustc_builtin_macros/src/concat_bytes.rs | 5 +- .../rustc_expand/src/proc_macro_server.rs | 6 +- compiler/rustc_feature/src/active.rs | 4 +- compiler/rustc_lexer/src/lib.rs | 108 +++++++++--------- library/proc_macro/src/bridge/mod.rs | 4 + .../rfcs/rfc-3348-c-string-literals/gate.rs | 6 + 8 files changed, 82 insertions(+), 67 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 5fa0ea354550a..cd3b163e3ac97 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -240,8 +240,14 @@ impl fmt::Display for LitKind { string = symbol )?; } - // TODO need to reescape - LitKind::CStr(..) => todo!(), + LitKind::CStr(ref bytes, StrStyle::Cooked) => { + write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))? + } + LitKind::CStr(ref bytes, StrStyle::Raw(n)) => { + // This can only be valid UTF-8. + let symbol = str::from_utf8(bytes).unwrap(); + write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?; + } LitKind::Int(n, ty) => { write!(f, "{n}")?; match ty { diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index 535ac89e751d5..61b7863c686cf 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -210,8 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String { token::ByteStrRaw(n) => { format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol) } - // TODO - token::CStr | token::CStrRaw(_) => todo!(), + token::CStr => format!("c\"{symbol}\""), + token::CStrRaw(n) => { + format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize)) + } token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(), }; diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index ae674995e4294..5ef35af0a059a 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -19,8 +19,9 @@ fn invalid_type_err( let snippet = cx.sess.source_map().span_to_snippet(span).ok(); match ast::LitKind::from_token_lit(token_lit) { Ok(ast::LitKind::CStr(_, _)) => { - // TODO - cx.span_err(span, "cannot concatenate C string litearls"); + // FIXME(c_str_literals): should concatenation of C string literals + // include the null bytes in the end? + cx.span_err(span, "cannot concatenate C string literals"); } Ok(ast::LitKind::Char(_)) => { let sugg = diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 04bdea273ebd5..891e84a2f3071 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -61,8 +61,8 @@ impl FromInternal for LitKind { token::StrRaw(n) => LitKind::StrRaw(n), token::ByteStr => LitKind::ByteStr, token::ByteStrRaw(n) => LitKind::ByteStrRaw(n), - // TODO - token::CStr | token::CStrRaw(_) => todo!(), + token::CStr => LitKind::CStr, + token::CStrRaw(n) => LitKind::CStrRaw(n), token::Err => LitKind::Err, token::Bool => unreachable!(), } @@ -80,6 +80,8 @@ impl ToInternal for LitKind { LitKind::StrRaw(n) => token::StrRaw(n), LitKind::ByteStr => token::ByteStr, LitKind::ByteStrRaw(n) => token::ByteStrRaw(n), + LitKind::CStr => token::CStr, + LitKind::CStrRaw(n) => token::CStrRaw(n), LitKind::Err => token::Err, } } diff --git a/compiler/rustc_feature/src/active.rs b/compiler/rustc_feature/src/active.rs index 45f462a63ee8c..4e5eebd285bbb 100644 --- a/compiler/rustc_feature/src/active.rs +++ b/compiler/rustc_feature/src/active.rs @@ -311,10 +311,10 @@ declare_features! ( (active, async_closure, "1.37.0", Some(62290), None), /// Allows async functions to be declared, implemented, and used in traits. (active, async_fn_in_trait, "1.66.0", Some(91611), None), - /// Treat `extern "C"` function as nounwind. - (active, c_unwind, "1.52.0", Some(74990), None), /// Allows `c"foo"` literals. (active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None), + /// Treat `extern "C"` function as nounwind. + (active, c_unwind, "1.52.0", Some(74990), None), /// Allows using C-variadics. (active, c_variadic, "1.34.0", Some(44930), None), /// Allows the use of `#[cfg(sanitize = "option")]`; set when -Zsanitizer is used. diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 95cb1f93e3906..ce8c9ebe7ce77 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -361,65 +361,18 @@ impl Cursor<'_> { }, // Byte literal, byte string literal, raw byte string literal or identifier. - 'b' => match (self.first(), self.second()) { - ('\'', _) => { - self.bump(); - let terminated = self.single_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Byte { terminated }; - Literal { kind, suffix_start } - } - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = ByteStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let res = self.raw_double_quoted_string(2); - let suffix_start = self.pos_within_token(); - if res.is_ok() { - self.eat_literal_suffix(); - } - let kind = RawByteStr { n_hashes: res.ok() }; - Literal { kind, suffix_start } - } - _ => self.ident_or_unknown_prefix(), - }, + 'b' => self.c_or_byte_string( + |terminated| ByteStr { terminated }, + |n_hashes| RawByteStr { n_hashes }, + Some(|terminated| Byte { terminated }), + ), - // TODO deduplicate this code // c-string literal, raw c-string literal or identifier. - 'c' => match (self.first(), self.second()) { - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = CStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let res = self.raw_double_quoted_string(2); - let suffix_start = self.pos_within_token(); - if res.is_ok() { - self.eat_literal_suffix(); - } - let kind = RawCStr { n_hashes: res.ok() }; - Literal { kind, suffix_start } - } - _ => self.ident_or_unknown_prefix(), - }, + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), // Identifier (this should be checked after other variant that can // start as identifier). @@ -583,6 +536,47 @@ impl Cursor<'_> { } } + fn c_or_byte_string( + &mut self, + mk_kind: impl FnOnce(bool) -> LiteralKind, + mk_kind_raw: impl FnOnce(Option) -> LiteralKind, + single_quoted: Option LiteralKind>, + ) -> TokenKind { + match (self.first(), self.second(), single_quoted) { + ('\'', _, Some(mk_kind)) => { + self.bump(); + let terminated = self.single_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('"', _, _) => { + self.bump(); + let terminated = self.double_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('r', '"', _) | ('r', '#', _) => { + self.bump(); + let res = self.raw_double_quoted_string(2); + let suffix_start = self.pos_within_token(); + if res.is_ok() { + self.eat_literal_suffix(); + } + let kind = mk_kind_raw(res.ok()); + Literal { kind, suffix_start } + } + _ => self.ident_or_unknown_prefix(), + } + } + fn number(&mut self, first_digit: char) -> LiteralKind { debug_assert!('0' <= self.prev() && self.prev() <= '9'); let mut base = Base::Decimal; diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs index 54b11c543f162..caecda1bc63fd 100644 --- a/library/proc_macro/src/bridge/mod.rs +++ b/library/proc_macro/src/bridge/mod.rs @@ -337,6 +337,8 @@ pub enum LitKind { StrRaw(u8), ByteStr, ByteStrRaw(u8), + CStr, + CStrRaw(u8), Err, } @@ -350,6 +352,8 @@ rpc_encode_decode!( StrRaw(n), ByteStr, ByteStrRaw(n), + CStr, + CStrRaw(n), Err, } ); diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs index 674b0c5e23351..b27da26ed23bb 100644 --- a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs +++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs @@ -1,3 +1,9 @@ +// gate-test-c_str_literals + +macro_rules! m { + ($t:tt) => {} +} + fn main() { c"foo"; //~^ ERROR: `c".."` literals are experimental