From e658144586e0fe4f77a7dadf7c80185fd0b71279 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Nov 2022 13:01:04 +1100 Subject: [PATCH 1/9] Rename `LitKind::to_token_lit` as `LitKind::synthesize_token_lit`. This makes it clearer that it's not a lossless conversion, which I find helpful. --- compiler/rustc_ast/src/attr/mod.rs | 4 ++-- compiler/rustc_ast/src/util/literal.rs | 4 ++-- compiler/rustc_ast_pretty/src/pprust/state/expr.rs | 2 +- compiler/rustc_expand/src/build.rs | 2 +- compiler/rustc_expand/src/proc_macro_server.rs | 2 +- compiler/rustc_hir_pretty/src/lib.rs | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 057cc26b5799e..1ba4691467586 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -328,7 +328,7 @@ pub fn mk_name_value_item_str(ident: Ident, str: Symbol, str_span: Span) -> Meta } pub fn mk_name_value_item(ident: Ident, kind: LitKind, lit_span: Span) -> MetaItem { - let lit = MetaItemLit { token_lit: kind.to_token_lit(), kind, span: lit_span }; + let lit = MetaItemLit { token_lit: kind.synthesize_token_lit(), kind, span: lit_span }; let span = ident.span.to(lit_span); MetaItem { path: Path::from_ident(ident), kind: MetaItemKind::NameValue(lit), span } } @@ -408,7 +408,7 @@ pub fn mk_attr_name_value_str( val: Symbol, span: Span, ) -> Attribute { - let lit = LitKind::Str(val, StrStyle::Cooked).to_token_lit(); + let lit = LitKind::Str(val, StrStyle::Cooked).synthesize_token_lit(); let expr = P(Expr { id: DUMMY_NODE_ID, kind: ExprKind::Lit(lit), diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 1d6e7914f3a5c..5e6c94f1e6fc8 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -142,10 +142,10 @@ impl LitKind { }) } - /// Attempts to recover a token from semantic literal. 
+ /// Synthesizes a token from a semantic literal. /// This function is used when the original token doesn't exist (e.g. the literal is created /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). - pub fn to_token_lit(&self) -> token::Lit { + pub fn synthesize_token_lit(&self) -> token::Lit { let (kind, symbol, suffix) = match *self { LitKind::Str(symbol, ast::StrStyle::Cooked) => { // Don't re-intern unless the escaped string is different. diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index 81483ac30d1de..828b9d5ad5f68 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -323,7 +323,7 @@ impl<'a> State<'a> { self.print_token_literal(*token_lit, expr.span); } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone()).to_token_lit(); + let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); self.print_token_literal(lit, expr.span) } ast::ExprKind::Cast(expr, ty) => { diff --git a/compiler/rustc_expand/src/build.rs b/compiler/rustc_expand/src/build.rs index c978297295d40..b56e1a24834f0 100644 --- a/compiler/rustc_expand/src/build.rs +++ b/compiler/rustc_expand/src/build.rs @@ -333,7 +333,7 @@ impl<'a> ExtCtxt<'a> { } fn expr_lit(&self, span: Span, lit_kind: ast::LitKind) -> P { - let token_lit = lit_kind.to_token_lit(); + let token_lit = lit_kind.synthesize_token_lit(); self.expr(span, ast::ExprKind::Lit(token_lit)) } diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 7616579611711..57f66758ef005 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -526,7 +526,7 @@ impl server::TokenStream for Rustc<'_, '_> { Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) } ast::ExprKind::IncludedBytes(bytes) => { - 
let lit = ast::LitKind::ByteStr(bytes.clone()).to_token_lit(); + let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) } ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { diff --git a/compiler/rustc_hir_pretty/src/lib.rs b/compiler/rustc_hir_pretty/src/lib.rs index 95729822677bd..10b2265c522a0 100644 --- a/compiler/rustc_hir_pretty/src/lib.rs +++ b/compiler/rustc_hir_pretty/src/lib.rs @@ -1256,7 +1256,7 @@ impl<'a> State<'a> { fn print_literal(&mut self, lit: &hir::Lit) { self.maybe_print_comment(lit.span.lo()); - self.word(lit.node.to_token_lit().to_string()) + self.word(lit.node.synthesize_token_lit().to_string()) } fn print_inline_asm(&mut self, asm: &hir::InlineAsm<'_>) { From a7f35c42d474f893c56b6e0f7df3f8bb965f2650 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Nov 2022 13:35:44 +1100 Subject: [PATCH 2/9] Add `StrStyle` to `ast::LitKind::ByteStr`. This is required to distinguish between cooked and raw byte string literals in an `ast::LitKind`, without referring to an adjacent `token::Lit`. It's a prerequisite for the next commit. 
--- compiler/rustc_ast/src/ast.rs | 7 ++++--- compiler/rustc_ast/src/util/literal.rs | 16 +++++++++++----- compiler/rustc_ast_lowering/src/expr.rs | 2 +- .../rustc_ast_pretty/src/pprust/state/expr.rs | 3 ++- .../rustc_builtin_macros/src/concat_bytes.rs | 6 +++--- compiler/rustc_expand/src/base.rs | 2 +- compiler/rustc_expand/src/build.rs | 2 +- compiler/rustc_expand/src/proc_macro_server.rs | 3 ++- compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs | 2 +- compiler/rustc_hir_typeck/src/pat.rs | 2 +- .../src/build/expr/as_constant.rs | 4 ++-- compiler/rustc_mir_build/src/thir/constant.rs | 4 ++-- .../src/invalid_utf8_in_unchecked.rs | 2 +- .../clippy_lints/src/large_include_file.rs | 2 +- .../clippy_lints/src/matches/match_same_arms.rs | 2 +- .../clippy/clippy_lints/src/utils/author.rs | 2 +- .../clippy/clippy_utils/src/check_proc_macro.rs | 4 +++- src/tools/clippy/clippy_utils/src/consts.rs | 2 +- 18 files changed, 39 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 6a2f1f0c5749c..b869b2f8af994 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1796,8 +1796,9 @@ pub enum LitKind { /// A string literal (`"foo"`). The symbol is unescaped, and so may differ /// from the original token's symbol. Str(Symbol, StrStyle), - /// A byte string (`b"foo"`). - ByteStr(Lrc<[u8]>), + /// A byte string (`b"foo"`). Not stored as a symbol because it might be + /// non-utf8, and symbols only allow utf8 strings. + ByteStr(Lrc<[u8]>, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). @@ -1822,7 +1823,7 @@ impl LitKind { /// Returns `true` if this literal is byte literal string. pub fn is_bytestr(&self) -> bool { - matches!(self, LitKind::ByteStr(_)) + matches!(self, LitKind::ByteStr(..)) } /// Returns `true` if this is a numeric literal. 
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 5e6c94f1e6fc8..9f6fdf44ac0b5 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -1,11 +1,12 @@ //! Code related to parsing literals. -use crate::ast::{self, LitKind, MetaItemLit}; +use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; use std::ascii; +use std::str; #[derive(Debug)] pub enum LitError { @@ -115,9 +116,9 @@ impl LitKind { } }); error?; - LitKind::ByteStr(buf.into()) + LitKind::ByteStr(buf.into(), StrStyle::Cooked) } - token::ByteStrRaw(_) => { + token::ByteStrRaw(n) => { let s = symbol.as_str(); let bytes = if s.contains('\r') { let mut buf = Vec::with_capacity(s.len()); @@ -136,7 +137,7 @@ impl LitKind { symbol.to_string().into_bytes() }; - LitKind::ByteStr(bytes.into()) + LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) } token::Err => LitKind::Err, }) @@ -155,10 +156,15 @@ impl LitKind { (token::Str, symbol, None) } LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None), - LitKind::ByteStr(ref bytes) => { + LitKind::ByteStr(ref bytes, ast::StrStyle::Cooked) => { let string = bytes.escape_ascii().to_string(); (token::ByteStr, Symbol::intern(&string), None) } + LitKind::ByteStr(ref bytes, ast::StrStyle::Raw(n)) => { + // Unwrap because raw byte string literals can only contain ASCII. 
+ let string = str::from_utf8(bytes).unwrap(); + (token::ByteStrRaw(n), Symbol::intern(&string), None) + } LitKind::Byte(byte) => { let string: String = ascii::escape_default(byte).map(Into::::into).collect(); (token::Byte, Symbol::intern(&string), None) diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 82912a733d552..e18bbcf65e719 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -97,7 +97,7 @@ impl<'hir> LoweringContext<'_, 'hir> { } ExprKind::IncludedBytes(bytes) => hir::ExprKind::Lit(respan( self.lower_span(e.span), - LitKind::ByteStr(bytes.clone()), + LitKind::ByteStr(bytes.clone(), StrStyle::Cooked), )), ExprKind::Cast(expr, ty) => { let expr = self.lower_expr(expr); diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index 828b9d5ad5f68..7306b10d60ffb 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -323,7 +323,8 @@ impl<'a> State<'a> { self.print_token_literal(*token_lit, expr.span); } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); + let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked) + .synthesize_token_lit(); self.print_token_literal(lit, expr.span) } ast::ExprKind::Cast(expr, ty) => { diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 161e3499584e2..56b77fdf58050 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -69,7 +69,7 @@ fn invalid_type_err( Ok(ast::LitKind::Int(_, _)) => { cx.span_err(span, "numeric literal is not a `u8`"); } - Ok(ast::LitKind::ByteStr(_) | ast::LitKind::Byte(_)) => unreachable!(), + Ok(ast::LitKind::ByteStr(..) 
| ast::LitKind::Byte(_)) => unreachable!(), Err(err) => { report_lit_error(&cx.sess.parse_sess, err, token_lit, span); } @@ -97,7 +97,7 @@ fn handle_array_element( )) if val <= u8::MAX.into() => Some(val as u8), Ok(ast::LitKind::Byte(val)) => Some(val), - Ok(ast::LitKind::ByteStr(_)) => { + Ok(ast::LitKind::ByteStr(..)) => { if !*has_errors { cx.struct_span_err(expr.span, "cannot concatenate doubly nested array") .note("byte strings are treated as arrays of bytes") @@ -174,7 +174,7 @@ pub fn expand_concat_bytes( Ok(ast::LitKind::Byte(val)) => { accumulator.push(val); } - Ok(ast::LitKind::ByteStr(ref bytes)) => { + Ok(ast::LitKind::ByteStr(ref bytes, _)) => { accumulator.extend_from_slice(&bytes); } _ => { diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index 13e2d1ebbe786..d491e9e34a784 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1234,7 +1234,7 @@ pub fn expr_to_spanned_string<'a>( Err(match expr.kind { ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(_)) => { + Ok(ast::LitKind::ByteStr(..)) => { let mut err = cx.struct_span_err(expr.span, err_msg); let span = expr.span.shrink_to_lo(); err.span_suggestion( diff --git a/compiler/rustc_expand/src/build.rs b/compiler/rustc_expand/src/build.rs index b56e1a24834f0..d8245ff613a9d 100644 --- a/compiler/rustc_expand/src/build.rs +++ b/compiler/rustc_expand/src/build.rs @@ -361,7 +361,7 @@ impl<'a> ExtCtxt<'a> { } pub fn expr_byte_str(&self, sp: Span, bytes: Vec) -> P { - self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes))) + self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes), ast::StrStyle::Cooked)) } /// `[expr1, expr2, ...]` diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 57f66758ef005..255e5105ff4a9 100644 --- 
a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -526,7 +526,8 @@ impl server::TokenStream for Rustc<'_, '_> { Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); + let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked) + .synthesize_token_lit(); Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) } ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index 86384c7b93e71..0d6b0175406fe 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1169,7 +1169,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { match lit.node { ast::LitKind::Str(..) => tcx.mk_static_str(), - ast::LitKind::ByteStr(ref v) => { + ast::LitKind::ByteStr(ref v, _) => { tcx.mk_imm_ref(tcx.lifetimes.re_static, tcx.mk_array(tcx.types.u8, v.len() as u64)) } ast::LitKind::Byte(_) => tcx.types.u8, diff --git a/compiler/rustc_hir_typeck/src/pat.rs b/compiler/rustc_hir_typeck/src/pat.rs index decd317d9fc9b..6810353f9e778 100644 --- a/compiler/rustc_hir_typeck/src/pat.rs +++ b/compiler/rustc_hir_typeck/src/pat.rs @@ -386,7 +386,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // Byte string patterns behave the same way as array patterns // They can denote both statically and dynamically-sized byte arrays. let mut pat_ty = ty; - if let hir::ExprKind::Lit(Spanned { node: ast::LitKind::ByteStr(_), .. }) = lt.kind { + if let hir::ExprKind::Lit(Spanned { node: ast::LitKind::ByteStr(..), .. 
}) = lt.kind { let expected = self.structurally_resolved_type(span, expected); if let ty::Ref(_, inner_ty, _) = expected.kind() && matches!(inner_ty.kind(), ty::Slice(_)) diff --git a/compiler/rustc_mir_build/src/build/expr/as_constant.rs b/compiler/rustc_mir_build/src/build/expr/as_constant.rs index 717c62315745b..3b7ed818dc9b7 100644 --- a/compiler/rustc_mir_build/src/build/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/build/expr/as_constant.rs @@ -135,14 +135,14 @@ pub(crate) fn lit_to_mir_constant<'tcx>( let allocation = tcx.intern_const_alloc(allocation); ConstValue::Slice { data: allocation, start: 0, end: s.len() } } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_)) => { let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]); let allocation = tcx.intern_const_alloc(allocation); ConstValue::Slice { data: allocation, start: 0, end: data.len() } } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { let id = tcx.allocate_bytes(data); ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) } diff --git a/compiler/rustc_mir_build/src/thir/constant.rs b/compiler/rustc_mir_build/src/thir/constant.rs index a9ed945d4a15a..57ae6a3652df5 100644 --- a/compiler/rustc_mir_build/src/thir/constant.rs +++ b/compiler/rustc_mir_build/src/thir/constant.rs @@ -33,13 +33,13 @@ pub(crate) fn lit_to_const<'tcx>( let str_bytes = s.as_str().as_bytes(); ty::ValTree::from_raw_bytes(tcx, str_bytes) } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_)) => { let bytes = data as &[u8]; ty::ValTree::from_raw_bytes(tcx, bytes) } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { + 
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { let bytes = data as &[u8]; ty::ValTree::from_raw_bytes(tcx, bytes) } diff --git a/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs b/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs index e0a607f9a95b6..6a4861747d267 100644 --- a/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs +++ b/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs @@ -33,7 +33,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidUtf8InUnchecked { if let Some([arg]) = match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) { match &arg.kind { ExprKind::Lit(Spanned { node: lit, .. }) => { - if let LitKind::ByteStr(bytes) = &lit + if let LitKind::ByteStr(bytes, _) = &lit && std::str::from_utf8(bytes).is_err() { lint(cx, expr.span); diff --git a/src/tools/clippy/clippy_lints/src/large_include_file.rs b/src/tools/clippy/clippy_lints/src/large_include_file.rs index 84dd61a1e4b0d..424c0d9e79828 100644 --- a/src/tools/clippy/clippy_lints/src/large_include_file.rs +++ b/src/tools/clippy/clippy_lints/src/large_include_file.rs @@ -60,7 +60,7 @@ impl LateLintPass<'_> for LargeIncludeFile { then { let len = match &lit.node { // include_bytes - LitKind::ByteStr(bstr) => bstr.len(), + LitKind::ByteStr(bstr, _) => bstr.len(), // include_str LitKind::Str(sym, _) => sym.as_str().len(), _ => return, diff --git a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs index 168c1e4d2e60d..158e6caa4de54 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs @@ -282,7 +282,7 @@ impl<'a> NormalizedPat<'a> { // TODO: Handle negative integers. They're currently treated as a wild match. 
ExprKind::Lit(lit) => match lit.node { LitKind::Str(sym, _) => Self::LitStr(sym), - LitKind::ByteStr(ref bytes) => Self::LitBytes(bytes), + LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::Byte(val) => Self::LitInt(val.into()), LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Int(val, _) => Self::LitInt(val), diff --git a/src/tools/clippy/clippy_lints/src/utils/author.rs b/src/tools/clippy/clippy_lints/src/utils/author.rs index 0c052d86eda40..bd7daf0773caf 100644 --- a/src/tools/clippy/clippy_lints/src/utils/author.rs +++ b/src/tools/clippy/clippy_lints/src/utils/author.rs @@ -299,7 +299,7 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> { }; kind!("Float(_, {float_ty})"); }, - LitKind::ByteStr(ref vec) => { + LitKind::ByteStr(ref vec, _) => { bind!(self, vec); kind!("ByteStr(ref {vec})"); chain!(self, "let [{:?}] = **{vec}", vec.value); diff --git a/src/tools/clippy/clippy_utils/src/check_proc_macro.rs b/src/tools/clippy/clippy_utils/src/check_proc_macro.rs index c6bf98b7b8bbd..43f0df145f0ec 100644 --- a/src/tools/clippy/clippy_utils/src/check_proc_macro.rs +++ b/src/tools/clippy/clippy_utils/src/check_proc_macro.rs @@ -69,7 +69,9 @@ fn lit_search_pat(lit: &LitKind) -> (Pat, Pat) { LitKind::Str(_, StrStyle::Cooked) => (Pat::Str("\""), Pat::Str("\"")), LitKind::Str(_, StrStyle::Raw(0)) => (Pat::Str("r"), Pat::Str("\"")), LitKind::Str(_, StrStyle::Raw(_)) => (Pat::Str("r#"), Pat::Str("#")), - LitKind::ByteStr(_) => (Pat::Str("b\""), Pat::Str("\"")), + LitKind::ByteStr(_, StrStyle::Cooked) => (Pat::Str("b\""), Pat::Str("\"")), + LitKind::ByteStr(_, StrStyle::Raw(0)) => (Pat::Str("br\""), Pat::Str("\"")), + LitKind::ByteStr(_, StrStyle::Raw(_)) => (Pat::Str("br#\""), Pat::Str("#")), LitKind::Byte(_) => (Pat::Str("b'"), Pat::Str("'")), LitKind::Char(_) => (Pat::Str("'"), Pat::Str("'")), LitKind::Int(_, LitIntType::Signed(IntTy::Isize)) => (Pat::Num, Pat::Str("isize")), diff --git a/src/tools/clippy/clippy_utils/src/consts.rs 
b/src/tools/clippy/clippy_utils/src/consts.rs index 315aea9aa091b..7a637d32babec 100644 --- a/src/tools/clippy/clippy_utils/src/consts.rs +++ b/src/tools/clippy/clippy_utils/src/consts.rs @@ -210,7 +210,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option>) -> Constant { match *lit { LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Byte(b) => Constant::Int(u128::from(b)), - LitKind::ByteStr(ref s) => Constant::Binary(Lrc::clone(s)), + LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)), LitKind::Char(c) => Constant::Char(c), LitKind::Int(n, _) => Constant::Int(n), LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty { From 2fd364acff5f962b0ce4f4dffb5ae085d5f2b67a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Nov 2022 13:36:00 +1100 Subject: [PATCH 3/9] Remove `token::Lit` from `ast::MetaItemLit`. `token::Lit` contains a `kind` field that indicates what kind of literal it is. `ast::MetaItemLit` currently wraps a `token::Lit` but also has its own `kind` field. This means that `ast::MetaItemLit` encodes the literal kind in two different ways. This commit changes `ast::MetaItemLit` so it no longer wraps `token::Lit`. It now contains the `symbol` and `suffix` fields from `token::Lit`, but not the `kind` field, eliminating the redundancy. 
--- compiler/rustc_ast/src/ast.rs | 8 +++--- compiler/rustc_ast/src/attr/mod.rs | 4 ++- compiler/rustc_ast/src/util/literal.rs | 27 +++++++++++++++++-- compiler/rustc_ast_lowering/src/lib.rs | 3 ++- compiler/rustc_ast_pretty/src/pprust/state.rs | 2 +- compiler/rustc_builtin_macros/src/derive.rs | 10 ++++--- compiler/rustc_parse/src/parser/expr.rs | 11 ++++---- compiler/rustc_parse/src/parser/pat.rs | 2 +- src/tools/rustfmt/src/attr.rs | 12 +++++---- 9 files changed, 56 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index b869b2f8af994..c1795be229098 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1733,8 +1733,10 @@ pub enum StrStyle { /// A literal in a meta item. #[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)] pub struct MetaItemLit { - /// The original literal token as written in source code. - pub token_lit: token::Lit, + /// The original literal as written in the source code. + pub symbol: Symbol, + /// The original suffix as written in the source code. + pub suffix: Option, /// The "semantic" representation of the literal lowered from the original tokens. /// Strings are unescaped, hexadecimal forms are eliminated, etc. 
pub kind: LitKind, @@ -3103,7 +3105,7 @@ mod size_asserts { static_assert_size!(ItemKind, 112); static_assert_size!(LitKind, 24); static_assert_size!(Local, 72); - static_assert_size!(MetaItemLit, 48); + static_assert_size!(MetaItemLit, 40); static_assert_size!(Param, 40); static_assert_size!(Pat, 88); static_assert_size!(Path, 24); diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 1ba4691467586..2ec126715e79e 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -328,7 +328,9 @@ pub fn mk_name_value_item_str(ident: Ident, str: Symbol, str_span: Span) -> Meta } pub fn mk_name_value_item(ident: Ident, kind: LitKind, lit_span: Span) -> MetaItem { - let lit = MetaItemLit { token_lit: kind.synthesize_token_lit(), kind, span: lit_span }; + let token_lit = kind.synthesize_token_lit(); + let lit = + MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, kind, span: lit_span }; let span = ident.span.to(lit_span); MetaItem { path: Path::from_ident(ident), kind: MetaItemKind::NameValue(lit), span } } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 9f6fdf44ac0b5..a0a925d4700bd 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -202,9 +202,32 @@ impl LitKind { } impl MetaItemLit { - /// Converts token literal into a meta item literal. + /// Converts a token literal into a meta item literal. pub fn from_token_lit(token_lit: token::Lit, span: Span) -> Result { - Ok(MetaItemLit { token_lit, kind: LitKind::from_token_lit(token_lit)?, span }) + Ok(MetaItemLit { + symbol: token_lit.symbol, + suffix: token_lit.suffix, + kind: LitKind::from_token_lit(token_lit)?, + span, + }) + } + + /// Cheaply converts a meta item literal into a token literal. 
+ pub fn as_token_lit(&self) -> token::Lit { + let kind = match self.kind { + LitKind::Bool(_) => token::Bool, + LitKind::Str(_, ast::StrStyle::Cooked) => token::Str, + LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n), + LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr, + LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n), + LitKind::Byte(_) => token::Byte, + LitKind::Char(_) => token::Char, + LitKind::Int(..) => token::Integer, + LitKind::Float(..) => token::Float, + LitKind::Err => token::Err, + }; + + token::Lit::new(kind, self.symbol, self.suffix) } /// Converts an arbitrary token into meta item literal. diff --git a/compiler/rustc_ast_lowering/src/lib.rs b/compiler/rustc_ast_lowering/src/lib.rs index 1d27970627854..8e4bfb7133606 100644 --- a/compiler/rustc_ast_lowering/src/lib.rs +++ b/compiler/rustc_ast_lowering/src/lib.rs @@ -954,7 +954,8 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> { lit } else { MetaItemLit { - token_lit: token::Lit::new(token::LitKind::Err, kw::Empty, None), + symbol: kw::Empty, + suffix: None, kind: LitKind::Err, span: DUMMY_SP, } diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index ebe55a4b77183..d555cf4873099 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -376,7 +376,7 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere } fn print_meta_item_lit(&mut self, lit: &ast::MetaItemLit) { - self.print_token_literal(lit.token_lit, lit.span) + self.print_token_literal(lit.as_token_lit(), lit.span) } fn print_token_literal(&mut self, token_lit: token::Lit, span: Span) { diff --git a/compiler/rustc_builtin_macros/src/derive.rs b/compiler/rustc_builtin_macros/src/derive.rs index fa5a45730ac7a..2a8dc02849ea7 100644 --- a/compiler/rustc_builtin_macros/src/derive.rs +++ b/compiler/rustc_builtin_macros/src/derive.rs @@ -1,7 +1,7 @@ use crate::cfg_eval::cfg_eval; use rustc_ast as 
ast; -use rustc_ast::{token, GenericParamKind, ItemKind, MetaItemKind, NestedMetaItem, StmtKind}; +use rustc_ast::{GenericParamKind, ItemKind, MetaItemKind, NestedMetaItem, StmtKind}; use rustc_errors::{struct_span_err, Applicability}; use rustc_expand::base::{Annotatable, ExpandResult, ExtCtxt, Indeterminate, MultiItemModifier}; use rustc_feature::AttributeTemplate; @@ -130,9 +130,11 @@ fn report_bad_target(sess: &Session, item: &Annotatable, span: Span) -> bool { } fn report_unexpected_meta_item_lit(sess: &Session, lit: &ast::MetaItemLit) { - let help_msg = match lit.token_lit.kind { - token::Str if rustc_lexer::is_ident(lit.token_lit.symbol.as_str()) => { - format!("try using `#[derive({})]`", lit.token_lit.symbol) + let help_msg = match lit.kind { + ast::LitKind::Str(_, ast::StrStyle::Cooked) + if rustc_lexer::is_ident(lit.symbol.as_str()) => + { + format!("try using `#[derive({})]`", lit.symbol) } _ => "for example, write `#[derive(Debug)]` for `Debug`".to_string(), }; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index e0443a697b504..1c773bea000b2 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1551,7 +1551,7 @@ impl<'a> Parser<'a> { }) }); consume_colon = false; - Ok(self.mk_expr(lo, ExprKind::Lit(lit.token_lit))) + Ok(self.mk_expr(lo, ExprKind::Lit(lit.as_token_lit()))) } else if !ate_colon && (self.check_noexpect(&TokenKind::Comma) || self.check_noexpect(&TokenKind::Gt)) { @@ -1654,7 +1654,8 @@ impl<'a> Parser<'a> { } let name = lifetime.without_first_quote().name; ast::MetaItemLit { - token_lit: token::Lit::new(token::LitKind::Char, name, None), + symbol: name, + suffix: None, kind: ast::LitKind::Char(name.as_str().chars().next().unwrap_or('_')), span: lifetime.span, } @@ -1773,8 +1774,8 @@ impl<'a> Parser<'a> { Some(lit) => match lit.kind { ast::LitKind::Str(symbol_unescaped, style) => Ok(ast::StrLit { style, - symbol: lit.token_lit.symbol, - suffix: 
lit.token_lit.suffix, + symbol: lit.symbol, + suffix: lit.suffix, span: lit.span, symbol_unescaped, }), @@ -1817,7 +1818,7 @@ impl<'a> Parser<'a> { pub(super) fn parse_token_lit(&mut self) -> PResult<'a, (token::Lit, Span)> { self.parse_opt_token_lit() .ok_or(()) - .or_else(|()| self.handle_missing_lit().map(|lit| (lit.token_lit, lit.span))) + .or_else(|()| self.handle_missing_lit().map(|lit| (lit.as_token_lit(), lit.span))) } pub(super) fn parse_meta_item_lit(&mut self) -> PResult<'a, MetaItemLit> { diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index cbeec951e2dfe..b5147158f708e 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -420,7 +420,7 @@ impl<'a> Parser<'a> { err.span_label(self_.token.span, format!("expected {}", expected)); err }); - PatKind::Lit(self.mk_expr(lo, ExprKind::Lit(lit.token_lit))) + PatKind::Lit(self.mk_expr(lo, ExprKind::Lit(lit.as_token_lit()))) } else { // Try to parse everything else as literal with optional minus match self.parse_literal_maybe_minus() { diff --git a/src/tools/rustfmt/src/attr.rs b/src/tools/rustfmt/src/attr.rs index 2ac703b957b86..c503eeeb9b3b9 100644 --- a/src/tools/rustfmt/src/attr.rs +++ b/src/tools/rustfmt/src/attr.rs @@ -260,7 +260,9 @@ impl Rewrite for ast::NestedMetaItem { fn rewrite(&self, context: &RewriteContext<'_>, shape: Shape) -> Option { match self { ast::NestedMetaItem::MetaItem(ref meta_item) => meta_item.rewrite(context, shape), - ast::NestedMetaItem::Lit(ref l) => rewrite_literal(context, l.token_lit, l.span, shape), + ast::NestedMetaItem::Lit(ref l) => { + rewrite_literal(context, l.as_token_lit(), l.span, shape) + } } } } @@ -308,18 +310,18 @@ impl Rewrite for ast::MetaItem { }), )? 
} - ast::MetaItemKind::NameValue(ref literal) => { + ast::MetaItemKind::NameValue(ref lit) => { let path = rewrite_path(context, PathContext::Type, &None, &self.path, shape)?; // 3 = ` = ` let lit_shape = shape.shrink_left(path.len() + 3)?; - // `rewrite_literal` returns `None` when `literal` exceeds max + // `rewrite_literal` returns `None` when `lit` exceeds max // width. Since a literal is basically unformattable unless it // is a string literal (and only if `format_strings` is set), // we might be better off ignoring the fact that the attribute // is longer than the max width and continue on formatting. // See #2479 for example. - let value = rewrite_literal(context, literal.token_lit, literal.span, lit_shape) - .unwrap_or_else(|| context.snippet(literal.span).to_owned()); + let value = rewrite_literal(context, lit.as_token_lit(), lit.span, lit_shape) + .unwrap_or_else(|| context.snippet(lit.span).to_owned()); format!("{} = {}", path, value) } }) From d5526ff40d3213f6138c1c1311d0c1a0f5c40133 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Nov 2022 13:46:28 +1100 Subject: [PATCH 4/9] Reorder `StrLit` fields. To better match `MetaItemLit`. --- compiler/rustc_ast/src/ast.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index c1795be229098..fc70b68e27fd2 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1746,13 +1746,14 @@ pub struct MetaItemLit { /// Similar to `MetaItemLit`, but restricted to string literals. #[derive(Clone, Copy, Encodable, Decodable, Debug)] pub struct StrLit { - /// The original literal token as written in source code. - pub style: StrStyle, + /// The original literal as written in source code. pub symbol: Symbol, + /// The original suffix as written in source code. pub suffix: Option, - pub span: Span, - /// The unescaped "semantic" representation of the literal lowered from the original token. 
+ /// The semantic (unescaped) representation of the literal. pub symbol_unescaped: Symbol, + pub style: StrStyle, + pub span: Span, } impl StrLit { From 7d30472180855735929e42595aefb4344b4c8562 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 5 Dec 2022 11:22:01 +1100 Subject: [PATCH 5/9] Remove `mk_name_value_item{,_str}`. There are better ways to create the meta items. - In the rustdoc tests, the commit adds `dummy_meta_item_name_value`, which matches the existing `dummy_meta_item_word` function and `dummy_meta_item_list` macro. - In `types.rs` the commit clones the existing meta item and then modifies the clone. --- compiler/rustc_ast/src/attr/mod.rs | 15 --------------- src/librustdoc/clean/cfg/tests.rs | 20 ++++++++++++++------ src/librustdoc/clean/types.rs | 14 ++++++-------- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 2ec126715e79e..c47b756c26ad1 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -1,6 +1,5 @@ //! Functions dealing with attributes and meta items. 
-use crate::ast; use crate::ast::{AttrArgs, AttrArgsEq, AttrId, AttrItem, AttrKind, AttrStyle, AttrVec, Attribute}; use crate::ast::{DelimArgs, Expr, ExprKind, LitKind, MetaItemLit}; use crate::ast::{MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem, NormalAttr}; @@ -321,20 +320,6 @@ impl Attribute { } } -/* Constructors */ - -pub fn mk_name_value_item_str(ident: Ident, str: Symbol, str_span: Span) -> MetaItem { - mk_name_value_item(ident, LitKind::Str(str, ast::StrStyle::Cooked), str_span) -} - -pub fn mk_name_value_item(ident: Ident, kind: LitKind, lit_span: Span) -> MetaItem { - let token_lit = kind.synthesize_token_lit(); - let lit = - MetaItemLit { symbol: token_lit.symbol, suffix: token_lit.suffix, kind, span: lit_span }; - let span = ident.span.to(lit_span); - MetaItem { path: Path::from_ident(ident), kind: MetaItemKind::NameValue(lit), span } -} - pub struct AttrIdGenerator(WorkerLocal>); #[cfg(debug_assertions)] diff --git a/src/librustdoc/clean/cfg/tests.rs b/src/librustdoc/clean/cfg/tests.rs index 7f72d5d39a752..81f6767243683 100644 --- a/src/librustdoc/clean/cfg/tests.rs +++ b/src/librustdoc/clean/cfg/tests.rs @@ -1,9 +1,8 @@ use super::*; -use rustc_ast::attr; -use rustc_ast::Path; +use rustc_ast::{LitKind, MetaItemLit, Path, StrStyle}; use rustc_span::create_default_session_globals_then; -use rustc_span::symbol::{Ident, Symbol}; +use rustc_span::symbol::{kw, Ident, Symbol}; use rustc_span::DUMMY_SP; fn word_cfg(s: &str) -> Cfg { @@ -22,6 +21,15 @@ fn dummy_meta_item_word(name: &str) -> MetaItem { } } +fn dummy_meta_item_name_value(name: &str, symbol: Symbol, kind: LitKind) -> MetaItem { + let lit = MetaItemLit { symbol, suffix: None, kind, span: DUMMY_SP }; + MetaItem { + path: Path::from_ident(Ident::from_str(name)), + kind: MetaItemKind::NameValue(lit), + span: DUMMY_SP, + } +} + macro_rules! 
dummy_meta_item_list { ($name:ident, [$($list:ident),* $(,)?]) => { MetaItem { @@ -242,8 +250,8 @@ fn test_parse_ok() { let mi = dummy_meta_item_word("all"); assert_eq!(Cfg::parse(&mi), Ok(word_cfg("all"))); - let mi = - attr::mk_name_value_item_str(Ident::from_str("all"), Symbol::intern("done"), DUMMY_SP); + let done = Symbol::intern("done"); + let mi = dummy_meta_item_name_value("all", done, LitKind::Str(done, StrStyle::Cooked)); assert_eq!(Cfg::parse(&mi), Ok(name_value_cfg("all", "done"))); let mi = dummy_meta_item_list!(all, [a, b]); @@ -272,7 +280,7 @@ fn test_parse_ok() { #[test] fn test_parse_err() { create_default_session_globals_then(|| { - let mi = attr::mk_name_value_item(Ident::from_str("foo"), LitKind::Bool(false), DUMMY_SP); + let mi = dummy_meta_item_name_value("foo", kw::False, LitKind::Bool(false)); assert!(Cfg::parse(&mi).is_err()); let mi = dummy_meta_item_list!(not, [a, b]); diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index ed4e9508f4309..8c3b289d34628 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -10,7 +10,6 @@ use std::{cmp, fmt, iter}; use arrayvec::ArrayVec; use thin_vec::ThinVec; -use rustc_ast::attr; use rustc_ast::util::comments::beautify_doc_string; use rustc_ast::{self as ast, AttrStyle}; use rustc_attr::{ConstStability, Deprecation, Stability, StabilityLevel}; @@ -27,7 +26,6 @@ use rustc_middle::ty::fast_reject::SimplifiedType; use rustc_middle::ty::{self, DefIdTree, TyCtxt, Visibility}; use rustc_session::Session; use rustc_span::hygiene::MacroKind; -use rustc_span::source_map::DUMMY_SP; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::{self, FileName, Loc}; use rustc_target::abi::VariantIdx; @@ -979,12 +977,12 @@ impl AttributesExt for [ast::Attribute] { // #[doc(cfg(target_feature = "feat"))] attributes as well for attr in self.lists(sym::target_feature) { if attr.has_name(sym::enable) { - if let Some(feat) = attr.value_str() { - let meta = 
attr::mk_name_value_item_str( - Ident::with_dummy_span(sym::target_feature), - feat, - DUMMY_SP, - ); + if attr.value_str().is_some() { + // Clone `enable = "feat"`, change to `target_feature = "feat"`. + // Unwrap is safe because `value_str` succeeded above. + let mut meta = attr.meta_item().unwrap().clone(); + meta.path = ast::Path::from_ident(Ident::with_dummy_span(sym::target_feature)); + if let Ok(feat_cfg) = Cfg::parse(&meta) { cfg &= feat_cfg; } From 4ae956f600e72d62a6b17d95705148442841cef0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 5 Dec 2022 14:16:41 +1100 Subject: [PATCH 6/9] Remove `ExtCtxt::expr_lit`. --- compiler/rustc_ast/src/util/literal.rs | 34 +++++++++++++------- compiler/rustc_expand/src/build.rs | 43 +++++++++++++------------- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index a0a925d4700bd..4cc4b6367b48b 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -8,6 +8,26 @@ use rustc_span::Span; use std::ascii; use std::str; +// Escapes a string, represented as a symbol. Reuses the original symbol, +// avoiding interning, if no changes are required. +pub fn escape_string_symbol(symbol: Symbol) -> Symbol { + let s = symbol.as_str(); + let escaped = s.escape_default().to_string(); + if s == escaped { symbol } else { Symbol::intern(&escaped) } +} + +// Escapes a char. +pub fn escape_char_symbol(ch: char) -> Symbol { + let s: String = ch.escape_default().map(Into::<char>::into).collect(); + Symbol::intern(&s) +} + +// Escapes a byte string. 
+pub fn escape_byte_str_symbol(bytes: &[u8]) -> Symbol { + let s = bytes.escape_ascii().to_string(); + Symbol::intern(&s) +} + #[derive(Debug)] pub enum LitError { LexerError, @@ -149,16 +169,11 @@ impl LitKind { pub fn synthesize_token_lit(&self) -> token::Lit { let (kind, symbol, suffix) = match *self { LitKind::Str(symbol, ast::StrStyle::Cooked) => { - // Don't re-intern unless the escaped string is different. - let s = symbol.as_str(); - let escaped = s.escape_default().to_string(); - let symbol = if s == escaped { symbol } else { Symbol::intern(&escaped) }; - (token::Str, symbol, None) + (token::Str, escape_string_symbol(symbol), None) } LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None), LitKind::ByteStr(ref bytes, ast::StrStyle::Cooked) => { - let string = bytes.escape_ascii().to_string(); - (token::ByteStr, Symbol::intern(&string), None) + (token::ByteStr, escape_byte_str_symbol(bytes), None) } LitKind::ByteStr(ref bytes, ast::StrStyle::Raw(n)) => { // Unwrap because raw byte string literals can only contain ASCII. 
@@ -169,10 +184,7 @@ impl LitKind { let string: String = ascii::escape_default(byte).map(Into::::into).collect(); (token::Byte, Symbol::intern(&string), None) } - LitKind::Char(ch) => { - let string: String = ch.escape_default().map(Into::::into).collect(); - (token::Char, Symbol::intern(&string), None) - } + LitKind::Char(ch) => (token::Char, escape_char_symbol(ch), None), LitKind::Int(n, ty) => { let suffix = match ty { ast::LitIntType::Unsigned(ty) => Some(ty.name()), diff --git a/compiler/rustc_expand/src/build.rs b/compiler/rustc_expand/src/build.rs index d8245ff613a9d..2fec24a1aece8 100644 --- a/compiler/rustc_expand/src/build.rs +++ b/compiler/rustc_expand/src/build.rs @@ -1,8 +1,7 @@ use crate::base::ExtCtxt; -use rustc_ast::attr; use rustc_ast::ptr::P; use rustc_ast::{self as ast, AttrVec, BlockCheckMode, Expr, LocalKind, PatKind, UnOp}; -use rustc_data_structures::sync::Lrc; +use rustc_ast::{attr, token, util::literal}; use rustc_span::source_map::Spanned; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use rustc_span::Span; @@ -332,36 +331,36 @@ impl<'a> ExtCtxt<'a> { self.expr_struct(span, self.path_ident(span, id), fields) } - fn expr_lit(&self, span: Span, lit_kind: ast::LitKind) -> P { - let token_lit = lit_kind.synthesize_token_lit(); - self.expr(span, ast::ExprKind::Lit(token_lit)) + pub fn expr_usize(&self, span: Span, n: usize) -> P { + let suffix = Some(ast::UintTy::Usize.name()); + let lit = token::Lit::new(token::Integer, sym::integer(n), suffix); + self.expr(span, ast::ExprKind::Lit(lit)) } - pub fn expr_usize(&self, span: Span, i: usize) -> P { - self.expr_lit( - span, - ast::LitKind::Int(i as u128, ast::LitIntType::Unsigned(ast::UintTy::Usize)), - ) - } - - pub fn expr_u32(&self, sp: Span, u: u32) -> P { - self.expr_lit(sp, ast::LitKind::Int(u as u128, ast::LitIntType::Unsigned(ast::UintTy::U32))) + pub fn expr_u32(&self, span: Span, n: u32) -> P { + let suffix = Some(ast::UintTy::U32.name()); + let lit = token::Lit::new(token::Integer, 
sym::integer(n), suffix); + self.expr(span, ast::ExprKind::Lit(lit)) } - pub fn expr_bool(&self, sp: Span, value: bool) -> P { - self.expr_lit(sp, ast::LitKind::Bool(value)) + pub fn expr_bool(&self, span: Span, value: bool) -> P { + let lit = token::Lit::new(token::Bool, if value { kw::True } else { kw::False }, None); + self.expr(span, ast::ExprKind::Lit(lit)) } - pub fn expr_str(&self, sp: Span, s: Symbol) -> P { - self.expr_lit(sp, ast::LitKind::Str(s, ast::StrStyle::Cooked)) + pub fn expr_str(&self, span: Span, s: Symbol) -> P { + let lit = token::Lit::new(token::Str, literal::escape_string_symbol(s), None); + self.expr(span, ast::ExprKind::Lit(lit)) } - pub fn expr_char(&self, sp: Span, ch: char) -> P { - self.expr_lit(sp, ast::LitKind::Char(ch)) + pub fn expr_char(&self, span: Span, ch: char) -> P { + let lit = token::Lit::new(token::Char, literal::escape_char_symbol(ch), None); + self.expr(span, ast::ExprKind::Lit(lit)) } - pub fn expr_byte_str(&self, sp: Span, bytes: Vec) -> P { - self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes), ast::StrStyle::Cooked)) + pub fn expr_byte_str(&self, span: Span, bytes: Vec) -> P { + let lit = token::Lit::new(token::ByteStr, literal::escape_byte_str_symbol(&bytes), None); + self.expr(span, ast::ExprKind::Lit(lit)) } /// `[expr1, expr2, ...]` From d887615b4c83d856cd3e40000968c047f2ff4019 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 5 Dec 2022 14:39:56 +1100 Subject: [PATCH 7/9] Parameterise `Parser::{recover_unclosed_char,handle_missing_lit}`. These two methods both produce a `MetaItemLit`, and then some of the call sites convert the `MetaItemLit` to a `token::Lit` with `as_token_lit`. This commit parameterises these two methods with a `mk_lit_char` closure, which can be used to produce either `MetaItemLit` or `token::Lit` directly as necessary. 
--- compiler/rustc_parse/src/parser/expr.rs | 57 ++++++++++++++++--------- compiler/rustc_parse/src/parser/pat.rs | 22 ++++++---- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 1c773bea000b2..07f03e0d582cb 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1543,15 +1543,16 @@ impl<'a> Parser<'a> { && (matches!(self.token.kind, token::CloseDelim(_) | token::Comma) || self.token.is_op()) { - let lit = self.recover_unclosed_char(label_.ident, |self_| { - self_.sess.create_err(UnexpectedTokenAfterLabel { - span: self_.token.span, - remove_label: None, - enclose_in_block: None, - }) - }); + let (lit, _) = + self.recover_unclosed_char(label_.ident, Parser::mk_token_lit_char, |self_| { + self_.sess.create_err(UnexpectedTokenAfterLabel { + span: self_.token.span, + remove_label: None, + enclose_in_block: None, + }) + }); consume_colon = false; - Ok(self.mk_expr(lo, ExprKind::Lit(lit.as_token_lit()))) + Ok(self.mk_expr(lo, ExprKind::Lit(lit))) } else if !ate_colon && (self.check_noexpect(&TokenKind::Comma) || self.check_noexpect(&TokenKind::Gt)) { @@ -1626,12 +1627,13 @@ impl<'a> Parser<'a> { Ok(expr) } - /// Emit an error when a char is parsed as a lifetime because of a missing quote - pub(super) fn recover_unclosed_char( + /// Emit an error when a char is parsed as a lifetime because of a missing quote. 
+ pub(super) fn recover_unclosed_char<L>( &self, lifetime: Ident, + mk_lit_char: impl FnOnce(Symbol, Span) -> L, err: impl FnOnce(&Self) -> DiagnosticBuilder<'a, ErrorGuaranteed>, - ) -> ast::MetaItemLit { + ) -> L { if let Some(mut diag) = self.sess.span_diagnostic.steal_diagnostic(lifetime.span, StashKey::LifetimeIsChar) { @@ -1653,12 +1655,7 @@ impl<'a> Parser<'a> { .emit(); } let name = lifetime.without_first_quote().name; - ast::MetaItemLit { - symbol: name, - suffix: None, - kind: ast::LitKind::Char(name.as_str().chars().next().unwrap_or('_')), - span: lifetime.span, - } + mk_lit_char(name, lifetime.span) } /// Recover on the syntax `do catch { ... }` suggesting `try { ... }` instead. @@ -1785,7 +1782,23 @@ impl<'a> Parser<'a> { } } - fn handle_missing_lit(&mut self) -> PResult<'a, MetaItemLit> { + pub(crate) fn mk_token_lit_char(name: Symbol, span: Span) -> (token::Lit, Span) { + (token::Lit { symbol: name, suffix: None, kind: token::Char }, span) + } + + fn mk_meta_item_lit_char(name: Symbol, span: Span) -> MetaItemLit { + ast::MetaItemLit { + symbol: name, + suffix: None, + kind: ast::LitKind::Char(name.as_str().chars().next().unwrap_or('_')), + span, + } + } + + fn handle_missing_lit<L>( + &mut self, + mk_lit_char: impl FnOnce(Symbol, Span) -> L, + ) -> PResult<'a, L> { if let token::Interpolated(inner) = &self.token.kind { let expr = match inner.as_ref() { token::NtExpr(expr) => Some(expr), @@ -1809,7 +1822,7 @@ impl<'a> Parser<'a> { // On an error path, eagerly consider a lifetime to be an unclosed character lit if self.token.is_lifetime() { let lt = self.expect_lifetime(); - Ok(self.recover_unclosed_char(lt.ident, err)) + Ok(self.recover_unclosed_char(lt.ident, mk_lit_char, err)) } else { Err(err(self)) } @@ -1818,11 +1831,13 @@ impl<'a> Parser<'a> { pub(super) fn parse_token_lit(&mut self) -> PResult<'a, (token::Lit, Span)> { self.parse_opt_token_lit() .ok_or(()) - .or_else(|()| self.handle_missing_lit().map(|lit| (lit.as_token_lit(), lit.span))) + 
.or_else(|()| self.handle_missing_lit(Parser::mk_token_lit_char)) } pub(super) fn parse_meta_item_lit(&mut self) -> PResult<'a, MetaItemLit> { - self.parse_opt_meta_item_lit().ok_or(()).or_else(|()| self.handle_missing_lit()) + self.parse_opt_meta_item_lit() + .ok_or(()) + .or_else(|()| self.handle_missing_lit(Parser::mk_meta_item_lit_char)) } fn recover_after_dot(&mut self) -> Option { diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index b5147158f708e..a1981e1147775 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -411,16 +411,20 @@ impl<'a> Parser<'a> { { // Recover a `'a` as a `'a'` literal let lt = self.expect_lifetime(); - let lit = self.recover_unclosed_char(lt.ident, |self_| { - let expected = expected.unwrap_or("pattern"); - let msg = - format!("expected {}, found {}", expected, super::token_descr(&self_.token)); + let (lit, _) = + self.recover_unclosed_char(lt.ident, Parser::mk_token_lit_char, |self_| { + let expected = expected.unwrap_or("pattern"); + let msg = format!( + "expected {}, found {}", + expected, + super::token_descr(&self_.token) + ); - let mut err = self_.struct_span_err(self_.token.span, &msg); - err.span_label(self_.token.span, format!("expected {}", expected)); - err - }); - PatKind::Lit(self.mk_expr(lo, ExprKind::Lit(lit.as_token_lit()))) + let mut err = self_.struct_span_err(self_.token.span, &msg); + err.span_label(self_.token.span, format!("expected {}", expected)); + err + }); + PatKind::Lit(self.mk_expr(lo, ExprKind::Lit(lit))) } else { // Try to parse everything else as literal with optional minus match self.parse_literal_maybe_minus() { From 568e647047e2a3b817a3f39d2ecb25989a4981ce Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 5 Dec 2022 15:23:27 +1100 Subject: [PATCH 8/9] Remove three uses of `LitKind::synthesize_token_lit`. 
--- compiler/rustc_ast/src/attr/mod.rs | 5 +++-- compiler/rustc_ast_pretty/src/pprust/state/expr.rs | 5 +++-- compiler/rustc_expand/src/proc_macro_server.rs | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index c47b756c26ad1..d99f6ed2c1cd3 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -3,12 +3,13 @@ use crate::ast::{AttrArgs, AttrArgsEq, AttrId, AttrItem, AttrKind, AttrStyle, AttrVec, Attribute}; use crate::ast::{DelimArgs, Expr, ExprKind, LitKind, MetaItemLit}; use crate::ast::{MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem, NormalAttr}; -use crate::ast::{Path, PathSegment, StrStyle, DUMMY_NODE_ID}; +use crate::ast::{Path, PathSegment, DUMMY_NODE_ID}; use crate::ptr::P; use crate::token::{self, CommentKind, Delimiter, Token}; use crate::tokenstream::{DelimSpan, Spacing, TokenTree}; use crate::tokenstream::{LazyAttrTokenStream, TokenStream}; use crate::util::comments; +use crate::util::literal::escape_string_symbol; use rustc_data_structures::sync::WorkerLocal; use rustc_index::bit_set::GrowableBitSet; use rustc_span::symbol::{sym, Ident, Symbol}; @@ -395,7 +396,7 @@ pub fn mk_attr_name_value_str( val: Symbol, span: Span, ) -> Attribute { - let lit = LitKind::Str(val, StrStyle::Cooked).synthesize_token_lit(); + let lit = token::Lit::new(token::Str, escape_string_symbol(val), None); let expr = P(Expr { id: DUMMY_NODE_ID, kind: ExprKind::Lit(lit), diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index 7306b10d60ffb..a00837ec84303 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -2,6 +2,8 @@ use crate::pp::Breaks::Inconsistent; use crate::pprust::state::{AnnNode, IterDelimited, PrintState, State, INDENT_UNIT}; use rustc_ast::ptr::P; +use rustc_ast::token; +use 
rustc_ast::util::literal::escape_byte_str_symbol; use rustc_ast::util::parser::{self, AssocOp, Fixity}; use rustc_ast::{self as ast, BlockCheckMode}; @@ -323,8 +325,7 @@ impl<'a> State<'a> { self.print_token_literal(*token_lit, expr.span); } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked) - .synthesize_token_lit(); + let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None); self.print_token_literal(lit, expr.span) } ast::ExprKind::Cast(expr, ty) => { diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 255e5105ff4a9..768bdab8a5419 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -6,6 +6,7 @@ use pm::{Delimiter, Level, LineColumn}; use rustc_ast as ast; use rustc_ast::token; use rustc_ast::tokenstream::{self, Spacing::*, TokenStream}; +use rustc_ast::util::literal::escape_byte_str_symbol; use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; @@ -526,8 +527,7 @@ impl server::TokenStream for Rustc<'_, '_> { Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked) - .synthesize_token_lit(); + let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None); Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) } ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { From 7e0c6dba0d83dbee96bbf7eac7b4cb563e297a5f Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 5 Dec 2022 16:09:45 +1100 Subject: [PATCH 9/9] Remove `LitKind::synthesize_token_lit`. It has a single call site in the HIR pretty printer, where the resulting token lit is immediately converted to a string. 
This commit replaces `LitKind::synthesize_token_lit` with a `Display` impl for `LitKind`, which can be used by the HIR pretty printer. --- compiler/rustc_ast/src/util/literal.rs | 83 ++++++++++++++------------ compiler/rustc_hir_pretty/src/lib.rs | 2 +- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 4cc4b6367b48b..762fd00e409fb 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -5,8 +5,7 @@ use crate::token::{self, Token}; use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; -use std::ascii; -use std::str; +use std::{ascii, fmt, str}; // Escapes a string, represented as a symbol. Reuses the original symbol, // avoiding interning, if no changes are required. @@ -162,54 +161,60 @@ impl LitKind { token::Err => LitKind::Err, }) } +} - /// Synthesizes a token from a semantic literal. - /// This function is used when the original token doesn't exist (e.g. the literal is created - /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). 
- pub fn synthesize_token_lit(&self) -> token::Lit { - let (kind, symbol, suffix) = match *self { - LitKind::Str(symbol, ast::StrStyle::Cooked) => { - (token::Str, escape_string_symbol(symbol), None) +impl fmt::Display for LitKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + LitKind::Byte(b) => { + let b: String = ascii::escape_default(b).map(Into::<char>::into).collect(); + write!(f, "b'{}'", b)?; } - LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None), - LitKind::ByteStr(ref bytes, ast::StrStyle::Cooked) => { - (token::ByteStr, escape_byte_str_symbol(bytes), None) + LitKind::Char(ch) => write!(f, "'{}'", escape_char_symbol(ch))?, + LitKind::Str(sym, StrStyle::Cooked) => write!(f, "\"{}\"", escape_string_symbol(sym))?, + LitKind::Str(sym, StrStyle::Raw(n)) => write!( + f, + "r{delim}\"{string}\"{delim}", + delim = "#".repeat(n as usize), + string = sym + )?, + LitKind::ByteStr(ref bytes, StrStyle::Cooked) => { + write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))? } - LitKind::ByteStr(ref bytes, ast::StrStyle::Raw(n)) => { + LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => { // Unwrap because raw byte string literals can only contain ASCII. 
- let string = str::from_utf8(bytes).unwrap(); - (token::ByteStrRaw(n), Symbol::intern(&string), None) - } - LitKind::Byte(byte) => { - let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect(); - (token::Byte, Symbol::intern(&string), None) + let symbol = str::from_utf8(bytes).unwrap(); + write!( + f, + "br{delim}\"{string}\"{delim}", + delim = "#".repeat(n as usize), + string = symbol + )?; } - LitKind::Char(ch) => (token::Char, escape_char_symbol(ch), None), LitKind::Int(n, ty) => { - let suffix = match ty { - ast::LitIntType::Unsigned(ty) => Some(ty.name()), - ast::LitIntType::Signed(ty) => Some(ty.name()), - ast::LitIntType::Unsuffixed => None, - }; - (token::Integer, sym::integer(n), suffix) + write!(f, "{}", n)?; + match ty { + ast::LitIntType::Unsigned(ty) => write!(f, "{}", ty.name())?, + ast::LitIntType::Signed(ty) => write!(f, "{}", ty.name())?, + ast::LitIntType::Unsuffixed => {} + } } LitKind::Float(symbol, ty) => { - let suffix = match ty { - ast::LitFloatType::Suffixed(ty) => Some(ty.name()), - ast::LitFloatType::Unsuffixed => None, - }; - (token::Float, symbol, suffix) + write!(f, "{}", symbol)?; + match ty { + ast::LitFloatType::Suffixed(ty) => write!(f, "{}", ty.name())?, + ast::LitFloatType::Unsuffixed => {} + } } - LitKind::Bool(value) => { - let symbol = if value { kw::True } else { kw::False }; - (token::Bool, symbol, None) + LitKind::Bool(b) => write!(f, "{}", if b { "true" } else { "false" })?, + LitKind::Err => { + // This only shows up in places like `-Zunpretty=hir` output, so we + // don't bother to produce something useful. + write!(f, "<bad-literal>")?; } - // This only shows up in places like `-Zunpretty=hir` output, so we - // don't bother to produce something useful. 
- LitKind::Err => (token::Err, Symbol::intern("<bad-literal>"), None), - }; + } - token::Lit::new(kind, symbol, suffix) + Ok(()) } } diff --git a/compiler/rustc_hir_pretty/src/lib.rs b/compiler/rustc_hir_pretty/src/lib.rs index 10b2265c522a0..81d933b8e7f0f 100644 --- a/compiler/rustc_hir_pretty/src/lib.rs +++ b/compiler/rustc_hir_pretty/src/lib.rs @@ -1256,7 +1256,7 @@ impl<'a> State<'a> { fn print_literal(&mut self, lit: &hir::Lit) { self.maybe_print_comment(lit.span.lo()); - self.word(lit.node.synthesize_token_lit().to_string()) + self.word(lit.node.to_string()) } fn print_inline_asm(&mut self, asm: &hir::InlineAsm<'_>) {