Skip to content

Commit

Permalink
Auto merge of rust-lang#103812 - clubby789:improve-include-bytes, r=p…
Browse files Browse the repository at this point in the history
…etrochenkov

Delay `include_bytes` to AST lowering

Hopefully addresses rust-lang#65818.
This PR introduces a new `ExprKind::IncludedBytes` variant, which stores the bytes of a file included with `include_bytes!()`. We can then create a literal from the bytes during AST lowering, which means we don't need to escape the bytes into valid UTF-8; that escaping is the cause of most of the overhead of embedding large binary blobs.
  • Loading branch information
bors committed Nov 12, 2022
2 parents aa05f99 + b2da155 commit 8ef2485
Show file tree
Hide file tree
Showing 19 changed files with 78 additions and 15 deletions.
8 changes: 7 additions & 1 deletion compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1208,7 +1208,7 @@ impl Expr {
ExprKind::Tup(_) => ExprPrecedence::Tup,
ExprKind::Binary(op, ..) => ExprPrecedence::Binary(op.node),
ExprKind::Unary(..) => ExprPrecedence::Unary,
ExprKind::Lit(_) => ExprPrecedence::Lit,
ExprKind::Lit(_) | ExprKind::IncludedBytes(..) => ExprPrecedence::Lit,
ExprKind::Type(..) | ExprKind::Cast(..) => ExprPrecedence::Cast,
ExprKind::Let(..) => ExprPrecedence::Let,
ExprKind::If(..) => ExprPrecedence::If,
Expand Down Expand Up @@ -1446,6 +1446,12 @@ pub enum ExprKind {
/// with an optional value to be returned.
Yeet(Option<P<Expr>>),

/// Bytes included via `include_bytes!`.
///
/// Added for optimization purposes, to avoid the need to escape
/// large binary blobs. Should always behave like [`ExprKind::Lit`]
/// with a `ByteStr` literal.
IncludedBytes(Lrc<[u8]>),

/// Placeholder for an expression that wasn't syntactically well formed in some way.
Err,
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/mut_visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,7 @@ pub fn noop_visit_expr<T: MutVisitor>(
}
ExprKind::Try(expr) => vis.visit_expr(expr),
ExprKind::TryBlock(body) => vis.visit_block(body),
ExprKind::Lit(_) | ExprKind::Err => {}
ExprKind::Lit(_) | ExprKind::IncludedBytes(..) | ExprKind::Err => {}
}
vis.visit_id(id);
vis.visit_span(span);
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token};
use rustc_data_structures::sync::Lrc;
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
Expand Down Expand Up @@ -231,6 +232,13 @@ impl Lit {
Lit { token_lit: kind.to_token_lit(), kind, span }
}

/// Recovers an AST literal from a string of bytes produced by `include_bytes!`.
/// This requires ASCII-escaping the string, which can result in poor performance
/// for very large strings of bytes.
pub fn from_included_bytes(bytes: &Lrc<[u8]>, span: Span) -> Lit {
Self::from_lit_kind(LitKind::ByteStr(bytes.clone()), span)
}

/// Losslessly convert an AST literal into a token.
pub fn to_token(&self) -> Token {
let kind = match self.token_lit.kind {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/visit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,7 @@ pub fn walk_expr<'a, V: Visitor<'a>>(visitor: &mut V, expression: &'a Expr) {
}
ExprKind::Try(ref subexpression) => visitor.visit_expr(subexpression),
ExprKind::TryBlock(ref body) => visitor.visit_block(body),
ExprKind::Lit(_) | ExprKind::Err => {}
ExprKind::Lit(_) | ExprKind::IncludedBytes(..) | ExprKind::Err => {}
}

visitor.visit_expr_post(expression)
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_ast_lowering/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
ExprKind::Lit(ref l) => {
hir::ExprKind::Lit(respan(self.lower_span(l.span), l.kind.clone()))
}
ExprKind::IncludedBytes(ref bytes) => hir::ExprKind::Lit(respan(
self.lower_span(e.span),
LitKind::ByteStr(bytes.clone()),
)),
ExprKind::Cast(ref expr, ref ty) => {
let expr = self.lower_expr(expr);
let ty =
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_ast_lowering/src/pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,10 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
// ```
fn lower_expr_within_pat(&mut self, expr: &Expr, allow_paths: bool) -> &'hir hir::Expr<'hir> {
match expr.kind {
ExprKind::Lit(..) | ExprKind::ConstBlock(..) | ExprKind::Err => {}
ExprKind::Lit(..)
| ExprKind::ConstBlock(..)
| ExprKind::IncludedBytes(..)
| ExprKind::Err => {}
ExprKind::Path(..) if allow_paths => {}
ExprKind::Unary(UnOp::Neg, ref inner) if matches!(inner.kind, ExprKind::Lit(_)) => {}
_ => {
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_ast_pretty/src/pprust/state/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ impl<'a> State<'a> {
ast::ExprKind::Lit(ref lit) => {
self.print_literal(lit);
}
ast::ExprKind::IncludedBytes(ref bytes) => {
let lit = ast::Lit::from_included_bytes(bytes, expr.span);
self.print_literal(&lit)
}
ast::ExprKind::Cast(ref expr, ref ty) => {
let prec = AssocOp::As.precedence() as i8;
self.print_expr_maybe_paren(expr, prec);
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_builtin_macros/src/assert/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
| ExprKind::Field(_, _)
| ExprKind::ForLoop(_, _, _, _)
| ExprKind::If(_, _, _)
| ExprKind::IncludedBytes(..)
| ExprKind::InlineAsm(_)
| ExprKind::Let(_, _, _)
| ExprKind::Lit(_)
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_builtin_macros/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ pub fn expand_concat(
has_errors = true;
}
},
ast::ExprKind::IncludedBytes(..) => {
cx.span_err(e.span, "cannot concatenate a byte string literal")
}
ast::ExprKind::Err => {
has_errors = true;
}
Expand Down
13 changes: 13 additions & 0 deletions compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ fn handle_array_element(
None
}
},
ast::ExprKind::IncludedBytes(..) => {
if !*has_errors {
cx.struct_span_err(expr.span, "cannot concatenate doubly nested array")
.note("byte strings are treated as arrays of bytes")
.help("try flattening the array")
.emit();
}
*has_errors = true;
None
}
_ => {
missing_literals.push(expr.span);
None
Expand Down Expand Up @@ -167,6 +177,9 @@ pub fn expand_concat_bytes(
has_errors = true;
}
},
ast::ExprKind::IncludedBytes(ref bytes) => {
accumulator.extend_from_slice(bytes);
}
ast::ExprKind::Err => {
has_errors = true;
}
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_builtin_macros/src/source_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,10 @@ pub fn expand_include_bytes(
}
};
match cx.source_map().load_binary_file(&file) {
Ok(bytes) => base::MacEager::expr(cx.expr_byte_str(sp, bytes)),
Ok(bytes) => {
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes.into()));
base::MacEager::expr(expr)
}
Err(e) => {
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
DummyResult::any(sp)
Expand Down
7 changes: 7 additions & 0 deletions compiler/rustc_expand/src/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,13 @@ impl server::TokenStream for Rustc<'_, '_> {
ast::ExprKind::Lit(l) => {
Ok(tokenstream::TokenStream::token_alone(token::Literal(l.token_lit), l.span))
}
ast::ExprKind::IncludedBytes(bytes) => {
let lit = ast::Lit::from_included_bytes(bytes, expr.span);
Ok(tokenstream::TokenStream::token_alone(
token::TokenKind::Literal(lit.token_lit),
expr.span,
))
}
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {
ast::ExprKind::Lit(l) => match l.token_lit {
token::Lit { kind: token::Integer | token::Float, .. } => {
Expand Down
4 changes: 3 additions & 1 deletion compiler/rustc_parse/src/parser/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,9 @@ impl<'a> Parser<'a> {
/// - A single-segment path.
pub(super) fn expr_is_valid_const_arg(&self, expr: &P<rustc_ast::Expr>) -> bool {
match &expr.kind {
ast::ExprKind::Block(_, _) | ast::ExprKind::Lit(_) => true,
ast::ExprKind::Block(_, _)
| ast::ExprKind::Lit(_)
| ast::ExprKind::IncludedBytes(..) => true,
ast::ExprKind::Unary(ast::UnOp::Neg, expr) => {
matches!(expr.kind, ast::ExprKind::Lit(_))
}
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_passes/src/hir_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -560,13 +560,14 @@ impl<'v> ast_visit::Visitor<'v> for StatCollector<'v> {
}

fn visit_expr(&mut self, e: &'v ast::Expr) {
#[rustfmt::skip]
record_variants!(
(self, e, e.kind, Id::None, ast, Expr, ExprKind),
[
Box, Array, ConstBlock, Call, MethodCall, Tup, Binary, Unary, Lit, Cast, Type, Let,
If, While, ForLoop, Loop, Match, Closure, Block, Async, Await, TryBlock, Assign,
AssignOp, Field, Index, Range, Underscore, Path, AddrOf, Break, Continue, Ret,
InlineAsm, MacCall, Struct, Repeat, Paren, Try, Yield, Yeet, Err
InlineAsm, MacCall, Struct, Repeat, Paren, Try, Yield, Yeet, IncludedBytes, Err
]
);
ast_visit::walk_expr(self, e)
Expand Down
7 changes: 6 additions & 1 deletion src/test/ui/proc-macro/expand-expr.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// aux-build:expand-expr.rs

#![feature(concat_bytes)]
extern crate expand_expr;

use expand_expr::{
Expand All @@ -23,6 +23,11 @@ expand_expr_is!(
concat!("contents: ", include_str!("auxiliary/included-file.txt"))
);

expand_expr_is!(
b"contents: Included file contents\n",
concat_bytes!(b"contents: ", include_bytes!("auxiliary/included-file.txt"))
);

// Correct value is checked for multiple sources.
check_expand_expr_file!(file!());

Expand Down
14 changes: 7 additions & 7 deletions src/test/ui/proc-macro/expand-expr.stderr
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
error: expected one of `.`, `?`, or an operator, found `;`
--> $DIR/expand-expr.rs:101:27
--> $DIR/expand-expr.rs:106:27
|
LL | expand_expr_fail!("string"; hello);
| ^ expected one of `.`, `?`, or an operator

error: expected expression, found `$`
--> $DIR/expand-expr.rs:104:19
--> $DIR/expand-expr.rs:109:19
|
LL | expand_expr_fail!($);
| ^ expected expression

error: expected expression, found `$`
--> $DIR/expand-expr.rs:33:23
--> $DIR/expand-expr.rs:38:23
|
LL | ($($t:tt)*) => { $($t)* };
| ^^^^ expected expression

error: expected expression, found `$`
--> $DIR/expand-expr.rs:106:28
--> $DIR/expand-expr.rs:111:28
|
LL | expand_expr_fail!(echo_pm!($));
| ^ expected expression

error: macro expansion ignores token `hello` and any following
--> $DIR/expand-expr.rs:110:47
--> $DIR/expand-expr.rs:115:47
|
LL | expand_expr_is!("string", echo_tts!("string"; hello));
| --------------------^^^^^-- help: you might be missing a semicolon here: `;`
Expand All @@ -33,7 +33,7 @@ LL | expand_expr_is!("string", echo_tts!("string"; hello));
= note: the usage of `echo_tts!` is likely invalid in expression context

error: macro expansion ignores token `;` and any following
--> $DIR/expand-expr.rs:111:44
--> $DIR/expand-expr.rs:116:44
|
LL | expand_expr_is!("string", echo_pm!("string"; hello));
| -----------------^-------- help: you might be missing a semicolon here: `;`
Expand All @@ -43,7 +43,7 @@ LL | expand_expr_is!("string", echo_pm!("string"; hello));
= note: the usage of `echo_pm!` is likely invalid in expression context

error: recursion limit reached while expanding `recursive_expand!`
--> $DIR/expand-expr.rs:119:16
--> $DIR/expand-expr.rs:124:16
|
LL | const _: u32 = recursive_expand!();
| ^^^^^^^^^^^^^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions src/tools/clippy/clippy_utils/src/sugg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ impl<'a> Sugg<'a> {
| ast::ExprKind::InlineAsm(..)
| ast::ExprKind::ConstBlock(..)
| ast::ExprKind::Lit(..)
| ast::ExprKind::IncludedBytes(..)
| ast::ExprKind::Loop(..)
| ast::ExprKind::MacCall(..)
| ast::ExprKind::MethodCall(..)
Expand Down
1 change: 1 addition & 0 deletions src/tools/rustfmt/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ pub(crate) fn format_expr(
}
}
ast::ExprKind::Underscore => Some("_".to_owned()),
ast::ExprKind::IncludedBytes(..) => unreachable!(),
ast::ExprKind::Err => None,
};

Expand Down
1 change: 1 addition & 0 deletions src/tools/rustfmt/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ pub(crate) fn is_block_expr(context: &RewriteContext<'_>, expr: &ast::Expr, repr
| ast::ExprKind::Continue(..)
| ast::ExprKind::Err
| ast::ExprKind::Field(..)
| ast::ExprKind::IncludedBytes(..)
| ast::ExprKind::InlineAsm(..)
| ast::ExprKind::Let(..)
| ast::ExprKind::Path(..)
Expand Down

0 comments on commit 8ef2485

Please sign in to comment.