Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement RFC 3348, c"foo" literals #108801

Merged
merged 10 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1814,6 +1814,8 @@ pub enum LitKind {
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
/// non-utf8, and symbols only allow utf8 strings.
ByteStr(Lrc<[u8]>, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
CStr(Lrc<[u8]>, StrStyle),
/// A byte char (`b'f'`).
Byte(u8),
/// A character literal (`'a'`).
Expand Down Expand Up @@ -1868,6 +1870,7 @@ impl LitKind {
// unsuffixed variants
LitKind::Str(..)
| LitKind::ByteStr(..)
| LitKind::CStr(..)
| LitKind::Byte(..)
| LitKind::Char(..)
| LitKind::Int(_, LitIntType::Unsuffixed)
Expand Down
7 changes: 7 additions & 0 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ pub enum LitKind {
StrRaw(u8), // raw string delimited by `n` hash symbols
ByteStr,
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
CStr,
CStrRaw(u8),
Err,
}

Expand Down Expand Up @@ -141,6 +143,10 @@ impl fmt::Display for Lit {
delim = "#".repeat(n as usize),
string = symbol
)?,
CStr => write!(f, "c\"{symbol}\"")?,
CStrRaw(n) => {
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
}
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
}

Expand Down Expand Up @@ -170,6 +176,7 @@ impl LitKind {
Float => "float",
Str | StrRaw(..) => "string",
ByteStr | ByteStrRaw(..) => "byte string",
CStr | CStrRaw(..) => "C string",
Err => "error",
}
}
Expand Down
63 changes: 62 additions & 1 deletion compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
use crate::token::{self, Token};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_lexer::unescape::{
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
Mode,
};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
use std::ops::Range;
use std::{ascii, fmt, str};

// Escapes a string, represented as a symbol. Reuses the original symbol,
Expand Down Expand Up @@ -35,6 +39,7 @@ pub enum LitError {
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge(u32),
NulInCStr(Range<usize>),
}

impl LitKind {
Expand Down Expand Up @@ -158,6 +163,52 @@ impl LitKind {

LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
}
token::CStr => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked)
}
token::CStrRaw(n) => {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
}
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
error?;
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n))
}
token::Err => LitKind::Err,
})
}
Expand Down Expand Up @@ -191,6 +242,14 @@ impl fmt::Display for LitKind {
string = symbol
)?;
}
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
}
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
// This can only be valid UTF-8.
let symbol = str::from_utf8(bytes).unwrap();
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
}
LitKind::Int(n, ty) => {
write!(f, "{n}")?;
match ty {
Expand Down Expand Up @@ -237,6 +296,8 @@ impl MetaItemLit {
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
LitKind::Byte(_) => token::Byte,
LitKind::Char(_) => token::Char,
LitKind::Int(..) => token::Integer,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_ast_passes/src/feature_gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) {
}
};
}
gate_all!(c_str_literals, "`c\"..\"` literals are experimental");
gate_all!(
if_let_guard,
"`if let` guards are experimental",
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_ast_pretty/src/pprust/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String {
token::ByteStrRaw(n) => {
format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
}
token::CStr => format!("c\"{symbol}\""),
token::CStrRaw(n) => {
format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))
}
token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
};

Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_builtin_macros/src/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ pub fn expand_concat(
Ok(ast::LitKind::Bool(b)) => {
accumulator.push_str(&b.to_string());
}
Ok(ast::LitKind::CStr(..)) => {
cx.span_err(e.span, "cannot concatenate a C string literal");
has_errors = true;
}
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
cx.emit_err(errors::ConcatBytestr { span: e.span });
has_errors = true;
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ fn invalid_type_err(
};
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
match ast::LitKind::from_token_lit(token_lit) {
Ok(ast::LitKind::CStr(_, _)) => {
// FIXME(c_str_literals): should concatenation of C string literals
// include the null bytes in the end?
cx.span_err(span, "cannot concatenate C string literals");
}
Ok(ast::LitKind::Char(_)) => {
let sugg =
snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_expand/src/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
token::StrRaw(n) => LitKind::StrRaw(n),
token::ByteStr => LitKind::ByteStr,
token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
token::CStr => LitKind::CStr,
token::CStrRaw(n) => LitKind::CStrRaw(n),
token::Err => LitKind::Err,
token::Bool => unreachable!(),
}
Expand All @@ -78,6 +80,8 @@ impl ToInternal<token::LitKind> for LitKind {
LitKind::StrRaw(n) => token::StrRaw(n),
LitKind::ByteStr => token::ByteStr,
LitKind::ByteStrRaw(n) => token::ByteStrRaw(n),
LitKind::CStr => token::CStr,
LitKind::CStrRaw(n) => token::CStrRaw(n),
LitKind::Err => token::Err,
}
}
Expand Down Expand Up @@ -436,6 +440,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
| token::LitKind::StrRaw(_)
| token::LitKind::ByteStr
| token::LitKind::ByteStrRaw(_)
| token::LitKind::CStr
| token::LitKind::CStrRaw(_)
| token::LitKind::Err => return Err(()),
token::LitKind::Integer | token::LitKind::Float => {}
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_feature/src/active.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ declare_features! (
(active, async_closure, "1.37.0", Some(62290), None),
/// Allows async functions to be declared, implemented, and used in traits.
(active, async_fn_in_trait, "1.66.0", Some(91611), None),
/// Allows `c"foo"` literals.
(active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None),
/// Treat `extern "C"` function as nounwind.
(active, c_unwind, "1.52.0", Some(74990), None),
/// Allows using C-variadics.
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_hir/src/lang_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ language_item_table! {
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;

String, sym::String, string, Target::Struct, GenericRequirement::None;
CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None;
}

pub enum GenericRequirement {
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
opt_ty.unwrap_or_else(|| self.next_float_var())
}
ast::LitKind::Bool(_) => tcx.types.bool,
ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
tcx.lifetimes.re_static,
tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
.skip_binder(),
),
ast::LitKind::Err => tcx.ty_error_misc(),
}
}
Expand Down
90 changes: 57 additions & 33 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,16 @@ pub enum LiteralKind {
Str { terminated: bool },
/// "b"abc"", "b"abc"
ByteStr { terminated: bool },
/// `c"abc"`, `c"abc`
CStr { terminated: bool },
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
/// an invalid literal.
RawStr { n_hashes: Option<u8> },
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
/// indicates an invalid literal.
RawByteStr { n_hashes: Option<u8> },
/// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
RawCStr { n_hashes: Option<u8> },
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
Expand Down Expand Up @@ -357,39 +361,18 @@ impl Cursor<'_> {
},

// Byte literal, byte string literal, raw byte string literal or identifier.
'b' => match (self.first(), self.second()) {
('\'', _) => {
self.bump();
let terminated = self.single_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = Byte { terminated };
Literal { kind, suffix_start }
}
('"', _) => {
self.bump();
let terminated = self.double_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = ByteStr { terminated };
Literal { kind, suffix_start }
}
('r', '"') | ('r', '#') => {
self.bump();
let res = self.raw_double_quoted_string(2);
let suffix_start = self.pos_within_token();
if res.is_ok() {
self.eat_literal_suffix();
}
let kind = RawByteStr { n_hashes: res.ok() };
Literal { kind, suffix_start }
}
_ => self.ident_or_unknown_prefix(),
},
'b' => self.c_or_byte_string(
|terminated| ByteStr { terminated },
|n_hashes| RawByteStr { n_hashes },
Some(|terminated| Byte { terminated }),
),

// c-string literal, raw c-string literal or identifier.
'c' => self.c_or_byte_string(
|terminated| CStr { terminated },
|n_hashes| RawCStr { n_hashes },
None,
),

// Identifier (this should be checked after other variant that can
// start as identifier).
Expand Down Expand Up @@ -553,6 +536,47 @@ impl Cursor<'_> {
}
}

fn c_or_byte_string(
&mut self,
mk_kind: impl FnOnce(bool) -> LiteralKind,
mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind,
single_quoted: Option<fn(bool) -> LiteralKind>,
) -> TokenKind {
match (self.first(), self.second(), single_quoted) {
('\'', _, Some(mk_kind)) => {
self.bump();
let terminated = self.single_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = mk_kind(terminated);
Literal { kind, suffix_start }
}
('"', _, _) => {
self.bump();
let terminated = self.double_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = mk_kind(terminated);
Literal { kind, suffix_start }
}
('r', '"', _) | ('r', '#', _) => {
self.bump();
let res = self.raw_double_quoted_string(2);
let suffix_start = self.pos_within_token();
if res.is_ok() {
self.eat_literal_suffix();
}
let kind = mk_kind_raw(res.ok());
Literal { kind, suffix_start }
}
_ => self.ident_or_unknown_prefix(),
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
Expand Down
Loading