From 8ff3903643b530c9029e8f2c6c6956fda8f21d77 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Sun, 5 Mar 2023 15:03:22 +0000
Subject: [PATCH 01/10] initial step towards implementing C string literals

---
 compiler/rustc_ast/src/ast.rs                 |   3 +
 compiler/rustc_ast/src/token.rs               |   7 +
 compiler/rustc_ast/src/util/literal.rs        |  55 ++++-
 compiler/rustc_ast_pretty/src/pprust/state.rs |   2 +
 compiler/rustc_builtin_macros/src/concat.rs   |   4 +
 .../rustc_builtin_macros/src/concat_bytes.rs  |   4 +
 .../rustc_expand/src/proc_macro_server.rs     |   4 +
 compiler/rustc_hir/src/lang_items.rs          |   1 +
 .../rustc_hir_typeck/src/fn_ctxt/checks.rs    |   5 +
 compiler/rustc_lexer/src/lib.rs               |  30 +++
 compiler/rustc_lexer/src/unescape.rs          | 199 +++++++++++-------
 compiler/rustc_parse/src/lexer/mod.rs         |  64 ++++++
 library/core/src/ffi/c_str.rs                 |   1 +
 src/librustdoc/html/highlight.rs              |   4 +-
 .../src/matches/match_same_arms.rs            |   1 +
 .../clippy/clippy_lints/src/utils/author.rs   |   5 +
 src/tools/clippy/clippy_utils/src/consts.rs   |   1 +
 17 files changed, 310 insertions(+), 80 deletions(-)

diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index ea04ba4f66e46..fb22e4640647d 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -1814,6 +1814,8 @@ pub enum LitKind {
     /// A byte string (`b"foo"`). Not stored as a symbol because it might be
     /// non-utf8, and symbols only allow utf8 strings.
     ByteStr(Lrc<[u8]>, StrStyle),
+    /// A C String (`c"foo"`).
+    CStr(Lrc<[u8]>, StrStyle),
     /// A byte char (`b'f'`).
     Byte(u8),
     /// A character literal (`'a'`).
@@ -1868,6 +1870,7 @@ impl LitKind {
             // unsuffixed variants
             LitKind::Str(..)
             | LitKind::ByteStr(..)
+            | LitKind::CStr(..)
             | LitKind::Byte(..)
             | LitKind::Char(..)
             | LitKind::Int(_, LitIntType::Unsuffixed)
diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs
index f947ae4d05732..42b843482a32b 100644
--- a/compiler/rustc_ast/src/token.rs
+++ b/compiler/rustc_ast/src/token.rs
@@ -74,6 +74,8 @@ pub enum LitKind {
     StrRaw(u8), // raw string delimited by `n` hash symbols
     ByteStr,
     ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
+    CStr,
+    CStrRaw(u8),
     Err,
 }
 
@@ -141,6 +143,10 @@ impl fmt::Display for Lit {
                 delim = "#".repeat(n as usize),
                 string = symbol
             )?,
+            CStr => write!(f, "c\"{symbol}\"")?,
+            CStrRaw(n) => {
+                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
+            }
             Integer | Float | Bool | Err => write!(f, "{symbol}")?,
         }
 
@@ -170,6 +176,7 @@ impl LitKind {
             Float => "float",
             Str | StrRaw(..) => "string",
             ByteStr | ByteStrRaw(..) => "byte string",
+            CStr | CStrRaw(..) => "C string",
             Err => "error",
         }
     }
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 74b842ac96eac..8534011e18921 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -2,7 +2,10 @@
 
 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
 use crate::token::{self, Token};
-use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
+use rustc_lexer::unescape::{
+    byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
+    Mode,
+};
 use rustc_span::symbol::{kw, sym, Symbol};
 use rustc_span::Span;
 use std::{ascii, fmt, str};
@@ -158,6 +161,52 @@ impl LitKind {
 
                 LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
             }
+            token::CStr => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
+                    Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
+                        error = Err(LitError::NulInCStr(span));
+                    }
+                    Ok(CStrUnit::Byte(b)) => buf.push(b),
+                    Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
+                    Ok(CStrUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Err(LitError::LexerError);
+                        }
+                    }
+                });
+                error?;
+                buf.push(b'\0');
+                LitKind::CStr(buf.into(), StrStyle::Cooked)
+            }
+            token::CStrRaw(n) => {
+                let s = symbol.as_str();
+                let mut buf = Vec::with_capacity(s.len());
+                let mut error = Ok(());
+                unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
+                    Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
+                        error = Err(LitError::NulInCStr(span));
+                    }
+                    Ok(CStrUnit::Byte(b)) => buf.push(b),
+                    Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
+                    Ok(CStrUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Err(err) => {
+                        if err.is_fatal() {
+                            error = Err(LitError::LexerError);
+                        }
+                    }
+                });
+                error?;
+                buf.push(b'\0');
+                LitKind::CStr(buf.into(), StrStyle::Raw(n))
+            }
             token::Err => LitKind::Err,
         })
     }
@@ -191,6 +240,8 @@ impl fmt::Display for LitKind {
                     string = symbol
                 )?;
             }
+            // TODO need to reescape
+            LitKind::CStr(..) => todo!(),
             LitKind::Int(n, ty) => {
                 write!(f, "{n}")?;
                 match ty {
@@ -237,6 +288,8 @@ impl MetaItemLit {
             LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
             LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
             LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
+            LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
+            LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
             LitKind::Byte(_) => token::Byte,
             LitKind::Char(_) => token::Char,
             LitKind::Int(..) => token::Integer,
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
index 849336c8669a1..535ac89e751d5 100644
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -210,6 +210,8 @@ pub fn literal_to_string(lit: token::Lit) -> String {
         token::ByteStrRaw(n) => {
             format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
         }
+        // TODO
+        token::CStr | token::CStrRaw(_) => todo!(),
         token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
     };
 
diff --git a/compiler/rustc_builtin_macros/src/concat.rs b/compiler/rustc_builtin_macros/src/concat.rs
index b92964d03e9f9..50e88ae2eeede 100644
--- a/compiler/rustc_builtin_macros/src/concat.rs
+++ b/compiler/rustc_builtin_macros/src/concat.rs
@@ -32,6 +32,10 @@ pub fn expand_concat(
                 Ok(ast::LitKind::Bool(b)) => {
                     accumulator.push_str(&b.to_string());
                 }
+                Ok(ast::LitKind::CStr(..)) => {
+                    cx.span_err(e.span, "cannot concatenate a C string literal");
+                    has_errors = true;
+                }
                 Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
                     cx.emit_err(errors::ConcatBytestr { span: e.span });
                     has_errors = true;
diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs
index ba639c0a9fe3c..ae674995e4294 100644
--- a/compiler/rustc_builtin_macros/src/concat_bytes.rs
+++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs
@@ -18,6 +18,10 @@ fn invalid_type_err(
     };
     let snippet = cx.sess.source_map().span_to_snippet(span).ok();
     match ast::LitKind::from_token_lit(token_lit) {
+        Ok(ast::LitKind::CStr(_, _)) => {
+            // TODO
+            cx.span_err(span, "cannot concatenate C string litearls");
+        }
         Ok(ast::LitKind::Char(_)) => {
             let sugg =
                 snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs
index 1e7d07bc22d52..04bdea273ebd5 100644
--- a/compiler/rustc_expand/src/proc_macro_server.rs
+++ b/compiler/rustc_expand/src/proc_macro_server.rs
@@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
             token::StrRaw(n) => LitKind::StrRaw(n),
             token::ByteStr => LitKind::ByteStr,
             token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
+            // TODO
+            token::CStr | token::CStrRaw(_) => todo!(),
             token::Err => LitKind::Err,
             token::Bool => unreachable!(),
         }
@@ -436,6 +438,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
                 | token::LitKind::StrRaw(_)
                 | token::LitKind::ByteStr
                 | token::LitKind::ByteStrRaw(_)
+                | token::LitKind::CStr
+                | token::LitKind::CStrRaw(_)
                 | token::LitKind::Err => return Err(()),
                 token::LitKind::Integer | token::LitKind::Float => {}
             }
diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs
index e1c030d3e198a..7ddafc9083a15 100644
--- a/compiler/rustc_hir/src/lang_items.rs
+++ b/compiler/rustc_hir/src/lang_items.rs
@@ -332,6 +332,7 @@ language_item_table! {
     RangeTo,                 sym::RangeTo,             range_to_struct,            Target::Struct,         GenericRequirement::None;
 
     String,                  sym::String,              string,                     Target::Struct,         GenericRequirement::None;
+    CStr,                    sym::CStr,                c_str,                      Target::Struct,         GenericRequirement::None;
 }
 
 pub enum GenericRequirement {
diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
index f42c825d9e8b1..374266638d160 100644
--- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
+++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
@@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
                 opt_ty.unwrap_or_else(|| self.next_float_var())
             }
             ast::LitKind::Bool(_) => tcx.types.bool,
+            ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
+                tcx.lifetimes.re_static,
+                tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
+                    .skip_binder(),
+            ),
             ast::LitKind::Err => tcx.ty_error_misc(),
         }
     }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index b3f4b5cd5e5a0..95cb1f93e3906 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -186,12 +186,16 @@ pub enum LiteralKind {
     Str { terminated: bool },
     /// "b"abc"", "b"abc"
     ByteStr { terminated: bool },
+    /// `c"abc"`, `c"abc`
+    CStr { terminated: bool },
     /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
     /// an invalid literal.
     RawStr { n_hashes: Option<u8> },
     /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
     /// indicates an invalid literal.
     RawByteStr { n_hashes: Option<u8> },
+    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid.
+    RawCStr { n_hashes: Option<u8> },
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -391,6 +395,32 @@ impl Cursor<'_> {
                 _ => self.ident_or_unknown_prefix(),
             },
 
+            // TODO deduplicate this code
+            // c-string literal, raw c-string literal or identifier.
+            'c' => match (self.first(), self.second()) {
+                ('"', _) => {
+                    self.bump();
+                    let terminated = self.double_quoted_string();
+                    let suffix_start = self.pos_within_token();
+                    if terminated {
+                        self.eat_literal_suffix();
+                    }
+                    let kind = CStr { terminated };
+                    Literal { kind, suffix_start }
+                }
+                ('r', '"') | ('r', '#') => {
+                    self.bump();
+                    let res = self.raw_double_quoted_string(2);
+                    let suffix_start = self.pos_within_token();
+                    if res.is_ok() {
+                        self.eat_literal_suffix();
+                    }
+                    let kind = RawCStr { n_hashes: res.ok() };
+                    Literal { kind, suffix_start }
+                }
+                _ => self.ident_or_unknown_prefix(),
+            },
+
             // Identifier (this should be checked after other variant that can
             // start as identifier).
             c if is_id_start(c) => self.ident_or_unknown_prefix(),
diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index bb4d91247b81d..4b707c9ec9619 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -90,6 +90,39 @@ where
         Mode::RawStr | Mode::RawByteStr => {
             unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
         }
+        Mode::CStr | Mode::RawCStr => unreachable!(),
+    }
+}
+
+pub enum CStrUnit {
+    Byte(u8),
+    Char(char),
+}
+
+impl From<u8> for CStrUnit {
+    fn from(value: u8) -> Self {
+        CStrUnit::Byte(value)
+    }
+}
+
+impl From<char> for CStrUnit {
+    fn from(value: char) -> Self {
+        CStrUnit::Char(value)
+    }
+}
+
+pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
+where
+    F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>),
+{
+    if mode == Mode::RawCStr {
+        unescape_raw_str_or_raw_byte_str(
+            src,
+            mode.characters_should_be_ascii(),
+            &mut |r, result| callback(r, result.map(CStrUnit::Char)),
+        );
+    } else {
+        unescape_str_common(src, mode, callback);
     }
 }
 
@@ -114,19 +147,26 @@ pub enum Mode {
     ByteStr,
     RawStr,
     RawByteStr,
+    CStr,
+    RawCStr,
 }
 
 impl Mode {
     pub fn in_double_quotes(self) -> bool {
         match self {
-            Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
+            Mode::Str
+            | Mode::ByteStr
+            | Mode::RawStr
+            | Mode::RawByteStr
+            | Mode::CStr
+            | Mode::RawCStr => true,
             Mode::Char | Mode::Byte => false,
         }
     }
 
     pub fn is_byte(self) -> bool {
         match self {
-            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true,
             Mode::Char | Mode::Str | Mode::RawStr => false,
         }
     }
@@ -163,62 +203,63 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
             value as char
         }
 
-        'u' => {
-            // We've parsed '\u', now we have to parse '{..}'.
+        'u' => scan_unicode(chars, is_byte)?,
+        _ => return Err(EscapeError::InvalidEscape),
+    };
+    Ok(res)
+}
 
-            if chars.next() != Some('{') {
-                return Err(EscapeError::NoBraceInUnicodeEscape);
-            }
+fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
+    // We've parsed '\u', now we have to parse '{..}'.
 
-            // First character must be a hexadecimal digit.
-            let mut n_digits = 1;
-            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
-                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
-                '}' => return Err(EscapeError::EmptyUnicodeEscape),
-                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
-            };
-
-            // First character is valid, now parse the rest of the number
-            // and closing brace.
-            loop {
-                match chars.next() {
-                    None => return Err(EscapeError::UnclosedUnicodeEscape),
-                    Some('_') => continue,
-                    Some('}') => {
-                        if n_digits > 6 {
-                            return Err(EscapeError::OverlongUnicodeEscape);
-                        }
-
-                        // Incorrect syntax has higher priority for error reporting
-                        // than unallowed value for a literal.
-                        if is_byte {
-                            return Err(EscapeError::UnicodeEscapeInByte);
-                        }
-
-                        break std::char::from_u32(value).ok_or_else(|| {
-                            if value > 0x10FFFF {
-                                EscapeError::OutOfRangeUnicodeEscape
-                            } else {
-                                EscapeError::LoneSurrogateUnicodeEscape
-                            }
-                        })?;
-                    }
-                    Some(c) => {
-                        let digit: u32 =
-                            c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
-                        n_digits += 1;
-                        if n_digits > 6 {
-                            // Stop updating value since we're sure that it's incorrect already.
-                            continue;
-                        }
-                        value = value * 16 + digit;
+    if chars.next() != Some('{') {
+        return Err(EscapeError::NoBraceInUnicodeEscape);
+    }
+
+    // First character must be a hexadecimal digit.
+    let mut n_digits = 1;
+    let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
+        '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
+        '}' => return Err(EscapeError::EmptyUnicodeEscape),
+        c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
+    };
+
+    // First character is valid, now parse the rest of the number
+    // and closing brace.
+    loop {
+        match chars.next() {
+            None => return Err(EscapeError::UnclosedUnicodeEscape),
+            Some('_') => continue,
+            Some('}') => {
+                if n_digits > 6 {
+                    return Err(EscapeError::OverlongUnicodeEscape);
+                }
+
+                // Incorrect syntax has higher priority for error reporting
+                // than unallowed value for a literal.
+                if is_byte {
+                    return Err(EscapeError::UnicodeEscapeInByte);
+                }
+
+                break std::char::from_u32(value).ok_or_else(|| {
+                    if value > 0x10FFFF {
+                        EscapeError::OutOfRangeUnicodeEscape
+                    } else {
+                        EscapeError::LoneSurrogateUnicodeEscape
                     }
-                };
+                });
             }
-        }
-        _ => return Err(EscapeError::InvalidEscape),
-    };
-    Ok(res)
+            Some(c) => {
+                let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
+                n_digits += 1;
+                if n_digits > 6 {
+                    // Stop updating value since we're sure that it's incorrect already.
+                    continue;
+                }
+                value = value * 16 + digit;
+            }
+        };
+    }
 }
 
 #[inline]
@@ -266,7 +307,9 @@ where
                         // if unescaped '\' character is followed by '\n'.
                         // For details see [Rust language reference]
                         // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
-                        skip_ascii_whitespace(&mut chars, start, callback);
+                        skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
+                            callback(range, Err(err))
+                        });
                         continue;
                     }
                     _ => scan_escape(&mut chars, is_byte),
@@ -281,32 +324,32 @@ where
         let end = src.len() - chars.as_str().len();
         callback(start..end, res);
     }
+}
 
-    fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
-    where
-        F: FnMut(Range<usize>, Result<char, EscapeError>),
-    {
-        let tail = chars.as_str();
-        let first_non_space = tail
-            .bytes()
-            .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-            .unwrap_or(tail.len());
-        if tail[1..first_non_space].contains('\n') {
-            // The +1 accounts for the escaping slash.
-            let end = start + first_non_space + 1;
-            callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
-        }
-        let tail = &tail[first_non_space..];
-        if let Some(c) = tail.chars().nth(0) {
-            if c.is_whitespace() {
-                // For error reporting, we would like the span to contain the character that was not
-                // skipped. The +1 is necessary to account for the leading \ that started the escape.
-                let end = start + first_non_space + c.len_utf8() + 1;
-                callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
-            }
+fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
+where
+    F: FnMut(Range<usize>, EscapeError),
+{
+    let tail = chars.as_str();
+    let first_non_space = tail
+        .bytes()
+        .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
+        .unwrap_or(tail.len());
+    if tail[1..first_non_space].contains('\n') {
+        // The +1 accounts for the escaping slash.
+        let end = start + first_non_space + 1;
+        callback(start..end, EscapeError::MultipleSkippedLinesWarning);
+    }
+    let tail = &tail[first_non_space..];
+    if let Some(c) = tail.chars().nth(0) {
+        if c.is_whitespace() {
+            // For error reporting, we would like the span to contain the character that was not
+            // skipped. The +1 is necessary to account for the leading \ that started the escape.
+            let end = start + first_non_space + c.len_utf8() + 1;
+            callback(start..end, EscapeError::UnskippedWhitespaceWarning);
         }
-        *chars = tail.chars();
     }
+    *chars = tail.chars();
 }
 
 /// Takes a contents of a string literal (without quotes) and produces a
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index a4a75fcb96995..b2050780309f8 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -415,6 +415,16 @@ impl<'a> StringReader<'a> {
                 }
                 self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
             }
+            rustc_lexer::LiteralKind::CStr { terminated } => {
+                if !terminated {
+                    self.sess.span_diagnostic.span_fatal_with_code(
+                        self.mk_sp(start + BytePos(1), end),
+                        "unterminated C string",
+                        error_code!(E0767),
+                    )
+                }
+                self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
+            }
             rustc_lexer::LiteralKind::RawStr { n_hashes } => {
                 if let Some(n_hashes) = n_hashes {
                     let n = u32::from(n_hashes);
@@ -433,6 +443,15 @@ impl<'a> StringReader<'a> {
                     self.report_raw_str_error(start, 2);
                 }
             }
+            rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
+                if let Some(n_hashes) = n_hashes {
+                    let n = u32::from(n_hashes);
+                    let kind = token::CStrRaw(n_hashes);
+                    self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
+                } else {
+                    self.report_raw_str_error(start, 2);
+                }
+            }
             rustc_lexer::LiteralKind::Int { base, empty_int } => {
                 if empty_int {
                     let span = self.mk_sp(start, end);
@@ -692,6 +711,51 @@ impl<'a> StringReader<'a> {
             (token::Err, self.symbol_from_to(start, end))
         }
     }
+
+    fn cook_c_string(
+        &self,
+        kind: token::LitKind,
+        mode: Mode,
+        start: BytePos,
+        end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) -> (token::LitKind, Symbol) {
+        let mut has_fatal_err = false;
+        let content_start = start + BytePos(prefix_len);
+        let content_end = end - BytePos(postfix_len);
+        let lit_content = self.str_from_to(content_start, content_end);
+        unescape::unescape_c_string(lit_content, mode, &mut |range, result| {
+            // Here we only check for errors. The actual unescaping is done later.
+            if let Err(err) = result {
+                let span_with_quotes = self.mk_sp(start, end);
+                let (start, end) = (range.start as u32, range.end as u32);
+                let lo = content_start + BytePos(start);
+                let hi = lo + BytePos(end - start);
+                let span = self.mk_sp(lo, hi);
+                if err.is_fatal() {
+                    has_fatal_err = true;
+                }
+                emit_unescape_error(
+                    &self.sess.span_diagnostic,
+                    lit_content,
+                    span_with_quotes,
+                    span,
+                    mode,
+                    range,
+                    err,
+                );
+            }
+        });
+
+        // We normally exclude the quotes for the symbol, but for errors we
+        // include it because it results in clearer error messages.
+        if !has_fatal_err {
+            (kind, Symbol::intern(lit_content))
+        } else {
+            (token::Err, self.symbol_from_to(start, end))
+        }
+    }
 }
 
 pub fn nfc_normalize(string: &str) -> Symbol {
diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs
index bd2b2c36c4315..2ac679b6bc347 100644
--- a/library/core/src/ffi/c_str.rs
+++ b/library/core/src/ffi/c_str.rs
@@ -82,6 +82,7 @@ use crate::str;
 #[cfg_attr(not(test), rustc_diagnostic_item = "CStr")]
 #[stable(feature = "core_c_str", since = "1.64.0")]
 #[rustc_has_incoherent_inherent_impls]
+#[cfg_attr(not(bootstrap), lang = "CStr")]
 // FIXME:
 // `fn from` in `impl From<&CStr> for Box<CStr>` current implementation relies
 // on `CStr` being layout-compatible with `[u8]`.
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 946c85a205f5a..c94968b4817cb 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -811,7 +811,9 @@ impl<'src> Classifier<'src> {
                 | LiteralKind::Str { .. }
                 | LiteralKind::ByteStr { .. }
                 | LiteralKind::RawStr { .. }
-                | LiteralKind::RawByteStr { .. } => Class::String,
+                | LiteralKind::RawByteStr { .. }
+                | LiteralKind::CStr { .. }
+                | LiteralKind::RawCStr { .. } => Class::String,
                 // Number literals.
                 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
             },
diff --git a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs
index 158e6caa4de54..a48f4c77f857f 100644
--- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs
+++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs
@@ -284,6 +284,7 @@ impl<'a> NormalizedPat<'a> {
                     LitKind::Str(sym, _) => Self::LitStr(sym),
                     LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes),
                     LitKind::Byte(val) => Self::LitInt(val.into()),
+                    LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes),
                     LitKind::Char(val) => Self::LitInt(val.into()),
                     LitKind::Int(val, _) => Self::LitInt(val),
                     LitKind::Bool(val) => Self::LitBool(val),
diff --git a/src/tools/clippy/clippy_lints/src/utils/author.rs b/src/tools/clippy/clippy_lints/src/utils/author.rs
index 01927b6b5f10d..f75dff46624e4 100644
--- a/src/tools/clippy/clippy_lints/src/utils/author.rs
+++ b/src/tools/clippy/clippy_lints/src/utils/author.rs
@@ -304,6 +304,11 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
                 kind!("ByteStr(ref {vec})");
                 chain!(self, "let [{:?}] = **{vec}", vec.value);
             },
+            LitKind::CStr(ref vec, _) => {
+                bind!(self, vec);
+                kind!("CStr(ref {vec})");
+                chain!(self, "let [{:?}] = **{vec}", vec.value);
+            }
             LitKind::Str(s, _) => {
                 bind!(self, s);
                 kind!("Str({s}, _)");
diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs
index 99bfc4b5717c8..7c7ec6d334d9b 100644
--- a/src/tools/clippy/clippy_utils/src/consts.rs
+++ b/src/tools/clippy/clippy_utils/src/consts.rs
@@ -211,6 +211,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option<Ty<'_>>) -> Constant {
         LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
         LitKind::Byte(b) => Constant::Int(u128::from(b)),
         LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
+        LitKind::CStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
         LitKind::Char(c) => Constant::Char(c),
         LitKind::Int(n, _) => Constant::Int(n),
         LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {

From 76d1f93896fb642cd27cbe8ef481b66e974dbdf9 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Mon, 6 Mar 2023 07:10:23 +0000
Subject: [PATCH 02/10] update and add a few tests

---
 compiler/rustc_ast/src/ast.rs                 |   2 +-
 compiler/rustc_ast/src/util/literal.rs        |   4 ++--
 compiler/rustc_ast_passes/src/feature_gate.rs |   1 +
 compiler/rustc_feature/src/active.rs          |   2 ++
 .../src/build/expr/as_constant.rs             |   6 +++++
 compiler/rustc_span/src/symbol.rs             |   1 +
 .../rfcs/rfc-3348-c-string-literals/basic.rs  |   7 ++++++
 .../rfcs/rfc-3348-c-string-literals/gate.rs   |   7 ++++++
 .../rfc-3348-c-string-literals/gate.stderr    |  21 ++++++++++++++++++
 .../rfc-3348-c-string-literals/no-nuls.rs     | Bin 0 -> 322 bytes
 .../rfc-3348-c-string-literals/no-nuls.stderr | Bin 0 -> 670 bytes
 11 files changed, 48 insertions(+), 3 deletions(-)
 create mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
 create mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
 create mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
 create mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
 create mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr

diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index fb22e4640647d..fb90d309fcde7 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -1814,7 +1814,7 @@ pub enum LitKind {
     /// A byte string (`b"foo"`). Not stored as a symbol because it might be
     /// non-utf8, and symbols only allow utf8 strings.
     ByteStr(Lrc<[u8]>, StrStyle),
-    /// A C String (`c"foo"`).
+    /// A C String (`c"foo"`). Guaranteed only have `\0` in the end.
     CStr(Lrc<[u8]>, StrStyle),
     /// A byte char (`b'f'`).
     Byte(u8),
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 8534011e18921..5fa0ea354550a 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -181,7 +181,7 @@ impl LitKind {
                     }
                 });
                 error?;
-                buf.push(b'\0');
+                buf.push(0);
                 LitKind::CStr(buf.into(), StrStyle::Cooked)
             }
             token::CStrRaw(n) => {
@@ -204,7 +204,7 @@ impl LitKind {
                     }
                 });
                 error?;
-                buf.push(b'\0');
+                buf.push(0);
                 LitKind::CStr(buf.into(), StrStyle::Raw(n))
             }
             token::Err => LitKind::Err,
diff --git a/compiler/rustc_ast_passes/src/feature_gate.rs b/compiler/rustc_ast_passes/src/feature_gate.rs
index 17bcd24ee39fd..9defc6603e837 100644
--- a/compiler/rustc_ast_passes/src/feature_gate.rs
+++ b/compiler/rustc_ast_passes/src/feature_gate.rs
@@ -1,3 +1,4 @@
+use ast::token;
 use rustc_ast as ast;
 use rustc_ast::visit::{self, AssocCtxt, FnCtxt, FnKind, Visitor};
 use rustc_ast::{attr, AssocConstraint, AssocConstraintKind, NodeId};
diff --git a/compiler/rustc_feature/src/active.rs b/compiler/rustc_feature/src/active.rs
index 6201e5b619b87..45f462a63ee8c 100644
--- a/compiler/rustc_feature/src/active.rs
+++ b/compiler/rustc_feature/src/active.rs
@@ -313,6 +313,8 @@ declare_features! (
     (active, async_fn_in_trait, "1.66.0", Some(91611), None),
     /// Treat `extern "C"` function as nounwind.
     (active, c_unwind, "1.52.0", Some(74990), None),
+    /// Allows `c"foo"` literals.
+    (active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None),
     /// Allows using C-variadics.
     (active, c_variadic, "1.34.0", Some(44930), None),
     /// Allows the use of `#[cfg(sanitize = "option")]`; set when -Zsanitizer is used.
diff --git a/compiler/rustc_mir_build/src/build/expr/as_constant.rs b/compiler/rustc_mir_build/src/build/expr/as_constant.rs
index fbcfd43372433..59549435233c5 100644
--- a/compiler/rustc_mir_build/src/build/expr/as_constant.rs
+++ b/compiler/rustc_mir_build/src/build/expr/as_constant.rs
@@ -146,6 +146,12 @@ pub(crate) fn lit_to_mir_constant<'tcx>(
             let id = tcx.allocate_bytes(data);
             ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
         }
+        (ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if Some(def.did()) == tcx.lang_items().c_str()) =>
+        {
+            let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]);
+            let allocation = tcx.mk_const_alloc(allocation);
+            ConstValue::Slice { data: allocation, start: 0, end: data.len() }
+        }
         (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => {
             ConstValue::Scalar(Scalar::from_uint(*n, Size::from_bytes(1)))
         }
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 7969b848fd956..f83aa504b90e6 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -441,6 +441,7 @@ symbols! {
         bridge,
         bswap,
         c_str,
+        c_str_literals,
         c_unwind,
         c_variadic,
         call,
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
new file mode 100644
index 0000000000000..e4b07ab8108e0
--- /dev/null
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
@@ -0,0 +1,7 @@
+// run-pass
+
+#![feature(c_str_literals)]
+
+fn main() {
+    assert_eq!(b"test\0", c"test".to_bytes_with_nul());
+}
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
new file mode 100644
index 0000000000000..674b0c5e23351
--- /dev/null
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
@@ -0,0 +1,7 @@
+fn main() {
+    c"foo";
+    //~^ ERROR: `c".."` literals are experimental
+
+    m!(c"test");
+    //~^ ERROR: `c".."` literals are experimental
+}
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
new file mode 100644
index 0000000000000..bc0c537aada83
--- /dev/null
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
@@ -0,0 +1,21 @@
+error[E0658]: `c".."` literals are experimental
+  --> $DIR/gate.rs:8:5
+   |
+LL |     c"foo";
+   |     ^^^^^^
+   |
+   = note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
+   = help: add `#![feature(c_str_literals)]` to the crate attributes to enable
+
+error[E0658]: `c".."` literals are experimental
+  --> $DIR/gate.rs:11:8
+   |
+LL |     m!(c"test");
+   |        ^^^^^^^
+   |
+   = note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
+   = help: add `#![feature(c_str_literals)]` to the crate attributes to enable
+
+error: aborting due to 2 previous errors
+
+For more information about this error, try `rustc --explain E0658`.
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
new file mode 100644
index 0000000000000000000000000000000000000000..f6c86a1ba8762358537827376cbffc5c3f4508e9
GIT binary patch
literal 322
zcmY#Zj802UEGaEY)kuynE-8x7$t+1NO3W$NjOF4=%Tvfr%*@l!RH)`s0D@$t7y~71
zFjHT@E>6KUD9As^N+GW_Cr2SUBe5tk8K_qwGf%-;0cccaUOE>{TWPg{K`mKY8OYKV
QQ(-`sON*39w6B&60N$!x!T<mO

literal 0
HcmV?d00001

diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
new file mode 100644
index 0000000000000000000000000000000000000000..8a1669b285adaaf14161a49780a7a61dcf4673c9
GIT binary patch
literal 670
zcmcJNI}5@v5XYVKDgG4KYQ#q+f{QLfC+AYAHE5wEa!Ew6es@F0SVV;0bT96AFG-|H
zK)JR>eXXdftt2!GI~Yrix-cM3w;1ma$Enh(Pc#!UVj(ggLC5kOomYT5DR{C8&R_#&
z`K>OKf1<NKw0m;?it(kFL0Xvco*qUKei-t+hg>}P$Lfcgd?nWL;VFqm$4^_YPMWO$
VFr1x))mY+2^4@4?5MIMD_6Dmm#F_vA

literal 0
HcmV?d00001


From a49570fd20a16c0a41b1dfdaf121ef69f60acd7e Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Mon, 6 Mar 2023 07:42:04 +0000
Subject: [PATCH 03/10] fix TODO comments

---
 compiler/rustc_ast/src/util/literal.rs        |  10 +-
 compiler/rustc_ast_pretty/src/pprust/state.rs |   6 +-
 .../rustc_builtin_macros/src/concat_bytes.rs  |   5 +-
 .../rustc_expand/src/proc_macro_server.rs     |   6 +-
 compiler/rustc_feature/src/active.rs          |   4 +-
 compiler/rustc_lexer/src/lib.rs               | 108 +++++++++---------
 library/proc_macro/src/bridge/mod.rs          |   4 +
 .../rfcs/rfc-3348-c-string-literals/gate.rs   |   6 +
 8 files changed, 82 insertions(+), 67 deletions(-)

diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index 5fa0ea354550a..cd3b163e3ac97 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -240,8 +240,14 @@ impl fmt::Display for LitKind {
                     string = symbol
                 )?;
             }
-            // TODO need to reescape
-            LitKind::CStr(..) => todo!(),
+            LitKind::CStr(ref bytes, StrStyle::Cooked) => {
+                write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
+            }
+            LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
+                // This can only be valid UTF-8.
+                let symbol = str::from_utf8(bytes).unwrap();
+                write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
+            }
             LitKind::Int(n, ty) => {
                 write!(f, "{n}")?;
                 match ty {
diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs
index 535ac89e751d5..61b7863c686cf 100644
--- a/compiler/rustc_ast_pretty/src/pprust/state.rs
+++ b/compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -210,8 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String {
         token::ByteStrRaw(n) => {
             format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
         }
-        // TODO
-        token::CStr | token::CStrRaw(_) => todo!(),
+        token::CStr => format!("c\"{symbol}\""),
+        token::CStrRaw(n) => {
+            format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))
+        }
         token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
     };
 
diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs
index ae674995e4294..5ef35af0a059a 100644
--- a/compiler/rustc_builtin_macros/src/concat_bytes.rs
+++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs
@@ -19,8 +19,9 @@ fn invalid_type_err(
     let snippet = cx.sess.source_map().span_to_snippet(span).ok();
     match ast::LitKind::from_token_lit(token_lit) {
         Ok(ast::LitKind::CStr(_, _)) => {
-            // TODO
-            cx.span_err(span, "cannot concatenate C string litearls");
+            // FIXME(c_str_literals): should concatenation of C string literals
+            // include the null bytes in the end?
+            cx.span_err(span, "cannot concatenate C string literals");
         }
         Ok(ast::LitKind::Char(_)) => {
             let sugg =
diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs
index 04bdea273ebd5..891e84a2f3071 100644
--- a/compiler/rustc_expand/src/proc_macro_server.rs
+++ b/compiler/rustc_expand/src/proc_macro_server.rs
@@ -61,8 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
             token::StrRaw(n) => LitKind::StrRaw(n),
             token::ByteStr => LitKind::ByteStr,
             token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
-            // TODO
-            token::CStr | token::CStrRaw(_) => todo!(),
+            token::CStr => LitKind::CStr,
+            token::CStrRaw(n) => LitKind::CStrRaw(n),
             token::Err => LitKind::Err,
             token::Bool => unreachable!(),
         }
@@ -80,6 +80,8 @@ impl ToInternal<token::LitKind> for LitKind {
             LitKind::StrRaw(n) => token::StrRaw(n),
             LitKind::ByteStr => token::ByteStr,
             LitKind::ByteStrRaw(n) => token::ByteStrRaw(n),
+            LitKind::CStr => token::CStr,
+            LitKind::CStrRaw(n) => token::CStrRaw(n),
             LitKind::Err => token::Err,
         }
     }
diff --git a/compiler/rustc_feature/src/active.rs b/compiler/rustc_feature/src/active.rs
index 45f462a63ee8c..4e5eebd285bbb 100644
--- a/compiler/rustc_feature/src/active.rs
+++ b/compiler/rustc_feature/src/active.rs
@@ -311,10 +311,10 @@ declare_features! (
     (active, async_closure, "1.37.0", Some(62290), None),
     /// Allows async functions to be declared, implemented, and used in traits.
     (active, async_fn_in_trait, "1.66.0", Some(91611), None),
-    /// Treat `extern "C"` function as nounwind.
-    (active, c_unwind, "1.52.0", Some(74990), None),
     /// Allows `c"foo"` literals.
     (active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None),
+    /// Treat `extern "C"` function as nounwind.
+    (active, c_unwind, "1.52.0", Some(74990), None),
     /// Allows using C-variadics.
     (active, c_variadic, "1.34.0", Some(44930), None),
     /// Allows the use of `#[cfg(sanitize = "option")]`; set when -Zsanitizer is used.
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 95cb1f93e3906..ce8c9ebe7ce77 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -361,65 +361,18 @@ impl Cursor<'_> {
             },
 
             // Byte literal, byte string literal, raw byte string literal or identifier.
-            'b' => match (self.first(), self.second()) {
-                ('\'', _) => {
-                    self.bump();
-                    let terminated = self.single_quoted_string();
-                    let suffix_start = self.pos_within_token();
-                    if terminated {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = Byte { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('"', _) => {
-                    self.bump();
-                    let terminated = self.double_quoted_string();
-                    let suffix_start = self.pos_within_token();
-                    if terminated {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = ByteStr { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('r', '"') | ('r', '#') => {
-                    self.bump();
-                    let res = self.raw_double_quoted_string(2);
-                    let suffix_start = self.pos_within_token();
-                    if res.is_ok() {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = RawByteStr { n_hashes: res.ok() };
-                    Literal { kind, suffix_start }
-                }
-                _ => self.ident_or_unknown_prefix(),
-            },
+            'b' => self.c_or_byte_string(
+                |terminated| ByteStr { terminated },
+                |n_hashes| RawByteStr { n_hashes },
+                Some(|terminated| Byte { terminated }),
+            ),
 
-            // TODO deduplicate this code
             // c-string literal, raw c-string literal or identifier.
-            'c' => match (self.first(), self.second()) {
-                ('"', _) => {
-                    self.bump();
-                    let terminated = self.double_quoted_string();
-                    let suffix_start = self.pos_within_token();
-                    if terminated {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = CStr { terminated };
-                    Literal { kind, suffix_start }
-                }
-                ('r', '"') | ('r', '#') => {
-                    self.bump();
-                    let res = self.raw_double_quoted_string(2);
-                    let suffix_start = self.pos_within_token();
-                    if res.is_ok() {
-                        self.eat_literal_suffix();
-                    }
-                    let kind = RawCStr { n_hashes: res.ok() };
-                    Literal { kind, suffix_start }
-                }
-                _ => self.ident_or_unknown_prefix(),
-            },
+            'c' => self.c_or_byte_string(
+                |terminated| CStr { terminated },
+                |n_hashes| RawCStr { n_hashes },
+                None,
+            ),
 
             // Identifier (this should be checked after other variant that can
             // start as identifier).
@@ -583,6 +536,47 @@ impl Cursor<'_> {
         }
     }
 
+    fn c_or_byte_string(
+        &mut self,
+        mk_kind: impl FnOnce(bool) -> LiteralKind,
+        mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind,
+        single_quoted: Option<fn(bool) -> LiteralKind>,
+    ) -> TokenKind {
+        match (self.first(), self.second(), single_quoted) {
+            ('\'', _, Some(mk_kind)) => {
+                self.bump();
+                let terminated = self.single_quoted_string();
+                let suffix_start = self.pos_within_token();
+                if terminated {
+                    self.eat_literal_suffix();
+                }
+                let kind = mk_kind(terminated);
+                Literal { kind, suffix_start }
+            }
+            ('"', _, _) => {
+                self.bump();
+                let terminated = self.double_quoted_string();
+                let suffix_start = self.pos_within_token();
+                if terminated {
+                    self.eat_literal_suffix();
+                }
+                let kind = mk_kind(terminated);
+                Literal { kind, suffix_start }
+            }
+            ('r', '"', _) | ('r', '#', _) => {
+                self.bump();
+                let res = self.raw_double_quoted_string(2);
+                let suffix_start = self.pos_within_token();
+                if res.is_ok() {
+                    self.eat_literal_suffix();
+                }
+                let kind = mk_kind_raw(res.ok());
+                Literal { kind, suffix_start }
+            }
+            _ => self.ident_or_unknown_prefix(),
+        }
+    }
+
     fn number(&mut self, first_digit: char) -> LiteralKind {
         debug_assert!('0' <= self.prev() && self.prev() <= '9');
         let mut base = Base::Decimal;
diff --git a/library/proc_macro/src/bridge/mod.rs b/library/proc_macro/src/bridge/mod.rs
index 54b11c543f162..caecda1bc63fd 100644
--- a/library/proc_macro/src/bridge/mod.rs
+++ b/library/proc_macro/src/bridge/mod.rs
@@ -337,6 +337,8 @@ pub enum LitKind {
     StrRaw(u8),
     ByteStr,
     ByteStrRaw(u8),
+    CStr,
+    CStrRaw(u8),
     Err,
 }
 
@@ -350,6 +352,8 @@ rpc_encode_decode!(
         StrRaw(n),
         ByteStr,
         ByteStrRaw(n),
+        CStr,
+        CStrRaw(n),
         Err,
     }
 );
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
index 674b0c5e23351..b27da26ed23bb 100644
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
@@ -1,3 +1,9 @@
+// gate-test-c_str_literals
+
+macro_rules! m {
+    ($t:tt) => {}
+}
+
 fn main() {
     c"foo";
     //~^ ERROR: `c".."` literals are experimental

From d5e7206ca674661a13d7bbe03284b81031e1ac33 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Mon, 6 Mar 2023 14:09:28 +0000
Subject: [PATCH 04/10] rm diag item, use lang item

---
 library/core/src/ffi/c_str.rs                            | 1 -
 src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs
index 2ac679b6bc347..07b11814f965f 100644
--- a/library/core/src/ffi/c_str.rs
+++ b/library/core/src/ffi/c_str.rs
@@ -79,7 +79,6 @@ use crate::str;
 ///
 /// [str]: prim@str "str"
 #[derive(Hash)]
-#[cfg_attr(not(test), rustc_diagnostic_item = "CStr")]
 #[stable(feature = "core_c_str", since = "1.64.0")]
 #[rustc_has_incoherent_inherent_impls]
 #[cfg_attr(not(bootstrap), lang = "CStr")]
diff --git a/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs b/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs
index 03324c66e8efc..2f2e84fa35a12 100644
--- a/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs
+++ b/src/tools/clippy/clippy_lints/src/strlen_on_c_strings.rs
@@ -1,11 +1,11 @@
 use clippy_utils::diagnostics::span_lint_and_sugg;
 use clippy_utils::source::snippet_with_context;
-use clippy_utils::ty::is_type_diagnostic_item;
+use clippy_utils::ty::{is_type_diagnostic_item, is_type_lang_item};
 use clippy_utils::visitors::is_expr_unsafe;
 use clippy_utils::{get_parent_node, match_libc_symbol};
 use if_chain::if_chain;
 use rustc_errors::Applicability;
-use rustc_hir::{Block, BlockCheckMode, Expr, ExprKind, Node, UnsafeSource};
+use rustc_hir::{Block, BlockCheckMode, Expr, ExprKind, LangItem, Node, UnsafeSource};
 use rustc_lint::{LateContext, LateLintPass};
 use rustc_session::{declare_lint_pass, declare_tool_lint};
 use rustc_span::symbol::sym;
@@ -67,7 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for StrlenOnCStrings {
                 let val_name = snippet_with_context(cx, self_arg.span, ctxt, "..", &mut app).0;
                 let method_name = if is_type_diagnostic_item(cx, ty, sym::cstring_type) {
                     "as_bytes"
-                } else if is_type_diagnostic_item(cx, ty, sym::CStr) {
+                } else if is_type_lang_item(cx, ty, LangItem::CStr) {
                     "to_bytes"
                 } else {
                     return;

From 4c01d494b8233c930868be33cf4880b4267ede82 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Mon, 6 Mar 2023 14:14:55 +0000
Subject: [PATCH 05/10] refactor unescape

---
 compiler/rustc_lexer/src/unescape.rs          | 91 +++++++++++++------
 .../src/lexer/unescape_error_reporting.rs     | 22 +++--
 2 files changed, 75 insertions(+), 38 deletions(-)

diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs
index 4b707c9ec9619..c9ad54d8d9806 100644
--- a/compiler/rustc_lexer/src/unescape.rs
+++ b/compiler/rustc_lexer/src/unescape.rs
@@ -86,7 +86,8 @@ where
             let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
             callback(0..(src.len() - chars.as_str().len()), res);
         }
-        Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback),
+        Mode::Str | Mode::ByteStr => unescape_str_common(src, mode, callback),
+
         Mode::RawStr | Mode::RawByteStr => {
             unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
         }
@@ -94,6 +95,7 @@ where
     }
 }
 
+/// A unit within CStr. Must not be a nul character.
 pub enum CStrUnit {
     Byte(u8),
     Char(char),
@@ -164,24 +166,52 @@ impl Mode {
         }
     }
 
-    pub fn is_byte(self) -> bool {
+    /// Non-byte literals should have `\xXX` escapes that are within the ASCII range.
+    pub fn ascii_escapes_should_be_ascii(self) -> bool {
+        match self {
+            Mode::Char | Mode::Str | Mode::RawStr => true,
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => false,
+        }
+    }
+
+    /// Whether characters within the literal must be within the ASCII range
+    pub fn characters_should_be_ascii(self) -> bool {
+        match self {
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
+            Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false,
+        }
+    }
+
+    /// Byte literals do not allow unicode escape.
+    pub fn is_unicode_escape_disallowed(self) -> bool {
         match self {
-            Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => true,
-            Mode::Char | Mode::Str | Mode::RawStr => false,
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
+            Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false,
+        }
+    }
+
+    pub fn prefix_noraw(self) -> &'static str {
+        match self {
+            Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b",
+            Mode::CStr | Mode::RawCStr => "c",
+            Mode::Char | Mode::Str | Mode::RawStr => "",
         }
     }
 }
 
-fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
+fn scan_escape<T: From<u8> + From<char>>(
+    chars: &mut Chars<'_>,
+    mode: Mode,
+) -> Result<T, EscapeError> {
     // Previous character was '\\', unescape what follows.
     let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
-        '"' => '"',
-        'n' => '\n',
-        'r' => '\r',
-        't' => '\t',
-        '\\' => '\\',
-        '\'' => '\'',
-        '0' => '\0',
+        '"' => b'"',
+        'n' => b'\n',
+        'r' => b'\r',
+        't' => b'\t',
+        '\\' => b'\\',
+        '\'' => b'\'',
+        '0' => b'\0',
 
         'x' => {
             // Parse hexadecimal character code.
@@ -194,22 +224,23 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
 
             let value = hi * 16 + lo;
 
-            // For a non-byte literal verify that it is within ASCII range.
-            if !is_byte && !is_ascii(value) {
+            if mode.ascii_escapes_should_be_ascii() && !is_ascii(value) {
                 return Err(EscapeError::OutOfRangeHexEscape);
             }
-            let value = value as u8;
 
-            value as char
+            value as u8
         }
 
-        'u' => scan_unicode(chars, is_byte)?,
+        'u' => return scan_unicode(chars, mode.is_unicode_escape_disallowed()).map(Into::into),
         _ => return Err(EscapeError::InvalidEscape),
     };
-    Ok(res)
+    Ok(res.into())
 }
 
-fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
+fn scan_unicode(
+    chars: &mut Chars<'_>,
+    is_unicode_escape_disallowed: bool,
+) -> Result<char, EscapeError> {
     // We've parsed '\u', now we have to parse '{..}'.
 
     if chars.next() != Some('{') {
@@ -237,7 +268,7 @@ fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeErro
 
                 // Incorrect syntax has higher priority for error reporting
                 // than unallowed value for a literal.
-                if is_byte {
+                if is_unicode_escape_disallowed {
                     return Err(EscapeError::UnicodeEscapeInByte);
                 }
 
@@ -263,8 +294,8 @@ fn scan_unicode(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeErro
 }
 
 #[inline]
-fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
-    if is_byte && !c.is_ascii() {
+fn ascii_check(c: char, characters_should_be_ascii: bool) -> Result<char, EscapeError> {
+    if characters_should_be_ascii && !c.is_ascii() {
         // Byte literal can't be a non-ascii character.
         Err(EscapeError::NonAsciiCharInByte)
     } else {
@@ -275,7 +306,7 @@ fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
 fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
     let c = chars.next().ok_or(EscapeError::ZeroChars)?;
     let res = match c {
-        '\\' => scan_escape(chars, is_byte),
+        '\\' => scan_escape(chars, if is_byte { Mode::Byte } else { Mode::Char }),
         '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
         '\r' => Err(EscapeError::BareCarriageReturn),
         _ => ascii_check(c, is_byte),
@@ -288,9 +319,9 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, E
 
 /// Takes a contents of a string literal (without quotes) and produces a
 /// sequence of escaped characters or errors.
-fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
+fn unescape_str_common<F, T: From<u8> + From<char>>(src: &str, mode: Mode, callback: &mut F)
 where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
+    F: FnMut(Range<usize>, Result<T, EscapeError>),
 {
     let mut chars = src.chars();
 
@@ -312,17 +343,17 @@ where
                         });
                         continue;
                     }
-                    _ => scan_escape(&mut chars, is_byte),
+                    _ => scan_escape::<T>(&mut chars, mode),
                 }
             }
-            '\n' => Ok('\n'),
-            '\t' => Ok('\t'),
+            '\n' => Ok(b'\n'.into()),
+            '\t' => Ok(b'\t'.into()),
             '"' => Err(EscapeError::EscapeOnlyChar),
             '\r' => Err(EscapeError::BareCarriageReturn),
-            _ => ascii_check(c, is_byte),
+            _ => ascii_check(c, mode.characters_should_be_ascii()).map(Into::into),
         };
         let end = src.len() - chars.as_str().len();
-        callback(start..end, res);
+        callback(start..end, res.map(Into::into));
     }
 }
 
diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 0d12ec6081d83..2e4c798ab2259 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error(
                 }
             };
             let sugg = sugg.unwrap_or_else(|| {
-                let is_byte = mode.is_byte();
-                let prefix = if is_byte { "b" } else { "" };
+                let prefix = mode.prefix_noraw();
                 let mut escaped = String::with_capacity(lit.len());
                 let mut chrs = lit.chars().peekable();
                 while let Some(first) = chrs.next() {
@@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error(
                     };
                 }
                 let sugg = format!("{prefix}\"{escaped}\"");
-                MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
+                MoreThanOneCharSugg::Quotes {
+                    span: span_with_quotes,
+                    is_byte: mode == Mode::Byte,
+                    sugg,
+                }
             });
             handler.emit_err(UnescapeError::MoreThanOneChar {
                 span: span_with_quotes,
@@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error(
                 char_span,
                 escaped_sugg: c.escape_default().to_string(),
                 escaped_msg: escaped_char(c),
-                byte: mode.is_byte(),
+                byte: mode == Mode::Byte,
             });
         }
         EscapeError::BareCarriageReturn => {
@@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error(
         EscapeError::InvalidEscape => {
             let (c, span) = last_char();
 
-            let label =
-                if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
+            let label = if mode == Mode::Byte || mode == Mode::ByteStr {
+                "unknown byte escape"
+            } else {
+                "unknown character escape"
+            };
             let ec = escaped_char(c);
             let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
             diag.span_label(span, label);
-            if c == '{' || c == '}' && !mode.is_byte() {
+            if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
                 diag.help(
                     "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
                 );
@@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error(
                      version control settings",
                 );
             } else {
-                if !mode.is_byte() {
+                if mode == Mode::Str || mode == Mode::Char {
                     diag.span_suggestion(
                         span_with_quotes,
                         "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",

From 78e3455d375feb5d100a43110f78b405a8ff05f1 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Mon, 6 Mar 2023 14:15:02 +0000
Subject: [PATCH 06/10] address comments

---
 compiler/rustc_ast/src/ast.rs   | 2 +-
 compiler/rustc_lexer/src/lib.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index fb90d309fcde7..8555ba3e7cee4 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -1814,7 +1814,7 @@ pub enum LitKind {
     /// A byte string (`b"foo"`). Not stored as a symbol because it might be
     /// non-utf8, and symbols only allow utf8 strings.
     ByteStr(Lrc<[u8]>, StrStyle),
-    /// A C String (`c"foo"`). Guaranteed only have `\0` in the end.
+    /// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
     CStr(Lrc<[u8]>, StrStyle),
     /// A byte char (`b'f'`).
     Byte(u8),
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index ce8c9ebe7ce77..c07dc19a0ac3a 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -194,7 +194,7 @@ pub enum LiteralKind {
     /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
     /// indicates an invalid literal.
     RawByteStr { n_hashes: Option<u8> },
-    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` is invalid.
+    /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
     RawCStr { n_hashes: Option<u8> },
 }
 

From bf3ca5979e47774802e95623c11e71fb303e5ff3 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Tue, 7 Mar 2023 05:09:19 +0000
Subject: [PATCH 07/10] try gating early, add non-ascii test

---
 compiler/rustc_ast_passes/src/feature_gate.rs         |  1 -
 compiler/rustc_parse/src/parser/expr.rs               |  1 +
 tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs | 10 ++++++++++
 3 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs

diff --git a/compiler/rustc_ast_passes/src/feature_gate.rs b/compiler/rustc_ast_passes/src/feature_gate.rs
index 9defc6603e837..17bcd24ee39fd 100644
--- a/compiler/rustc_ast_passes/src/feature_gate.rs
+++ b/compiler/rustc_ast_passes/src/feature_gate.rs
@@ -1,4 +1,3 @@
-use ast::token;
 use rustc_ast as ast;
 use rustc_ast::visit::{self, AssocCtxt, FnCtxt, FnKind, Visitor};
 use rustc_ast::{attr, AssocConstraint, AssocConstraintKind, NodeId};
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index bff9de5c652f9..c89b4ca8d6f0f 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -1922,6 +1922,7 @@ impl<'a> Parser<'a> {
         let recovered = self.recover_after_dot();
         let token = recovered.as_ref().unwrap_or(&self.token);
         let span = token.span;
+
         token::Lit::from_token(token).map(|token_lit| {
             self.bump();
             (token_lit, span)
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
new file mode 100644
index 0000000000000..82e8e2090d7db
--- /dev/null
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
@@ -0,0 +1,10 @@
+// run-pass
+
+#![feature(c_str_literals)]
+
+fn main() {
+    assert_eq!(
+        c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
+        &[0xEF, 0x80, 0xF0, 0x9F, 0xA6, 0x80, 0xF0, 0x9F, 0xA6, 0x80, 0x00],
+    );
+}

From abb181dfd9b9df22908ab08d7cfb46509295e2e6 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Fri, 10 Mar 2023 14:18:58 +0000
Subject: [PATCH 08/10] make it semantic error

---
 compiler/rustc_ast/src/util/literal.rs          |   2 ++
 compiler/rustc_ast_passes/src/feature_gate.rs   |   1 +
 compiler/rustc_parse/src/lexer/mod.rs           |   3 +++
 compiler/rustc_session/messages.ftl             |   2 ++
 compiler/rustc_session/src/errors.rs            |  15 ++++++++++++++-
 .../rfcs/rfc-3348-c-string-literals/no-nuls.rs  | Bin 322 -> 570 bytes
 .../rfc-3348-c-string-literals/no-nuls.stderr   | Bin 670 -> 674 bytes
 7 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs
index cd3b163e3ac97..15a54fe13d0b7 100644
--- a/compiler/rustc_ast/src/util/literal.rs
+++ b/compiler/rustc_ast/src/util/literal.rs
@@ -8,6 +8,7 @@ use rustc_lexer::unescape::{
 };
 use rustc_span::symbol::{kw, sym, Symbol};
 use rustc_span::Span;
+use std::ops::Range;
 use std::{ascii, fmt, str};
 
 // Escapes a string, represented as a symbol. Reuses the original symbol,
@@ -38,6 +39,7 @@ pub enum LitError {
     InvalidFloatSuffix,
     NonDecimalFloat(u32),
     IntTooLarge(u32),
+    NulInCStr(Range<usize>),
 }
 
 impl LitKind {
diff --git a/compiler/rustc_ast_passes/src/feature_gate.rs b/compiler/rustc_ast_passes/src/feature_gate.rs
index 17bcd24ee39fd..c4578ec4af1ef 100644
--- a/compiler/rustc_ast_passes/src/feature_gate.rs
+++ b/compiler/rustc_ast_passes/src/feature_gate.rs
@@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) {
             }
         };
     }
+    gate_all!(c_str_literals, "`c\"..\"` literals are experimental");
     gate_all!(
         if_let_guard,
         "`if let` guards are experimental",
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index b2050780309f8..050f18986154a 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -204,6 +204,9 @@ impl<'a> StringReader<'a> {
                 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                     let suffix_start = start + BytePos(suffix_start);
                     let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
+                    if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind {
+                        self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos));
+                    }
                     let suffix = if suffix_start < self.pos {
                         let string = self.str_from(suffix_start);
                         if string == "_" {
diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl
index ff53f22d43f93..2420857e739ed 100644
--- a/compiler/rustc_session/messages.ftl
+++ b/compiler/rustc_session/messages.ftl
@@ -93,3 +93,5 @@ session_invalid_int_literal_width = invalid width `{$width}` for integer literal
     .help = valid widths are 8, 16, 32, 64 and 128
 
 session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg}
+
+session_nul_in_c_str = null characters in C string literals are not supported
diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs
index bd32adbbdbb54..22af74eb1d9e0 100644
--- a/compiler/rustc_session/src/errors.rs
+++ b/compiler/rustc_session/src/errors.rs
@@ -6,7 +6,7 @@ use rustc_ast::token;
 use rustc_ast::util::literal::LitError;
 use rustc_errors::{error_code, DiagnosticMessage, EmissionGuarantee, IntoDiagnostic, MultiSpan};
 use rustc_macros::Diagnostic;
-use rustc_span::{Span, Symbol};
+use rustc_span::{BytePos, Span, Symbol};
 use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple};
 
 #[derive(Diagnostic)]
@@ -307,6 +307,13 @@ pub(crate) struct BinaryFloatLiteralNotSupported {
     pub span: Span,
 }
 
+#[derive(Diagnostic)]
+#[diag(session_nul_in_c_str)]
+pub(crate) struct NulInCStr {
+    #[primary_span]
+    pub span: Span,
+}
+
 pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) {
     // Checks if `s` looks like i32 or u1234 etc.
     fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
@@ -385,6 +392,12 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span:
             };
             sess.emit_err(IntLiteralTooLarge { span, limit });
         }
+        LitError::NulInCStr(range) => {
+            let lo = BytePos(span.lo().0 + range.start as u32 + 2);
+            let hi = BytePos(span.lo().0 + range.end as u32 + 2);
+            let span = span.with_lo(lo).with_hi(hi);
+            sess.emit_err(NulInCStr { span });
+        }
     }
 }
 
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
index f6c86a1ba8762358537827376cbffc5c3f4508e9..e66519f294cd08f751f03b725967b5cd60dcee1d 100644
GIT binary patch
literal 570
zcmbV}F>Avx5QRPKSDYOWQkQs6Xd#4jDiqw&PK^+9YLJy6ouS3JzdaYZZGy>C-z1*&
z?w-C|#6_(oc209ud32R&P&;Y7*fUmJXk}x$fSv)BO3Ex*hvRbj{SLc4f6Z55J7Yk7
zNGBC}Jv0`!K)o7!V86bu&$3~jH=1WFKeKsZUGm?F17odJ?pV3bXdi=aPx$r2jHf(B
zTp@}F0gq464{Jv67lo=1{CV^8wj<I68nwrJpo#e{JM|ZiF0MaRW9UFTOpke3mKe8n
WmBwCEIZCEV=sgiHGf1NJmpTKC`k6EU

delta 34
qcmdnRa)@a{(8OTtiT!31k6BJ)H0M{y%u{eyC@v|=%uDC0<pKcUrwio(

diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
index 8a1669b285adaaf14161a49780a7a61dcf4673c9..ff9006f6f97f1be7eed18d188e8c30ba2075743e 100644
GIT binary patch
literal 674
zcmcJNJqyAx5Qd%eD_)9gHR2akaM4BR<Xj4|4H^hZxg;W3|GTkcTgTpXA8^lm$18~}
zWiUZ&R1ZO-@{$~s!3oZjGIwYdTS7Ysk}%f8g{HCOWzB#zJz{>oT$jeCafw@y%Qo3Q
zPSCPEM;jBMN(!DFx@K1cWyzSFfW04Mhd1Y+KEI4HjfNE;5i#rWCql1B(8Y@nzCYRR
fyP*I|{vI!G(MUg&)j`5rY|)T>DjOU+cR0qrx?#zt

literal 670
zcmcJNI}5@v5XYVKDgG4KYQ#q+f{QLfC+AYAHE5wEa!Ew6es@F0SVV;0bT96AFG-|H
zK)JR>eXXdftt2!GI~Yrix-cM3w;1ma$Enh(Pc#!UVj(ggLC5kOomYT5DR{C8&R_#&
z`K>OKf1<NKw0m;?it(kFL0Xvco*qUKei-t+hg>}P$Lfcgd?nWL;VFqm$4^_YPMWO$
VFr1x))mY+2^4@4?5MIMD_6Dmm#F_vA


From 6d905a8cc14783bc577dc0534d2516d16ef3e43b Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Fri, 10 Mar 2023 14:29:26 +0000
Subject: [PATCH 09/10] fix tidy

---
 .../rfcs/rfc-3348-c-string-literals/no-nuls.rs  | Bin 570 -> 565 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
index e66519f294cd08f751f03b725967b5cd60dcee1d..7bc6097f124aabfded4a9e9791611cfe4fbf9f78 100644
GIT binary patch
delta 7
OcmdnRvXy0nDH8w-N&;U1

delta 13
ScmdnWvWsPdDHAIf2mk;YE&`SS


From d30c6681751b10a14265e09e5f74f39d2a32e641 Mon Sep 17 00:00:00 2001
From: Deadbeef <ent3rm4n@gmail.com>
Date: Fri, 28 Apr 2023 15:28:52 +0000
Subject: [PATCH 10/10] make cook generic

---
 compiler/rustc_parse/src/lexer/mod.rs | 64 +++++++++++----------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 050f18986154a..9d0037b8a8083 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -1,3 +1,5 @@
+use std::ops::Range;
+
 use crate::errors;
 use crate::lexer::unicode_chars::UNICODE_ARRAY;
 use crate::make_unclosed_delims_error;
@@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
-use rustc_lexer::unescape::{self, Mode};
+use rustc_lexer::unescape::{self, EscapeError, Mode};
 use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
 use rustc_session::lint::builtin::{
@@ -670,7 +672,7 @@ impl<'a> StringReader<'a> {
         self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
     }
 
-    fn cook_quoted(
+    fn cook_common(
         &self,
         kind: token::LitKind,
         mode: Mode,
@@ -678,12 +680,13 @@ impl<'a> StringReader<'a> {
         end: BytePos,
         prefix_len: u32,
         postfix_len: u32,
+        unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
     ) -> (token::LitKind, Symbol) {
         let mut has_fatal_err = false;
         let content_start = start + BytePos(prefix_len);
         let content_end = end - BytePos(postfix_len);
         let lit_content = self.str_from_to(content_start, content_end);
-        unescape::unescape_literal(lit_content, mode, &mut |range, result| {
+        unescape(lit_content, mode, &mut |range, result| {
             // Here we only check for errors. The actual unescaping is done later.
             if let Err(err) = result {
                 let span_with_quotes = self.mk_sp(start, end);
@@ -715,7 +718,7 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    fn cook_c_string(
+    fn cook_quoted(
         &self,
         kind: token::LitKind,
         mode: Mode,
@@ -724,40 +727,27 @@ impl<'a> StringReader<'a> {
         prefix_len: u32,
         postfix_len: u32,
     ) -> (token::LitKind, Symbol) {
-        let mut has_fatal_err = false;
-        let content_start = start + BytePos(prefix_len);
-        let content_end = end - BytePos(postfix_len);
-        let lit_content = self.str_from_to(content_start, content_end);
-        unescape::unescape_c_string(lit_content, mode, &mut |range, result| {
-            // Here we only check for errors. The actual unescaping is done later.
-            if let Err(err) = result {
-                let span_with_quotes = self.mk_sp(start, end);
-                let (start, end) = (range.start as u32, range.end as u32);
-                let lo = content_start + BytePos(start);
-                let hi = lo + BytePos(end - start);
-                let span = self.mk_sp(lo, hi);
-                if err.is_fatal() {
-                    has_fatal_err = true;
-                }
-                emit_unescape_error(
-                    &self.sess.span_diagnostic,
-                    lit_content,
-                    span_with_quotes,
-                    span,
-                    mode,
-                    range,
-                    err,
-                );
-            }
-        });
+        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
+            unescape::unescape_literal(src, mode, &mut |span, result| {
+                callback(span, result.map(drop))
+            })
+        })
+    }
 
-        // We normally exclude the quotes for the symbol, but for errors we
-        // include it because it results in clearer error messages.
-        if !has_fatal_err {
-            (kind, Symbol::intern(lit_content))
-        } else {
-            (token::Err, self.symbol_from_to(start, end))
-        }
+    fn cook_c_string(
+        &self,
+        kind: token::LitKind,
+        mode: Mode,
+        start: BytePos,
+        end: BytePos,
+        prefix_len: u32,
+        postfix_len: u32,
+    ) -> (token::LitKind, Symbol) {
+        self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
+            unescape::unescape_c_string(src, mode, &mut |span, result| {
+                callback(span, result.map(drop))
+            })
+        })
     }
 }