Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MAX_LEN_UTF8 and MAX_LEN_UTF16 Constants #120580

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
#![feature(async_closure)]
#![feature(async_fn_traits)]
#![feature(async_iterator)]
#![feature(char_max_len)]
#![feature(clone_to_uninit)]
#![feature(coerce_unsized)]
#![feature(const_align_of_val)]
Expand Down
8 changes: 5 additions & 3 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,7 +1355,9 @@ impl String {
pub fn push(&mut self, ch: char) {
match ch.len_utf8() {
1 => self.vec.push(ch as u8),
_ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
_ => {
self.vec.extend_from_slice(ch.encode_utf8(&mut [0; char::MAX_LEN_UTF8]).as_bytes())
}
}
}

Expand Down Expand Up @@ -1651,7 +1653,7 @@ impl String {
#[rustc_confusables("set")]
pub fn insert(&mut self, idx: usize, ch: char) {
assert!(self.is_char_boundary(idx));
let mut bits = [0; 4];
let mut bits = [0; char::MAX_LEN_UTF8];
let bits = ch.encode_utf8(&mut bits).as_bytes();

unsafe {
Expand Down Expand Up @@ -2584,7 +2586,7 @@ impl ToString for core::ascii::Char {
impl ToString for char {
#[inline]
fn to_string(&self) -> String {
String::from(self.encode_utf8(&mut [0; 4]))
String::from(self.encode_utf8(&mut [0; char::MAX_LEN_UTF8]))
}
}

Expand Down
1 change: 1 addition & 0 deletions library/alloc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#![feature(iter_array_chunks)]
#![feature(assert_matches)]
#![feature(btree_extract_if)]
#![feature(char_max_len)]
#![feature(cow_is_borrowed)]
#![feature(const_cow_is_borrowed)]
#![feature(const_heap)]
Expand Down
5 changes: 3 additions & 2 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::char::MAX_LEN_UTF8;
use std::cmp::Ordering::{Equal, Greater, Less};
use std::str::{from_utf8, from_utf8_unchecked};

Expand Down Expand Up @@ -1231,7 +1232,7 @@ fn test_to_uppercase_rev_iterator() {
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_chars_decoding() {
let mut bytes = [0; 4];
let mut bytes = [0; MAX_LEN_UTF8];
for c in (0..0x110000).filter_map(std::char::from_u32) {
let s = c.encode_utf8(&mut bytes);
if Some(c) != s.chars().next() {
Expand All @@ -1243,7 +1244,7 @@ fn test_chars_decoding() {
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_chars_rev_decoding() {
let mut bytes = [0; 4];
let mut bytes = [0; MAX_LEN_UTF8];
for c in (0..0x110000).filter_map(std::char::from_u32) {
let s = c.encode_utf8(&mut bytes);
if Some(c) != s.chars().rev().next() {
Expand Down
10 changes: 10 additions & 0 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ impl char {
#[stable(feature = "assoc_char_consts", since = "1.52.0")]
pub const MAX: char = '\u{10ffff}';

/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = 4;

/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = 2;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error.
///
Expand Down
10 changes: 10 additions & 0 deletions library/core/src/char/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ const MAX_THREE_B: u32 = 0x10000;
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// The maximum number of bytes required to [encode](char::encode_utf8) a `char` to
/// UTF-8 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;

/// The maximum number of two-byte units required to [encode](char::encode_utf16) a `char`
/// to UTF-16 encoding.
#[unstable(feature = "char_max_len", issue = "121714")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
#[stable(feature = "decode_utf16", since = "1.9.0")]
Expand Down
6 changes: 3 additions & 3 deletions library/core/src/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#![stable(feature = "rust1", since = "1.0.0")]

use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};
use crate::char::EscapeDebugExtArgs;
use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};
use crate::marker::PhantomData;
use crate::num::fmt as numfmt;
use crate::ops::Deref;
Expand Down Expand Up @@ -172,7 +172,7 @@ pub trait Write {
/// ```
#[stable(feature = "fmt_write_char", since = "1.1.0")]
fn write_char(&mut self, c: char) -> Result {
self.write_str(c.encode_utf8(&mut [0; 4]))
self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8]))
}

/// Glue for usage of the [`write!`] macro with implementors of this trait.
Expand Down Expand Up @@ -2498,7 +2498,7 @@ impl Display for char {
if f.width.is_none() && f.precision.is_none() {
f.write_char(*self)
} else {
f.pad(self.encode_utf8(&mut [0; 4]))
f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion library/core/src/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
issue = "27721"
)]

use crate::char::MAX_LEN_UTF8;
use crate::cmp::Ordering;
use crate::convert::TryInto as _;
use crate::slice::memchr;
Expand Down Expand Up @@ -547,7 +548,7 @@ impl Pattern for char {

#[inline]
fn into_searcher(self, haystack: &str) -> Self::Searcher<'_> {
let mut utf8_encoded = [0; 4];
let mut utf8_encoded = [0; MAX_LEN_UTF8];
let utf8_size = self
.encode_utf8(&mut utf8_encoded)
.len()
Expand Down
3 changes: 2 additions & 1 deletion library/core/tests/char.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::char::MAX_LEN_UTF8;
use std::str::FromStr;
use std::{char, str};

Expand Down Expand Up @@ -259,7 +260,7 @@ fn test_escape_unicode() {
#[test]
fn test_encode_utf8() {
fn check(input: char, expect: &[u8]) {
let mut buf = [0; 4];
let mut buf = [0; MAX_LEN_UTF8];
let ptr = buf.as_ptr();
let s = input.encode_utf8(&mut buf);
assert_eq!(s.as_ptr() as usize, ptr as usize);
Expand Down
1 change: 1 addition & 0 deletions library/core/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#![feature(async_iterator)]
#![feature(bigint_helper_methods)]
#![feature(cell_update)]
#![feature(char_max_len)]
#![feature(clone_to_uninit)]
#![feature(const_align_of_val_raw)]
#![feature(const_align_offset)]
Expand Down
7 changes: 4 additions & 3 deletions library/std/src/fs/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use rand::RngCore;

use crate::char::MAX_LEN_UTF8;
#[cfg(target_os = "macos")]
use crate::ffi::{c_char, c_int};
use crate::fs::{self, File, FileTimes, OpenOptions};
Expand Down Expand Up @@ -170,7 +171,7 @@ fn file_test_io_non_positional_read() {
#[test]
fn file_test_io_seek_and_tell_smoke_test() {
let message = "ten-four";
let mut read_mem = [0; 4];
let mut read_mem = [0; MAX_LEN_UTF8];
let set_cursor = 4 as u64;
let tell_pos_pre_read;
let tell_pos_post_read;
Expand Down Expand Up @@ -225,7 +226,7 @@ fn file_test_io_seek_shakedown() {
let chunk_one: &str = "qwer";
let chunk_two: &str = "asdf";
let chunk_three: &str = "zxcv";
let mut read_mem = [0; 4];
let mut read_mem = [0; MAX_LEN_UTF8];
let tmpdir = tmpdir();
let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt");
{
Expand Down Expand Up @@ -490,7 +491,7 @@ fn file_test_directoryinfo_readdir() {
check!(w.write(msg));
}
let files = check!(fs::read_dir(dir));
let mut mem = [0; 4];
let mut mem = [0; MAX_LEN_UTF8];
for f in files {
let f = f.unwrap().path();
{
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@
#![feature(cfg_sanitizer_cfi)]
#![feature(cfg_target_thread_local)]
#![feature(cfi_encoding)]
#![feature(char_max_len)]
#![feature(concat_idents)]
#![feature(const_mut_refs)]
#![feature(decl_macro)]
Expand Down
3 changes: 2 additions & 1 deletion library/std/src/sys/pal/windows/stdio.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#![unstable(issue = "none", feature = "windows_stdio")]

use core::char::MAX_LEN_UTF8;
use core::str::utf8_char_width;

use super::api::{self, WinError};
Expand Down Expand Up @@ -404,7 +405,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {

impl IncompleteUtf8 {
pub const fn new() -> IncompleteUtf8 {
IncompleteUtf8 { bytes: [0; 4], len: 0 }
IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }
}
}

Expand Down
6 changes: 3 additions & 3 deletions library/std/src/sys_common/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#[cfg(test)]
mod tests;

use core::char::{encode_utf16_raw, encode_utf8_raw};
use core::char::{encode_utf16_raw, encode_utf8_raw, MAX_LEN_UTF16, MAX_LEN_UTF8};
use core::clone::CloneToUninit;
use core::str::next_code_point;

Expand Down Expand Up @@ -241,7 +241,7 @@ impl Wtf8Buf {
/// Copied from String::push
/// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
let mut bytes = [0; 4];
let mut bytes = [0; MAX_LEN_UTF8];
let bytes = encode_utf8_raw(code_point.value, &mut bytes);
self.bytes.extend_from_slice(bytes)
}
Expand Down Expand Up @@ -1002,7 +1002,7 @@ impl<'a> Iterator for EncodeWide<'a> {
return Some(tmp);
}

let mut buf = [0; 2];
let mut buf = [0; MAX_LEN_UTF16];
self.code_points.next().map(|code_point| {
let n = encode_utf16_raw(code_point.value, &mut buf).len();
if n == 2 {
Expand Down
Loading