Skip to content

Commit

Permalink
add MAX_LEN_UTF8 and MAX_LEN_UTF16 constants
Browse files Browse the repository at this point in the history
  • Loading branch information
HTGAzureX1212 committed Feb 28, 2024
1 parent ef32456 commit ae30dc9
Show file tree
Hide file tree
Showing 14 changed files with 48 additions and 17 deletions.
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
#![feature(assert_matches)]
#![feature(async_fn_traits)]
#![feature(async_iterator)]
#![feature(char_max_len)]
#![feature(coerce_unsized)]
#![feature(const_align_of_val)]
#![feature(const_box)]
Expand Down
8 changes: 5 additions & 3 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#![stable(feature = "rust1", since = "1.0.0")]

use core::char::MAX_LEN_UTF8;
use core::error::Error;
use core::fmt;
use core::hash;
Expand Down Expand Up @@ -1343,9 +1344,10 @@ impl String {
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn push(&mut self, ch: char) {

match ch.len_utf8() {
1 => self.vec.push(ch as u8),
_ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()),
_ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; MAX_LEN_UTF8]).as_bytes()),
}
}

Expand Down Expand Up @@ -1644,7 +1646,7 @@ impl String {
#[rustc_confusables("set")]
pub fn insert(&mut self, idx: usize, ch: char) {
assert!(self.is_char_boundary(idx));
let mut bits = [0; 4];
let mut bits = [0; MAX_LEN_UTF8];
let bits = ch.encode_utf8(&mut bits).as_bytes();

unsafe {
Expand Down Expand Up @@ -2633,7 +2635,7 @@ impl ToString for core::ascii::Char {
impl ToString for char {
#[inline]
fn to_string(&self) -> String {
String::from(self.encode_utf8(&mut [0; 4]))
String::from(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
}
}

Expand Down
1 change: 1 addition & 0 deletions library/alloc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#![feature(iter_array_chunks)]
#![feature(assert_matches)]
#![feature(btree_extract_if)]
#![feature(char_max_len)]
#![feature(cow_is_borrowed)]
#![feature(const_cow_is_borrowed)]
#![feature(const_heap)]
Expand Down
5 changes: 3 additions & 2 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::char::MAX_LEN_UTF8;
use std::cmp::Ordering::{Equal, Greater, Less};
use std::str::{from_utf8, from_utf8_unchecked};

Expand Down Expand Up @@ -1230,7 +1231,7 @@ fn test_to_uppercase_rev_iterator() {
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_chars_decoding() {
let mut bytes = [0; 4];
let mut bytes = [0; MAX_LEN_UTF8];
for c in (0..0x110000).filter_map(std::char::from_u32) {
let s = c.encode_utf8(&mut bytes);
if Some(c) != s.chars().next() {
Expand All @@ -1242,7 +1243,7 @@ fn test_chars_decoding() {
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_chars_rev_decoding() {
let mut bytes = [0; 4];
let mut bytes = [0; MAX_LEN_UTF8];
for c in (0..0x110000).filter_map(std::char::from_u32) {
let s = c.encode_utf8(&mut bytes);
if Some(c) != s.chars().rev().next() {
Expand Down
10 changes: 10 additions & 0 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ impl char {
#[stable(feature = "assoc_char_consts", since = "1.52.0")]
pub const MAX: char = '\u{10ffff}';

/// The maximum number of bytes needed to [encode](char::encode_utf8) any `char` as UTF-8.
///
/// A byte buffer of this length can be passed to [`char::encode_utf8`] for any `char`.
#[unstable(feature = "char_max_len", issue = "none")]
pub const MAX_LEN_UTF8: usize = 4;

/// The maximum number of two-byte code units needed to [encode](char::encode_utf16) any
/// `char` as UTF-16.
///
/// A buffer of this many units can be passed to [`char::encode_utf16`] for any `char`.
#[unstable(feature = "char_max_len", issue = "none")]
pub const MAX_LEN_UTF16: usize = 2;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error.
///
Expand Down
10 changes: 10 additions & 0 deletions library/core/src/char/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ const MAX_THREE_B: u32 = 0x10000;
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = char::MAX;

/// The maximum number of bytes needed to [encode](char::encode_utf8) any `char` as UTF-8.
/// Use [`char::MAX_LEN_UTF8`] instead.
#[unstable(feature = "char_max_len", issue = "none")]
pub const MAX_LEN_UTF8: usize = char::MAX_LEN_UTF8;

/// The maximum number of two-byte code units needed to [encode](char::encode_utf16) any
/// `char` as UTF-16. Use [`char::MAX_LEN_UTF16`] instead.
#[unstable(feature = "char_max_len", issue = "none")]
pub const MAX_LEN_UTF16: usize = char::MAX_LEN_UTF16;

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
#[stable(feature = "decode_utf16", since = "1.9.0")]
Expand Down
6 changes: 3 additions & 3 deletions library/core/src/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#![stable(feature = "rust1", since = "1.0.0")]

use crate::cell::{Cell, Ref, RefCell, RefMut, SyncUnsafeCell, UnsafeCell};
use crate::char::EscapeDebugExtArgs;
use crate::char::{EscapeDebugExtArgs, MAX_LEN_UTF8};
use crate::iter;
use crate::marker::PhantomData;
use crate::mem;
Expand Down Expand Up @@ -164,7 +164,7 @@ pub trait Write {
/// ```
#[stable(feature = "fmt_write_char", since = "1.1.0")]
fn write_char(&mut self, c: char) -> Result {
self.write_str(c.encode_utf8(&mut [0; 4]))
self.write_str(c.encode_utf8(&mut [0; MAX_LEN_UTF8]))
}

/// Glue for usage of the [`write!`] macro with implementors of this trait.
Expand Down Expand Up @@ -2387,7 +2387,7 @@ impl Display for char {
if f.width.is_none() && f.precision.is_none() {
f.write_char(*self)
} else {
f.pad(self.encode_utf8(&mut [0; 4]))
f.pad(self.encode_utf8(&mut [0; MAX_LEN_UTF8]))
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion library/core/src/str/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
issue = "27721"
)]

use crate::char::MAX_LEN_UTF8;
use crate::cmp;
use crate::cmp::Ordering;
use crate::convert::TryInto as _;
Expand Down Expand Up @@ -548,7 +549,7 @@ impl<'a> Pattern<'a> for char {

#[inline]
fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
let mut utf8_encoded = [0; 4];
let mut utf8_encoded = [0; MAX_LEN_UTF8];
let utf8_size = self
.encode_utf8(&mut utf8_encoded)
.len()
Expand Down
3 changes: 2 additions & 1 deletion library/core/tests/char.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::char::MAX_LEN_UTF8;
use std::str::FromStr;
use std::{char, str};

Expand Down Expand Up @@ -259,7 +260,7 @@ fn test_escape_unicode() {
#[test]
fn test_encode_utf8() {
fn check(input: char, expect: &[u8]) {
let mut buf = [0; 4];
let mut buf = [0; MAX_LEN_UTF8];
let ptr = buf.as_ptr();
let s = input.encode_utf8(&mut buf);
assert_eq!(s.as_ptr() as usize, ptr as usize);
Expand Down
1 change: 1 addition & 0 deletions library/core/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#![feature(async_iterator)]
#![feature(bigint_helper_methods)]
#![feature(cell_update)]
#![feature(char_max_len)]
#![feature(const_align_offset)]
#![feature(const_align_of_val_raw)]
#![feature(const_black_box)]
Expand Down
7 changes: 4 additions & 3 deletions library/std/src/fs/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::io::prelude::*;

use crate::char::MAX_LEN_UTF8;
use crate::env;
use crate::fs::{self, File, FileTimes, OpenOptions};
use crate::io::{BorrowedBuf, ErrorKind, SeekFrom};
Expand Down Expand Up @@ -176,7 +177,7 @@ fn file_test_io_non_positional_read() {
#[test]
fn file_test_io_seek_and_tell_smoke_test() {
let message = "ten-four";
let mut read_mem = [0; 4];
let mut read_mem = [0; MAX_LEN_UTF8];
let set_cursor = 4 as u64;
let tell_pos_pre_read;
let tell_pos_post_read;
Expand Down Expand Up @@ -231,7 +232,7 @@ fn file_test_io_seek_shakedown() {
let chunk_one: &str = "qwer";
let chunk_two: &str = "asdf";
let chunk_three: &str = "zxcv";
let mut read_mem = [0; 4];
let mut read_mem = [0; MAX_LEN_UTF8];
let tmpdir = tmpdir();
let filename = &tmpdir.join("file_rt_io_file_test_seek_shakedown.txt");
{
Expand Down Expand Up @@ -496,7 +497,7 @@ fn file_test_directoryinfo_readdir() {
check!(w.write(msg));
}
let files = check!(fs::read_dir(dir));
let mut mem = [0; 4];
let mut mem = [0; MAX_LEN_UTF8];
for f in files {
let f = f.unwrap().path();
{
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@
#![feature(cfg_sanitizer_cfi)]
#![feature(cfg_target_thread_local)]
#![feature(cfi_encoding)]
#![feature(char_max_len)]
#![feature(concat_idents)]
#![feature(const_mut_refs)]
#![feature(const_trait_impl)]
Expand Down
3 changes: 2 additions & 1 deletion library/std/src/sys/pal/windows/stdio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::str;
use crate::sys::c;
use crate::sys::cvt;
use crate::sys::handle::Handle;
use core::char::MAX_LEN_UTF8;
use core::str::utf8_char_width;

#[cfg(test)]
Expand Down Expand Up @@ -417,7 +418,7 @@ fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {

impl IncompleteUtf8 {
pub const fn new() -> IncompleteUtf8 {
IncompleteUtf8 { bytes: [0; 4], len: 0 }
IncompleteUtf8 { bytes: [0; MAX_LEN_UTF8], len: 0 }
}
}

Expand Down
6 changes: 3 additions & 3 deletions library/std/src/sys_common/wtf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#[cfg(test)]
mod tests;

use core::char::{encode_utf16_raw, encode_utf8_raw};
use core::char::{encode_utf16_raw, encode_utf8_raw, MAX_LEN_UTF16, MAX_LEN_UTF8};
use core::str::next_code_point;

use crate::borrow::Cow;
Expand Down Expand Up @@ -243,7 +243,7 @@ impl Wtf8Buf {
/// Copied from String::push
/// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
let mut bytes = [0; 4];
let mut bytes = [0; MAX_LEN_UTF8];
let bytes = encode_utf8_raw(code_point.value, &mut bytes);
self.bytes.extend_from_slice(bytes)
}
Expand Down Expand Up @@ -984,7 +984,7 @@ impl<'a> Iterator for EncodeWide<'a> {
return Some(tmp);
}

let mut buf = [0; 2];
let mut buf = [0; MAX_LEN_UTF16];
self.code_points.next().map(|code_point| {
let n = encode_utf16_raw(code_point.value, &mut buf).len();
if n == 2 {
Expand Down

0 comments on commit ae30dc9

Please sign in to comment.