Skip to content

Commit

Permalink
std: Tweak some unstable features of str
Browse files Browse the repository at this point in the history
This commit clarifies some of the unstable features in the `str` module by
moving them out of the blanket `core` and `collections` features.

The following methods were moved to the `str_char` feature which generally
encompasses decoding specific characters from a `str` and dealing with the
result. It is unclear whether any of these methods need to be stabilized for
1.0, so the most conservative route for now is to continue providing them but
to leave them unstable under a more specific name.

* `is_char_boundary`
* `char_at`
* `char_range_at`
* `char_at_reverse`
* `char_range_at_reverse`
* `slice_shift_char`

The following methods were moved into the generic `unicode` feature as they are
specifically enabled by the `unicode` crate itself.

* `nfd_chars`
* `nfkd_chars`
* `nfc_chars`
* `graphemes`
* `grapheme_indices`
* `width`
  • Loading branch information
alexcrichton committed Mar 18, 2015
1 parent c64d671 commit aa88da6
Show file tree
Hide file tree
Showing 16 changed files with 156 additions and 92 deletions.
18 changes: 9 additions & 9 deletions src/compiletest/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1052,22 +1052,22 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
if *idx >= haystack.len() {
return false;
}
let range = haystack.char_range_at(*idx);
if range.ch != needle {
let ch = haystack.char_at(*idx);
if ch != needle {
return false;
}
*idx = range.next;
*idx += ch.len_utf8();
return true;
}

fn scan_integer(haystack: &str, idx: &mut uint) -> bool {
let mut i = *idx;
while i < haystack.len() {
let range = haystack.char_range_at(i);
if range.ch < '0' || '9' < range.ch {
let ch = haystack.char_at(i);
if ch < '0' || '9' < ch {
break;
}
i = range.next;
i += ch.len_utf8();
}
if i == *idx {
return false;
Expand All @@ -1083,9 +1083,9 @@ fn scan_string(haystack: &str, needle: &str, idx: &mut uint) -> bool {
if haystack_i >= haystack.len() {
return false;
}
let range = haystack.char_range_at(haystack_i);
haystack_i = range.next;
if !scan_char(needle, range.ch, &mut needle_i) {
let ch = haystack.char_at(haystack_i);
haystack_i += ch.len_utf8();
if !scan_char(needle, ch, &mut needle_i) {
return false;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/libcollections/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#![feature(unique)]
#![feature(unsafe_no_drop_flag)]
#![feature(step_by)]
#![feature(str_char)]
#![cfg_attr(test, feature(rand, rustc_private, test))]
#![cfg_attr(test, allow(deprecated))] // rand

Expand Down
158 changes: 102 additions & 56 deletions src/libcollections/str.rs

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions src/libcollections/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use unicode::str as unicode_str;
use unicode::str::Utf16Item;

use borrow::{Cow, IntoCow};
use str::{self, CharRange, FromStr, Utf8Error};
use str::{self, FromStr, Utf8Error};
use vec::{DerefVec, Vec, as_vec};

/// A growable string stored as a UTF-8 encoded buffer.
Expand Down Expand Up @@ -561,9 +561,9 @@ impl String {
return None
}

let CharRange {ch, next} = self.char_range_at_reverse(len);
let ch = self.char_at_reverse(len);
unsafe {
self.vec.set_len(next);
self.vec.set_len(len - ch.len_utf8());
}
Some(ch)
}
Expand Down Expand Up @@ -595,7 +595,8 @@ impl String {
let len = self.len();
assert!(idx <= len);

let CharRange { ch, next } = self.char_range_at(idx);
let ch = self.char_at(idx);
let next = idx + ch.len_utf8();
unsafe {
ptr::copy(self.vec.as_mut_ptr().offset(idx as isize),
self.vec.as_ptr().offset(next as isize),
Expand Down
11 changes: 7 additions & 4 deletions src/libcore/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

use self::OldSearcher::{TwoWay, TwoWayLong};

use char::CharExt;
use clone::Clone;
use cmp::{self, Eq};
use default::Default;
Expand Down Expand Up @@ -1112,8 +1113,10 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
/// the next `char` in a string. This can be used as a data structure
/// for iterating over the UTF-8 bytes of a string.
#[derive(Copy)]
#[unstable(feature = "core",
reason = "naming is uncertain with container conventions")]
#[unstable(feature = "str_char",
reason = "existence of this struct is uncertain as it is frequently \
able to be replaced with char.len_utf8() and/or \
char/char_indices iterators")]
pub struct CharRange {
/// Current `char`
pub ch: char,
Expand Down Expand Up @@ -1646,8 +1649,8 @@ impl StrExt for str {
if self.is_empty() {
None
} else {
let CharRange {ch, next} = self.char_range_at(0);
let next_s = unsafe { self.slice_unchecked(next, self.len()) };
let ch = self.char_at(0);
let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
Some((ch, next_s))
}
}
Expand Down
14 changes: 7 additions & 7 deletions src/libgetopts/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,10 @@
html_playground_url = "http://play.rust-lang.org/")]

#![deny(missing_docs)]
#![feature(collections)]
#![feature(int_uint)]
#![feature(staged_api)]
#![feature(core)]
#![feature(str_words)]
#![feature(str_char)]
#![cfg_attr(test, feature(rustc_private))]

#[cfg(test)] #[macro_use] extern crate log;
Expand Down Expand Up @@ -620,8 +619,8 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
let mut j = 1;
names = Vec::new();
while j < curlen {
let range = cur.char_range_at(j);
let opt = Short(range.ch);
let ch = cur.char_at(j);
let opt = Short(ch);

/* In a series of potential options (eg. -aheJ), if we
see one which takes an argument, we assume all
Expand All @@ -642,12 +641,13 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
No => false
};

if arg_follows && range.next < curlen {
i_arg = Some((&cur[range.next..curlen]).to_string());
let next = j + ch.len_utf8();
if arg_follows && next < curlen {
i_arg = Some((&cur[next..curlen]).to_string());
break;
}

j = range.next;
j = next;
}
}
let mut name_pos = 0;
Expand Down
1 change: 1 addition & 0 deletions src/librustc/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#![feature(io)]
#![feature(path_ext)]
#![feature(str_words)]
#![feature(str_char)]
#![cfg_attr(test, feature(test))]

extern crate arena;
Expand Down
1 change: 1 addition & 0 deletions src/librustc_driver/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#![feature(exit_status)]
#![feature(io)]
#![feature(set_stdio)]
#![feature(unicode)]

extern crate arena;
extern crate flate;
Expand Down
1 change: 1 addition & 0 deletions src/librustc_lint/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#![feature(unsafe_destructor)]
#![feature(staged_api)]
#![feature(std_misc)]
#![feature(str_char)]
#![cfg_attr(test, feature(test))]

extern crate syntax;
Expand Down
1 change: 1 addition & 0 deletions src/libserialize/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Core encoding and decoding interfaces.
#![feature(staged_api)]
#![feature(std_misc)]
#![feature(unicode)]
#![feature(str_char)]
#![cfg_attr(test, feature(test))]

// test harness access
Expand Down
1 change: 1 addition & 0 deletions src/libstd/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@
#![feature(int_uint)]
#![feature(unique)]
#![feature(allow_internal_unstable)]
#![feature(str_char)]
#![cfg_attr(test, feature(test, rustc_private))]

// Don't link to std. We are std.
Expand Down
1 change: 1 addition & 0 deletions src/libsyntax/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#![feature(std_misc)]
#![feature(unicode)]
#![feature(path_ext)]
#![feature(str_char)]

extern crate arena;
extern crate fmt_macros;
Expand Down
7 changes: 3 additions & 4 deletions src/libsyntax/parse/lexer/comments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ use parse::lexer;
use print::pprust;

use std::io::Read;
use std::str;
use std::usize;

#[derive(Clone, Copy, PartialEq)]
Expand Down Expand Up @@ -210,11 +209,11 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
let mut col = col.to_usize();
let mut cursor: usize = 0;
while col > 0 && cursor < len {
let r: str::CharRange = s.char_range_at(cursor);
if !r.ch.is_whitespace() {
let ch = s.char_at(cursor);
if !ch.is_whitespace() {
return None;
}
cursor = r.next;
cursor += ch.len_utf8();
col -= 1;
}
return Some(cursor);
Expand Down
16 changes: 9 additions & 7 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use std::fmt;
use std::mem::replace;
use std::num;
use std::rc::Rc;
use std::str;

pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};

Expand Down Expand Up @@ -291,7 +290,8 @@ impl<'a> StringReader<'a> {
s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
let mut i = 0;
while i < s.len() {
let str::CharRange { ch, next } = s.char_range_at(i);
let ch = s.char_at(i);
let next = i + ch.len_utf8();
if ch == '\r' {
if next < s.len() && s.char_at(next) == '\n' {
return translate_crlf_(self, start, s, errmsg, i).into_cow();
Expand All @@ -309,7 +309,8 @@ impl<'a> StringReader<'a> {
let mut buf = String::with_capacity(s.len());
let mut j = 0;
while i < s.len() {
let str::CharRange { ch, next } = s.char_range_at(i);
let ch = s.char_at(i);
let next = i + ch.len_utf8();
if ch == '\r' {
if j < i { buf.push_str(&s[j..i]); }
j = next;
Expand All @@ -335,10 +336,11 @@ impl<'a> StringReader<'a> {
if current_byte_offset < self.source_text.len() {
assert!(self.curr.is_some());
let last_char = self.curr.unwrap();
let next = self.source_text.char_range_at(current_byte_offset);
let byte_offset_diff = next.next - current_byte_offset;
let ch = self.source_text.char_at(current_byte_offset);
let next = current_byte_offset + ch.len_utf8();
let byte_offset_diff = next - current_byte_offset;
self.pos = self.pos + Pos::from_usize(byte_offset_diff);
self.curr = Some(next.ch);
self.curr = Some(ch);
self.col = self.col + CharPos(1);
if last_char == '\n' {
self.filemap.next_line(self.last_pos);
Expand Down Expand Up @@ -370,7 +372,7 @@ impl<'a> StringReader<'a> {
let offset = self.byte_offset(self.pos).to_usize();
let s = &self.source_text[..];
if offset >= s.len() { return None }
let str::CharRange { next, .. } = s.char_range_at(offset);
let next = offset + s.char_at(offset).len_utf8();
if next < s.len() {
Some(s.char_at(next))
} else {
Expand Down
1 change: 1 addition & 0 deletions src/libterm/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
#![feature(rustc_private)]
#![feature(staged_api)]
#![feature(std_misc)]
#![feature(str_char)]
#![feature(path_ext)]
#![cfg_attr(windows, feature(libc))]

Expand Down
7 changes: 6 additions & 1 deletion src/libunicode/u_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ impl<'a> Iterator for Graphemes<'a> {
}

self.cat = if take_curr {
idx = self.string.char_range_at(idx).next;
idx = idx + len_utf8(self.string.char_at(idx));
None
} else {
Some(cat)
Expand All @@ -256,6 +256,11 @@ impl<'a> Iterator for Graphemes<'a> {
}
}

// Free-function wrapper around `len_utf8` for a `char`, so code in this file
// (e.g. the `Graphemes` iterator, which adds the result to its byte index to
// step past the current char) can use a single spelling in every build stage.
//
// Under `stage0` the call is routed explicitly through the `UCharExt` trait.
// NOTE(review): presumably the stage0 snapshot compiler cannot resolve
// `c.len_utf8()` as a method call on `char` here -- confirm before removing
// this shim.
#[cfg(stage0)]
fn len_utf8(c: char) -> usize { UCharExt::len_utf8(c) }
// Outside `stage0` the method-call form `c.len_utf8()` is used directly.
#[cfg(not(stage0))]
fn len_utf8(c: char) -> usize { c.len_utf8() }

impl<'a> DoubleEndedIterator for Graphemes<'a> {
#[inline]
fn next_back(&mut self) -> Option<&'a str> {
Expand Down

0 comments on commit aa88da6

Please sign in to comment.