Skip to content

Commit

Permalink
deprecate Unicode functions that will be moved to crates.io
Browse files Browse the repository at this point in the history
This patch
1. renames libunicode to librustc_unicode,
2. deprecates several pieces of libunicode (see below), and
3. removes references to deprecated functions from
   librustc_driver and libsyntax. This may change pretty-printed
   output from these modules in cases involving wide or combining
   characters used in filenames, identifiers, etc.

The following functions are marked deprecated:

1. char.width() and str.width():
   --> use unicode-width crate

2. str.graphemes() and str.grapheme_indices():
   --> use unicode-segmentation crate

3. str.nfd_chars(), str.nfkd_chars(), str.nfc_chars(), str.nfkc_chars(),
   char.compose(), char.decompose_canonical(), char.decompose_compatible(),
   char.canonical_combining_class():
   --> use unicode-normalization crate
  • Loading branch information
kwantam committed Apr 15, 2015
1 parent a691f1e commit 503533c
Show file tree
Hide file tree
Showing 21 changed files with 101 additions and 47 deletions.
12 changes: 6 additions & 6 deletions mk/crates.mk
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
TARGET_CRATES := libc std flate arena term \
serialize getopts collections test rand \
log graphviz core rbml alloc \
unicode rustc_bitflags
rustc_unicode rustc_bitflags
RUSTC_CRATES := rustc rustc_typeck rustc_borrowck rustc_resolve rustc_driver \
rustc_trans rustc_back rustc_llvm rustc_privacy rustc_lint
HOST_CRATES := syntax $(RUSTC_CRATES) rustdoc fmt_macros
Expand All @@ -61,9 +61,9 @@ TOOLS := compiletest rustdoc rustc rustbook

DEPS_core :=
DEPS_libc := core
DEPS_unicode := core
DEPS_rustc_unicode := core
DEPS_alloc := core libc native:jemalloc
DEPS_std := core libc rand alloc collections unicode \
DEPS_std := core libc rand alloc collections rustc_unicode \
native:rust_builtin native:backtrace native:rustrt_native \
rustc_bitflags
DEPS_graphviz := std
Expand Down Expand Up @@ -94,7 +94,7 @@ DEPS_serialize := std log
DEPS_rbml := std log serialize
DEPS_term := std log
DEPS_getopts := std
DEPS_collections := core alloc unicode
DEPS_collections := core alloc rustc_unicode
DEPS_num := std
DEPS_test := std getopts serialize rbml term native:rust_test_helpers
DEPS_rand := core
Expand All @@ -115,11 +115,11 @@ ONLY_RLIB_libc := 1
ONLY_RLIB_alloc := 1
ONLY_RLIB_rand := 1
ONLY_RLIB_collections := 1
ONLY_RLIB_unicode := 1
ONLY_RLIB_rustc_unicode := 1
ONLY_RLIB_rustc_bitflags := 1

# Documented-by-default crates
DOC_CRATES := std alloc collections core libc unicode
DOC_CRATES := std alloc collections core libc rustc_unicode

################################################################################
# You should not need to edit below this line
Expand Down
11 changes: 7 additions & 4 deletions src/etc/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,14 @@ def comp_pfun(char):
emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False,
pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))

f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
+ " bsearch_range_value_table(c, combining_class_table)\n"
+ " }\n")
f.write(""" #[deprecated(reason = "use the crates.io `unicode-normalization` lib instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality will be moved to crates.io")]
pub fn canonical_combining_class(c: char) -> u8 {
bsearch_range_value_table(c, combining_class_table)
}
f.write("""
}
""")
Expand Down
2 changes: 1 addition & 1 deletion src/libcollections/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
#[macro_use]
extern crate core;

extern crate unicode;
extern crate rustc_unicode;
extern crate alloc;

#[cfg(test)] #[macro_use] extern crate std;
Expand Down
44 changes: 35 additions & 9 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ use core::result::Result;
use core::str as core_str;
use core::str::pattern::Pattern;
use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use unicode::str::{UnicodeStr, Utf16Encoder};
use rustc_unicode::str::{UnicodeStr, Utf16Encoder};

use core::convert::AsRef;
use vec_deque::VecDeque;
use borrow::{Borrow, ToOwned};
use string::String;
use unicode;
use rustc_unicode;
use vec::Vec;
use slice::SliceConcatExt;

Expand All @@ -78,7 +78,7 @@ pub use core::str::{Matches, RMatches};
pub use core::str::{MatchIndices, RMatchIndices};
pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, ParseBoolError};
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
pub use rustc_unicode::str::{Words, Graphemes, GraphemeIndices};
pub use core::str::pattern;

/*
Expand Down Expand Up @@ -161,6 +161,9 @@ enum DecompositionType {
/// External iterator for a string decomposition's characters.
///
/// For use with the `std::iter` module.
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[derive(Clone)]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -172,6 +175,7 @@ pub struct Decompositions<'a> {
sorted: bool
}

#[allow(deprecated)]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Decompositions<'a> {
type Item = char;
Expand All @@ -198,7 +202,7 @@ impl<'a> Iterator for Decompositions<'a> {
{
let callback = |d| {
let class =
unicode::char::canonical_combining_class(d);
rustc_unicode::char::canonical_combining_class(d);
if class == 0 && !*sorted {
canonical_sort(buffer);
*sorted = true;
Expand All @@ -207,10 +211,10 @@ impl<'a> Iterator for Decompositions<'a> {
};
match self.kind {
Canonical => {
unicode::char::decompose_canonical(ch, callback)
rustc_unicode::char::decompose_canonical(ch, callback)
}
Compatible => {
unicode::char::decompose_compatible(ch, callback)
rustc_unicode::char::decompose_compatible(ch, callback)
}
}
}
Expand Down Expand Up @@ -254,6 +258,9 @@ enum RecompositionState {
/// External iterator for a string recomposition's characters.
///
/// For use with the `std::iter` module.
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[derive(Clone)]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -266,6 +273,7 @@ pub struct Recompositions<'a> {
last_ccc: Option<u8>
}

#[allow(deprecated)]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Recompositions<'a> {
type Item = char;
Expand All @@ -276,7 +284,7 @@ impl<'a> Iterator for Recompositions<'a> {
match self.state {
Composing => {
for ch in self.iter.by_ref() {
let ch_class = unicode::char::canonical_combining_class(ch);
let ch_class = rustc_unicode::char::canonical_combining_class(ch);
if self.composee.is_none() {
if ch_class != 0 {
return Some(ch);
Expand All @@ -288,7 +296,7 @@ impl<'a> Iterator for Recompositions<'a> {

match self.last_ccc {
None => {
match unicode::char::compose(k, ch) {
match rustc_unicode::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
Expand Down Expand Up @@ -316,7 +324,7 @@ impl<'a> Iterator for Recompositions<'a> {
self.last_ccc = Some(ch_class);
continue;
}
match unicode::char::compose(k, ch) {
match rustc_unicode::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
Expand Down Expand Up @@ -465,6 +473,9 @@ impl str {

/// Returns an iterator over the string in Unicode Normalization Form D
/// (canonical decomposition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -480,6 +491,9 @@ impl str {

/// Returns an iterator over the string in Unicode Normalization Form KD
/// (compatibility decomposition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -495,6 +509,9 @@ impl str {

/// An Iterator over the string in Unicode Normalization Form C
/// (canonical decomposition followed by canonical composition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand All @@ -511,6 +528,9 @@ impl str {

/// An Iterator over the string in Unicode Normalization Form KC
/// (compatibility decomposition followed by canonical composition).
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
since = "1.0.0")]
#[inline]
#[unstable(feature = "unicode",
reason = "this functionality may be replaced with a more generic \
Expand Down Expand Up @@ -1690,6 +1710,8 @@ impl str {
///
/// assert_eq!(&gr2[..], b);
/// ```
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn graphemes(&self, is_extended: bool) -> Graphemes {
Expand All @@ -1709,6 +1731,8 @@ impl str {
///
/// assert_eq!(&gr_inds[..], b);
/// ```
#[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
Expand Down Expand Up @@ -1749,6 +1773,8 @@ impl str {
/// recommends that these
/// characters be treated as 1 column (i.e., `is_cjk = false`) if the
/// locale is unknown.
#[deprecated(reason = "use the crates.io `unicode-width` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "this functionality may only be provided by libunicode")]
pub fn width(&self, is_cjk: bool) -> usize {
Expand Down
4 changes: 2 additions & 2 deletions src/libcollections/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ use core::ops::{self, Deref, Add, Index};
use core::ptr;
use core::slice;
use core::str::pattern::Pattern;
use unicode::str as unicode_str;
use unicode::str::Utf16Item;
use rustc_unicode::str as unicode_str;
use rustc_unicode::str::Utf16Item;

use borrow::{Cow, IntoCow};
use str::{self, FromStr, Utf8Error};
Expand Down
6 changes: 6 additions & 0 deletions src/libcollectionstest/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn test_le() {
assert!("foo" != "bar");
}

#[allow(deprecated)]
#[test]
fn test_len() {
assert_eq!("".len(), 0);
Expand Down Expand Up @@ -944,6 +945,7 @@ fn test_words() {
assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}

#[allow(deprecated)]
#[test]
fn test_nfd_chars() {
macro_rules! t {
Expand All @@ -963,6 +965,7 @@ fn test_nfd_chars() {
t!("\u{ac1c}", "\u{1100}\u{1162}");
}

#[allow(deprecated)]
#[test]
fn test_nfkd_chars() {
macro_rules! t {
Expand All @@ -982,6 +985,7 @@ fn test_nfkd_chars() {
t!("\u{ac1c}", "\u{1100}\u{1162}");
}

#[allow(deprecated)]
#[test]
fn test_nfc_chars() {
macro_rules! t {
Expand All @@ -1002,6 +1006,7 @@ fn test_nfc_chars() {
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}

#[allow(deprecated)]
#[test]
fn test_nfkc_chars() {
macro_rules! t {
Expand Down Expand Up @@ -1033,6 +1038,7 @@ fn test_lines() {
assert_eq!(lines, ["", "Märy häd ä little lämb", "", "Little lämb"]);
}

#[allow(deprecated)]
#[test]
fn test_graphemes() {
use std::iter::order;
Expand Down
1 change: 1 addition & 0 deletions src/libcoretest/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ fn test_len_utf16() {
assert!('\u{1f4a9}'.len_utf16() == 2);
}

#[allow(deprecated)]
#[test]
fn test_width() {
assert_eq!('\x00'.width(false),Some(0));
Expand Down
6 changes: 2 additions & 4 deletions src/librustc_driver/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
#![feature(staged_api)]
#![feature(exit_status)]
#![feature(set_stdio)]
#![feature(unicode)]

extern crate arena;
extern crate flate;
Expand Down Expand Up @@ -574,7 +573,7 @@ Available lint options:
let builtin_groups = sort_lint_groups(builtin_groups);

let max_name_len = plugin.iter().chain(builtin.iter())
.map(|&s| s.name.width(true))
.map(|&s| s.name.chars().count())
.max().unwrap_or(0);
let padded = |x: &str| {
let mut s = repeat(" ").take(max_name_len - x.chars().count())
Expand All @@ -601,7 +600,7 @@ Available lint options:


let max_name_len = plugin_groups.iter().chain(builtin_groups.iter())
.map(|&(s, _)| s.width(true))
.map(|&(s, _)| s.chars().count())
.max().unwrap_or(0);
let padded = |x: &str| {
let mut s = repeat(" ").take(max_name_len - x.chars().count())
Expand Down Expand Up @@ -790,7 +789,6 @@ fn parse_crate_attrs(sess: &Session, input: &Input) ->
///
/// The diagnostic emitter yielded to the procedure should be used for reporting
/// errors of the compiler.
#[allow(deprecated)]
pub fn monitor<F:FnOnce()+Send+'static>(f: F) {
const STACK_SIZE: usize = 8 * 1024 * 1024; // 8MB

Expand Down
4 changes: 4 additions & 0 deletions src/libunicode/char.rs → src/librustc_unicode/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ use tables::{derived_property, property, general_category, conversions, charwidt
pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};

// unstable reexports
#[allow(deprecated)]
pub use normalize::{decompose_canonical, decompose_compatible, compose};
#[allow(deprecated)]
pub use tables::normalization::canonical_combining_class;
pub use tables::UNICODE_VERSION;

Expand Down Expand Up @@ -445,6 +447,8 @@ impl char {
/// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
/// recommends that these characters be treated as 1 column (i.e.,
/// `is_cjk` = `false`) if the context cannot be reliably determined.
#[deprecated(reason = "use the crates.io `unicode-width` library instead",
since = "1.0.0")]
#[unstable(feature = "unicode",
reason = "needs expert opinion. is_cjk flag stands out as ugly")]
pub fn width(self, is_cjk: bool) -> Option<usize> { charwidth::width(self, is_cjk) }
Expand Down
2 changes: 1 addition & 1 deletion src/libunicode/lib.rs → src/librustc_unicode/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
// Do not remove on snapshot creation. Needed for bootstrap. (Issue #22364)
#![cfg_attr(stage0, feature(custom_attribute))]
#![crate_name = "unicode"]
#![crate_name = "rustc_unicode"]
#![unstable(feature = "unicode")]
#![feature(lang_items)]
#![feature(staged_api)]
Expand Down
Loading

0 comments on commit 503533c

Please sign in to comment.