From 55e52f3d8fb16f27b67e364f3268efdd90cf5d4f Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Mon, 17 Apr 2017 08:20:49 +0200 Subject: [PATCH 1/2] Implement `Vec::from_elem` specialization for all `Copy` types If the input element is zero, `Vec::from_elem` can just invoke `calloc` for any `Copy` type. If the input is non-zero, but its size is 1, it can allocate and then `memset` the buffer. --- src/libcollections/lib.rs | 1 - src/libcollections/vec.rs | 77 ++++++++++++++++----------------------- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index 3bea61f6220b6..613de2bb21c3c 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -35,7 +35,6 @@ #![feature(box_patterns)] #![feature(box_syntax)] #![cfg_attr(not(test), feature(char_escape_debug))] -#![cfg_attr(not(test), feature(core_float))] #![feature(core_intrinsics)] #![feature(dropck_eyepatch)] #![feature(exact_size_is_empty)] diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index a3c529f358598..a7105269f16ac 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -77,8 +77,6 @@ use core::hash::{self, Hash}; use core::intrinsics::{arith_offset, assume}; use core::iter::{FromIterator, FusedIterator, TrustedLen}; use core::mem; -#[cfg(not(test))] -use core::num::Float; use core::ops::{InPlace, Index, IndexMut, Place, Placer}; use core::ops; use core::ptr; @@ -1388,59 +1386,48 @@ impl<T: Clone> SpecFromElem for T { } } -impl SpecFromElem for u8 { - #[inline] - fn from_elem(elem: u8, n: usize) -> Vec<u8> { - if elem == 0 { +unsafe fn chunked_or<T, U: ops::BitOr<Output=U> + Copy>(x: T) -> U { + let p = &x as *const T as *const U; + let len = mem::size_of::<T>() / mem::size_of::<U>(); + slice::from_raw_parts(p, len).iter().fold(mem::zeroed(), |state, &x| state | x) +} + +fn is_zero<T: Copy>(x: T) -> bool { + unsafe { + match mem::align_of::<T>() { + n if n % 16 == 0 => 0u128 == chunked_or(x), + n if n % 8 == 0 => 0u64 == chunked_or(x), + n if n % 4 
== 0 => 0u32 == chunked_or(x), + n if n % 2 == 0 => 0u16 == chunked_or(x), + _ => 0u8 == chunked_or(x), + } + } +} + +impl<T: Copy> SpecFromElem for T { + default fn from_elem(elem: Self, n: usize) -> Vec<Self> { + if is_zero(elem) { return Vec { buf: RawVec::with_capacity_zeroed(n), len: n, } } - unsafe { - let mut v = Vec::with_capacity(n); - ptr::write_bytes(v.as_mut_ptr(), elem, n); - v.set_len(n); - v - } - } -} -macro_rules! impl_spec_from_elem { - ($t: ty, $is_zero: expr) => { - impl SpecFromElem for $t { - #[inline] - fn from_elem(elem: $t, n: usize) -> Vec<$t> { - if $is_zero(elem) { - return Vec { - buf: RawVec::with_capacity_zeroed(n), - len: n, - } - } - let mut v = Vec::with_capacity(n); - v.extend_with_element(n, elem); - v + let mut v = Vec::with_capacity(n); + if mem::size_of::<T>() == 1 { + unsafe { + // let elem: u8 = mem::transmute(elem); + let elem: u8 = *(&elem as *const T as *const u8); + ptr::write_bytes(v.as_mut_ptr(), elem, n); + v.set_len(n); } + } else { + v.extend_with_element(n, elem); } - }; + v + } } -impl_spec_from_elem!(i8, |x| x == 0); -impl_spec_from_elem!(i16, |x| x == 0); -impl_spec_from_elem!(i32, |x| x == 0); -impl_spec_from_elem!(i64, |x| x == 0); -impl_spec_from_elem!(i128, |x| x == 0); -impl_spec_from_elem!(isize, |x| x == 0); - -impl_spec_from_elem!(u16, |x| x == 0); -impl_spec_from_elem!(u32, |x| x == 0); -impl_spec_from_elem!(u64, |x| x == 0); -impl_spec_from_elem!(u128, |x| x == 0); -impl_spec_from_elem!(usize, |x| x == 0); - -impl_spec_from_elem!(f32, |x: f32| x == 0. && x.is_sign_positive()); -impl_spec_from_elem!(f64, |x: f64| x == 0. 
&& x.is_sign_positive()); - //////////////////////////////////////////////////////////////////////////////// // Common trait implementations for Vec //////////////////////////////////////////////////////////////////////////////// From a53dfbedb4abb3c56b243b248a93bfa8dcc2694f Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 19 Apr 2017 13:04:41 +0200 Subject: [PATCH 2/2] Add explanation for `is_zero` and `chunked_or` Add an explanation of the safety requirements for `chunked_or` and how `is_zero` wraps it exposing a safe interface. --- src/libcollections/vec.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs index a7105269f16ac..3fcb23c5dd58b 100644 --- a/src/libcollections/vec.rs +++ b/src/libcollections/vec.rs @@ -1386,13 +1386,26 @@ impl<T: Clone> SpecFromElem for T { } } +// Computes the bitwise OR of the input, reinterpreted as [U]. +// Assumes that U is a primitive integer type and that `T` can be +// represented exactly as a slice of elements of type `U`, i.e. +// `mem::size_of::<T>() % mem::size_of::<U>() == 0` unsafe fn chunked_or<T, U: ops::BitOr<Output=U> + Copy>(x: T) -> U { let p = &x as *const T as *const U; let len = mem::size_of::<T>() / mem::size_of::<U>(); slice::from_raw_parts(p, len).iter().fold(mem::zeroed(), |state, &x| state | x) } +// Checks if the raw representation of the input is only binary zeroes. +// Instead of comparing each byte with 0, the whole memory region is +// OR-ed together and the result is compared to 0. fn is_zero<T: Copy>(x: T) -> bool { + // Find the greatest alignment that can be used to scan x, as that + // leads to less code and better performance. + // If the alignment is greater than 16, compute the OR using u128, + // as no bigger native integers are available. + // The calls to chunked_or() are safe because mem::size_of::<T>() + // is guaranteed to be a multiple of mem::align_of::<T>(). unsafe { match mem::align_of::<T>() { n if n % 16 == 0 => 0u128 == chunked_or(x),