Skip to content

Commit

Permalink
refactor!: Make arange an alias for int_range
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Aug 15, 2023
1 parent c40976b commit b112e67
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 134 deletions.
3 changes: 0 additions & 3 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -852,9 +852,6 @@ impl From<RangeFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: RangeFunction) -> Self {
use RangeFunction::*;
match func {
ARange { step } => {
map_as_slice!(range::arange, step)
},
IntRange { step } => {
map_as_slice!(range::int_range, step)
},
Expand Down
20 changes: 0 additions & 20 deletions crates/polars-plan/src/dsl/function_expr/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use super::*;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, Copy, PartialEq, Debug, Eq, Hash)]
pub enum RangeFunction {
ARange { step: i64 },
IntRange { step: i64 },
IntRanges { step: i64 },
}
Expand All @@ -12,7 +11,6 @@ impl Display for RangeFunction {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use RangeFunction::*;
match self {
ARange { .. } => write!(f, "arange"),
IntRange { .. } => write!(f, "int_range"),
IntRanges { .. } => write!(f, "int_ranges"),
}
Expand Down Expand Up @@ -51,24 +49,6 @@ where
Ok(ca.into_series())
}

/// Create list entries that are range arrays
/// - if `start` and `end` are a column, every element will expand into an array in a list column.
/// - if `start` and `end` are literals the output will be of `Int64`.
pub(super) fn arange(s: &[Series], step: i64) -> PolarsResult<Series> {
let start = &s[0];
let end = &s[1];

let mut result = if start.len() == 1 && end.len() == 1 {
int_range(s, step)
} else {
int_ranges(s, step)
}?;

result.rename("arange");

Ok(result)
}

pub(super) fn int_range(s: &[Series], step: i64) -> PolarsResult<Series> {
let start = &s[0];
let end = &s[1];
Expand Down
1 change: 0 additions & 1 deletion crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ impl FunctionExpr {
Range(fun) => {
use RangeFunction::*;
let field = match fun {
ARange { .. } => Field::new("arange", DataType::Int64), // This is not always correct
IntRange { .. } => Field::new("int", DataType::Int64),
IntRanges { .. } => {
Field::new("int_range", DataType::List(Box::new(DataType::Int64)))
Expand Down
17 changes: 4 additions & 13 deletions crates/polars-plan/src/dsl/functions/range.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,11 @@
use super::*;

/// Create list entries that are range arrays
/// - if `start` and `end` are a column, every element will expand into an array in a list column.
/// - if `start` and `end` are literals the output will be of `Int64`.
/// Generate a range of integers.
///
/// Alias for `int_range`.
#[cfg(feature = "range")]
pub fn arange(start: Expr, end: Expr, step: i64) -> Expr {
let input = vec![start, end];

Expr::Function {
input,
function: FunctionExpr::Range(RangeFunction::ARange { step }),
options: FunctionOptions {
allow_rename: true,
..Default::default()
},
}
int_range(start, end, step)
}

#[cfg(feature = "range")]
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 16 additions & 35 deletions py-polars/polars/functions/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from polars.type_aliases import (
ClosedInterval,
IntoExpr,
PolarsDataType,
PolarsIntegerType,
TimeUnit,
)
Expand All @@ -39,7 +38,7 @@ def arange(
end: int | Expr | Series,
step: int = ...,
*,
dtype: PolarsDataType | None = ...,
dtype: PolarsIntegerType = ...,
eager: Literal[False] = ...,
) -> Expr:
...
Expand All @@ -51,7 +50,7 @@ def arange(
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsDataType | None = ...,
dtype: PolarsIntegerType = ...,
eager: Literal[True],
) -> Series:
...
Expand All @@ -63,30 +62,24 @@ def arange(
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsDataType | None = ...,
dtype: PolarsIntegerType = ...,
eager: bool,
) -> Expr | Series:
...


@deprecate_renamed_parameter("low", "start", version="0.18.0")
@deprecate_renamed_parameter("high", "end", version="0.18.0")
def arange(
start: int | IntoExpr,
end: int | IntoExpr,
step: int = 1,
*,
dtype: PolarsDataType | None = None,
dtype: PolarsIntegerType = Int64,
eager: bool = False,
) -> Expr | Series:
"""
Generate a range of integers.
.. deprecated:: 0.18.5
``arange`` has been replaced by two new functions: ``int_range`` for generating
a single range, and ``int_ranges`` for generating a list column with multiple
ranges. ``arange`` will remain available as an alias for `int_range`, which
means it will lose the functionality to generate multiple ranges.
Alias for :func:`int_range`.
Parameters
----------
Expand All @@ -97,11 +90,15 @@ def arange(
step
Step size of the range.
dtype
Data type of the resulting column. Defaults to ``Int64``.
Data type of the range. Defaults to ``Int64``.
eager
Evaluate immediately and return a ``Series``.
If set to ``False`` (default), return an expression instead.
Returns
-------
Column of data type ``dtype``.
See Also
--------
int_range : Generate a range of integers.
Expand All @@ -111,35 +108,15 @@ def arange(
--------
>>> pl.arange(0, 3, eager=True)
shape: (3,)
Series: 'arange' [i64]
Series: 'int' [i64]
[
0
1
2
]
"""
# This check is not water-proof, but we cannot check for literal expressions here
if not (isinstance(start, int) and isinstance(end, int)):
issue_deprecation_warning(
" `arange` has been replaced by two new functions:"
" `int_range` for generating a single range,"
" and `int_ranges` for generating a list column with multiple ranges."
" `arange` will remain available as an alias for `int_range`, which means its behaviour will change."
" To silence this warning, use either of the new functions.",
version="0.18.5",
)

start = parse_as_expression(start)
end = parse_as_expression(end)
result = wrap_expr(plr.arange(start, end, step))

if dtype is not None and dtype != Int64:
result = result.cast(dtype)
if eager:
return F.select(result).to_series()

return result
return int_range(start, end, step, dtype=dtype, eager=eager)


@overload
Expand Down Expand Up @@ -205,8 +182,12 @@ def int_range(
Returns
-------
Expr or Series
Column of data type ``dtype``.
See Also
--------
Expand Down
5 changes: 0 additions & 5 deletions py-polars/src/functions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ use pyo3::prelude::*;
use crate::prelude::*;
use crate::PyExpr;

#[pyfunction]
pub fn arange(start: PyExpr, end: PyExpr, step: i64) -> PyExpr {
dsl::arange(start.inner, end.inner, step).into()
}

#[pyfunction]
pub fn int_range(start: PyExpr, end: PyExpr, step: i64, dtype: Wrap<DataType>) -> PyExpr {
let dtype = dtype.0;
Expand Down
2 changes: 0 additions & 2 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
.unwrap();

// Functions - range
m.add_wrapped(wrap_pyfunction!(functions::range::arange))
.unwrap();
m.add_wrapped(wrap_pyfunction!(functions::range::int_range))
.unwrap();
m.add_wrapped(wrap_pyfunction!(functions::range::int_ranges))
Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/benchmark/test_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,11 @@ def test_max_statistic_parquet_writer() -> None:
n = 150_000

# int64 is important to hit the page size
df = pl.arange(0, n, eager=True, dtype=pl.Int64).to_frame()
df = pl.int_range(0, n, eager=True, dtype=pl.Int64).to_frame()
f = "/tmp/tmp.parquet"
df.write_parquet(f, statistics=True, use_pyarrow=False, row_group_size=n)
result = pl.scan_parquet(f).filter(pl.col("arange") > n - 3).collect()
expected = pl.DataFrame({"arange": [149998, 149999]})
result = pl.scan_parquet(f).filter(pl.col("int") > n - 3).collect()
expected = pl.DataFrame({"int": [149998, 149999]})
assert_frame_equal(result, expected)


Expand Down
54 changes: 3 additions & 51 deletions py-polars/tests/unit/functions/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def test_arange() -> None:
assert_frame_equal(result, expected)


def test_arange_decreasing() -> None:
assert pl.arange(10, 1, -2, eager=True).to_list() == list(range(10, 1, -2))
def test_int_range_decreasing() -> None:
assert pl.int_range(10, 1, -2, eager=True).to_list() == list(range(10, 1, -2))


def test_arange_expr() -> None:
def test_int_range_expr() -> None:
df = pl.DataFrame({"a": ["foobar", "barfoo"]})
out = df.select(pl.int_range(0, pl.col("a").count() * 10))
assert out.shape == (20, 1)
Expand All @@ -42,54 +42,6 @@ def test_arange_expr() -> None:
assert out2.to_list() == [0, 2, 4, 6, 8]


def test_arange_deprecated() -> None:
df = pl.DataFrame(
{
"start": [1, 2, 3, 5, 5, 5],
"stop": [8, 3, 12, 8, 8, 8],
}
)

with pytest.deprecated_call():
result = df.select(pl.arange(pl.lit(1), pl.col("stop") + 1).alias("test"))

expected = pl.DataFrame(
{
"test": [
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3],
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3, 4, 5, 6, 7, 8],
[1, 2, 3, 4, 5, 6, 7, 8],
]
}
)
assert_frame_equal(result, expected)

with pytest.deprecated_call():
result_s = pl.arange(pl.Series([0, 19]), pl.Series([3, 39]), step=2, eager=True)
assert result_s.dtype == pl.List
assert result_s[0].to_list() == [0, 2]


def test_arange_name() -> None:
expected_name = "arange"
result_eager = pl.arange(0, 5, eager=True)
assert result_eager.name == expected_name

result_lazy = pl.select(pl.arange(0, 5)).to_series()
assert result_lazy.name == expected_name


def test_arange_schema() -> None:
result = pl.LazyFrame().select(pl.arange(-3, 3))

expected_schema = {"arange": pl.Int64}
assert result.schema == expected_schema
assert result.collect().schema == expected_schema


def test_int_range() -> None:
result = pl.int_range(0, 3)
expected = pl.Series("int", [0, 1, 2])
Expand Down

0 comments on commit b112e67

Please sign in to comment.