diff --git a/Cargo.toml b/Cargo.toml index 433989d..20db4dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,4 +24,7 @@ pollster = "0.3.0" bytemuck = "1.15.0" futures-intrusive = "0.5.0" log = { version = "0.4.21" } -hashbrown = "0.14.3" \ No newline at end of file +hashbrown = "0.14.3" + +[profile.dev] +debug="line-tables-only" \ No newline at end of file diff --git a/crates/array/src/array/boolean_gpu.rs b/crates/array/src/array/boolean_gpu.rs index d4a4934..7bd0195 100644 --- a/crates/array/src/array/boolean_gpu.rs +++ b/crates/array/src/array/boolean_gpu.rs @@ -85,7 +85,6 @@ impl BooleanArrayGPU { pub fn raw_values(&self) -> Option> { let result = self.gpu_device.retrive_data(&self.data); - let result: Vec = bytemuck::cast_slice(&result).to_vec(); let mut bool_result = Vec::::with_capacity(self.len); for i in 0..self.len { bool_result.push(BooleanBufferBuilder::is_set_in_slice(&result, i)) @@ -211,11 +210,18 @@ mod tests { #[test] fn test_boolean_values() { let gpu_device = GpuDevice::new(); - let values = vec![Some(true), Some(true), Some(false), None]; + let mut values = vec![Some(true), Some(true), Some(false), None]; + for i in 0..100 { + values.extend_from_within(0..4); + } let array = BooleanArrayGPU::from_optional_slice(&values, Arc::new(gpu_device)); - let raw_values = array.raw_values().unwrap(); - assert_eq!(raw_values, vec![true, true, false, false]); + let mut raw_value = vec![true, true, false, false]; + for i in 0..100 { + raw_value.extend_from_within(0..4); + } + + assert_eq!(array.raw_values().unwrap(), raw_value); let gpu_values = array.values(); assert_eq!(gpu_values, values); diff --git a/crates/math/compute_shaders/f32/floatbinary.wgsl b/crates/math/compute_shaders/f32/floatbinary.wgsl new file mode 100644 index 0000000..07cb9d1 --- /dev/null +++ b/crates/math/compute_shaders/f32/floatbinary.wgsl @@ -0,0 +1,18 @@ +@group(0) +@binding(0) +var input_values: array; + +@group(0) +@binding(1) +var power_values: array; + +@group(0) +@binding(2) +var output_values: array; + + +@compute +@workgroup_size(256) +fn power_(@builtin(global_invocation_id) global_id: vec3) { + output_values[global_id.x] = pow(input_values[global_id.x], power_values[global_id.x]); +} \ No newline at end of file diff --git a/crates/math/compute_shaders/i32/binary.wgsl b/crates/math/compute_shaders/i32/binary.wgsl new file mode 100644 index 0000000..c2fe206 --- /dev/null +++ b/crates/math/compute_shaders/i32/binary.wgsl @@ -0,0 +1,29 @@ +@group(0) +@binding(0) +var input_values: array; + +@group(0) +@binding(1) +var power_values: array; + +@group(0) +@binding(2) +var output_values: array; + +@compute +@workgroup_size(256) +fn power_(@builtin(global_invocation_id) global_id: vec3) { + var input_value = input_values[global_id.x]; + var result = 1; + if power_values[global_id.x] >= 0 { + for (var i = 0; i < power_values[global_id.x]; i++) { + result *= input_value; + } + } else { + for (var i = 0; i < abs(power_values[global_id.x]); i++) { + result /= input_value; + } + } + + output_values[global_id.x] = result; +} \ No newline at end of file diff --git a/crates/math/src/f32.rs b/crates/math/src/f32.rs index fedbc72..3c8df96 100644 --- a/crates/math/src/f32.rs +++ b/crates/math/src/f32.rs @@ -3,9 +3,11 @@ use arrow_gpu_array::gpu_utils::*; use std::sync::*; use wgpu::Buffer; +use crate::MathBinaryType; use crate::{FloatMathUnaryType, MathUnaryType}; const FLOATUNARY_SHADER: &str = include_str!("../compute_shaders/f32/floatunary.wgsl"); +const FLOATBINARY_SHADER: &str = include_str!("../compute_shaders/f32/floatbinary.wgsl"); impl MathUnaryType for f32 { type OutputType = Float32ArrayGPU; @@ -29,6 +31,28 @@ impl MathUnaryType for f32 { } } +impl MathBinaryType for f32 { + type OutputType = Float32ArrayGPU; + + const SHADER: &'static str = FLOATBINARY_SHADER; + const BUFFER_SIZE_MULTIPLIER: u64 = 1; + + fn create_new( + data: Arc, + device: Arc, + len: usize, + null_buffer: Option, + ) -> Self::OutputType { + Float32ArrayGPU { + data, + gpu_device: device, + phantom: std::marker::PhantomData, + len, + null_buffer, + } + } +} + impl FloatMathUnaryType for f32 { type OutputType = Float32ArrayGPU; const SHADER: &'static str = FLOATUNARY_SHADER; @@ -210,4 +234,67 @@ mod tests { (-3.0f32).log2() ] ); + + test_float_array_op!( + #[cfg_attr( + any(target_os = "macos", target_os = "linux"), + ignore = "-x ** 0.0 is returning different values based on OS" + )] + test_f32_power, + Float32ArrayGPU, + Float32ArrayGPU, + Float32ArrayGPU, + power, + power_dyn, + vec![ + Some(1.0f32), + Some(-1.0f32), + Some(10.0f32), + Some(-10.0f32), + Some(3.0), + Some(-1.0f32), + None, + None, + Some(f32::NAN), + Some(f32::INFINITY), + Some(f32::NEG_INFINITY), + Some(f32::NEG_INFINITY), + Some(f32::INFINITY), + Some(f32::NAN), + ], + vec![ + Some(0.0f32), + Some(0.0), + Some(0.0), + Some(0.0), + Some(2.0), + None, + Some(3.0), + None, + Some(f32::NAN), + Some(f32::INFINITY), + Some(f32::NEG_INFINITY), + Some(f32::INFINITY), + Some(f32::NEG_INFINITY), + Some(3.0), + ], + vec![ + Some(1.0f32.powf(0.0)), + // TODO fixeme gpu -1.0 ** 0.0 gives NAN instead of 1.0 + Some(f32::NAN), + Some(10.0f32.powf(0.0)), + // TODO fixeme gpu -10.0 ** 0.0 gives NAN instead of 1.0 + Some(f32::NAN), + Some(3.0f32.powf(2.0)), + None, + None, + None, + Some(f32::NAN), + Some(f32::INFINITY), + Some(f32::NAN), + Some(f32::NAN), + Some(0.0), + Some(f32::NAN), + ] + ); } diff --git a/crates/math/src/i32.rs b/crates/math/src/i32.rs index 8ec740c..cae32db 100644 --- a/crates/math/src/i32.rs +++ b/crates/math/src/i32.rs @@ -6,9 +6,10 @@ use arrow_gpu_array::{ }; use wgpu::Buffer; -use crate::MathUnaryType; +use crate::{MathBinaryType, MathUnaryType}; const I32UNARY_SHADER: &str = include_str!("../compute_shaders/i32/unary.wgsl"); +const I32BINARY_SHADER: &str = include_str!("../compute_shaders/i32/binary.wgsl"); impl MathUnaryType for i32 { type OutputType = Int32ArrayGPU; @@ -32,11 +33,33 @@ impl MathUnaryType for i32 { } } +impl MathBinaryType for i32 { + type OutputType = Int32ArrayGPU; + + const SHADER: &'static str = I32BINARY_SHADER; + const BUFFER_SIZE_MULTIPLIER: u64 = 1; + + fn create_new( + data: Arc, + device: Arc, + len: usize, + null_buffer: Option, + ) -> Self::OutputType { + Int32ArrayGPU { + data, + gpu_device: device, + phantom: std::marker::PhantomData, + len, + null_buffer, + } + } +} + #[cfg(test)] mod test { use crate::*; - use arrow_gpu_test_macros::test_unary_op; use arrow_gpu_array::array::Int32ArrayGPU; + use arrow_gpu_test_macros::*; test_unary_op!( test_i32_abs, @@ -47,4 +70,46 @@ mod test { abs_dyn, vec![0, 1, 2, 3, 4] ); + + test_array_op!( + test_i32_power, + Int32ArrayGPU, + Int32ArrayGPU, + Int32ArrayGPU, + power, + power_dyn, + vec![ + Some(0i32), + Some(-1), + Some(-2), + Some(-3), + Some(-4), + Some(-2), + None, + Some(1), + None + ], + vec![ + Some(0i32), + Some(-1), + Some(2), + Some(3), + Some(1), + Some(-2), + Some(1), + None, + None + ], + vec![ + Some(1i32), + Some(-1), + Some(4), + Some(-27), + Some(-4), + Some(0), + None, + None, + None + ] + ); } diff --git a/crates/math/src/lib.rs b/crates/math/src/lib.rs index cd1afbf..99efd55 100644 --- a/crates/math/src/lib.rs +++ b/crates/math/src/lib.rs @@ -16,6 +16,7 @@ const EXP_ENTRY_POINT: &str = "exp_"; const EXP2_ENTRY_POINT: &str = "exp2_"; const LOG_ENTRY_POINT: &str = "log_"; const LOG2_ENTRY_POINT: &str = "log2_"; +const POWER_ENTRY_POINT: &str = "power_"; macro_rules! default_impl { ($self: ident, $fn: ident) => { @@ -24,6 +25,12 @@ macro_rules! default_impl { pipeline.finish(); return output; }; + ($self: ident, $other: ident, $fn: ident) => { + let mut pipeline = ArrowComputePipeline::new($self.get_gpu_device(), None); + let output = Self::$fn(&$self, $other, &mut pipeline); + pipeline.finish(); + return output; + }; } pub trait MathUnaryPass: ArrayUtils { @@ -49,6 +56,29 @@ pub trait MathUnaryType { ) -> Self::OutputType; } +pub trait MathBinaryPass: ArrayUtils + Sized { + type OutputType; + + fn power(&self, other: &Self) -> Self::OutputType { + default_impl!(self, other, power_op); + } + + fn power_op(&self, other: &Self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType; +} + +pub trait MathBinaryType { + type OutputType; + const SHADER: &'static str; + const BUFFER_SIZE_MULTIPLIER: u64; + + fn create_new( + data: Arc, + device: Arc, + len: usize, + null_buffer: Option, + ) -> Self::OutputType; +} + pub trait FloatMathUnary: ArrayUtils { type OutputType; fn sqrt(&self) -> Self::OutputType { @@ -91,7 +121,7 @@ pub trait FloatMathUnaryType { ) -> Self::OutputType; } -macro_rules! apply_unary_function_op { +macro_rules! apply_function_op { ($self: ident, $trait_name:ident, $entry_point: ident, $pipeline: ident) => { let dispatch_size = $self .data @@ -111,6 +141,34 @@ macro_rules! apply_unary_function_op { &mut $pipeline.encoder, ); + return ::create_new( + Arc::new(new_buffer), + $self.gpu_device.clone(), + $self.len, + new_null_buffer, + ); + }; + ($self: ident, $other: ident, $trait_name:ident, $entry_point: ident, $pipeline: ident) => { + let dispatch_size = $self + .data + .size() + .div_ceil(::ITEM_SIZE) + .div_ceil(256) as u32; + + let new_buffer = $pipeline.apply_binary_function( + &$self.data, + &$other.data, + $self.data.size() * ::BUFFER_SIZE_MULTIPLIER, + T::SHADER, + $entry_point, + dispatch_size, + ); + let new_null_buffer = NullBitBufferGpu::merge_null_bit_buffer_op( + &$self.null_buffer, + &$other.null_buffer, + $pipeline, + ); + return ::create_new( Arc::new(new_buffer), $self.gpu_device.clone(), @@ -124,7 +182,15 @@ impl MathUnaryPass for PrimitiveArrayGpu< type OutputType = T::OutputType; fn abs_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, MathUnaryType, ABS_ENTRY_POINT, pipeline); + apply_function_op!(self, MathUnaryType, ABS_ENTRY_POINT, pipeline); + } +} + +impl MathBinaryPass for PrimitiveArrayGpu { + type OutputType = T::OutputType; + + fn power_op(&self, other: &Self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { + apply_function_op!(self, other, MathBinaryType, POWER_ENTRY_POINT, pipeline); } } @@ -132,31 +198,31 @@ impl FloatMathUnary for PrimitiveArr type OutputType = T::OutputType; fn sqrt_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, FloatMathUnaryType, SQRT_ENTRY_POINT, pipeline); + apply_function_op!(self, FloatMathUnaryType, SQRT_ENTRY_POINT, pipeline); } fn cbrt_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, FloatMathUnaryType, CBRT_ENTRY_POINT, pipeline); + apply_function_op!(self, FloatMathUnaryType, CBRT_ENTRY_POINT, pipeline); } fn exp_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, FloatMathUnaryType, EXP_ENTRY_POINT, pipeline); + apply_function_op!(self, FloatMathUnaryType, EXP_ENTRY_POINT, pipeline); } fn exp2_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, FloatMathUnaryType, EXP2_ENTRY_POINT, pipeline); + apply_function_op!(self, FloatMathUnaryType, EXP2_ENTRY_POINT, pipeline); } fn log_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, FloatMathUnaryType, LOG_ENTRY_POINT, pipeline); + apply_function_op!(self, FloatMathUnaryType, LOG_ENTRY_POINT, pipeline); } fn log2_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType { - apply_unary_function_op!(self, FloatMathUnaryType, LOG2_ENTRY_POINT, pipeline); + apply_function_op!(self, FloatMathUnaryType, LOG2_ENTRY_POINT, pipeline); } } -macro_rules! dyn_fn { +macro_rules! dyn_unary_fn { ($([$dyn: ident, $dyn_op: ident, $array_op: ident, $($arr:ident),* ]),*) => { $( pub fn $dyn(data: &ArrowArrayGPU) -> ArrowArrayGPU { @@ -176,7 +242,7 @@ macro_rules! dyn_fn { } } -dyn_fn!( +dyn_unary_fn!( [abs_dyn, abs_op_dyn, abs_op, Float32ArrayGPU, Int32ArrayGPU], [sqrt_dyn, sqrt_op_dyn, sqrt_op, Float32ArrayGPU], [cbrt_dyn, cbrt_op_dyn, cbrt_op, Float32ArrayGPU], @@ -185,3 +251,36 @@ dyn_fn!( [log_dyn, log_op_dyn, log_op, Float32ArrayGPU], [log2_dyn, log2_op_dyn, log2_op, Float32ArrayGPU] ); + +macro_rules! dyn_binary_fn { + ($([$dyn: ident, $dyn_op: ident, $array_op: ident, $($arr:ident),* ]),*) => { + $( + pub fn $dyn(input1: &ArrowArrayGPU, input2: &ArrowArrayGPU) -> ArrowArrayGPU { + let mut pipeline = ArrowComputePipeline::new(input1.get_gpu_device(), None); + let result = $dyn_op(input1, input2, &mut pipeline); + pipeline.finish(); + result + } + + pub fn $dyn_op(input1: &ArrowArrayGPU, input2: &ArrowArrayGPU, pipeline: &mut ArrowComputePipeline) -> ArrowArrayGPU { + match (input1, input2) { + $((ArrowArrayGPU::$arr(arr_1), ArrowArrayGPU::$arr(arr_2)) => arr_1.$array_op(arr_2, pipeline).into(),)+ + _ => panic!( + "Operation {} not supported for type {:?} {:?}", + stringify!($function), + input1.get_dtype(), + input2.get_dtype(), + ), + } + } + )+ + } +} + +dyn_binary_fn!([ + power_dyn, + power_op_dyn, + power_op, + Int32ArrayGPU, + Float32ArrayGPU +]); diff --git a/crates/routines/src/bool.rs b/crates/routines/src/bool.rs index a46ae82..44bc74a 100644 --- a/crates/routines/src/bool.rs +++ b/crates/routines/src/bool.rs @@ -23,7 +23,7 @@ pub(crate) fn take_bool( data, &indexes.data, indexes.len as u64, - 32, + (indexes.len.div_ceil(32) as u64) * 4, TAKE_SHADER, "take", pipeline, diff --git a/crates/test_macros/src/lib.rs b/crates/test_macros/src/lib.rs index 49b8b9d..f6876ae 100644 --- a/crates/test_macros/src/lib.rs +++ b/crates/test_macros/src/lib.rs @@ -197,7 +197,8 @@ macro_rules! test_unary_op_float { #[macro_export] macro_rules! test_float_array_op { - ($fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $input_1: expr, $input_2: expr, $output: expr) => { + ($(#[$m:meta])* $fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $input_1: expr, $input_2: expr, $output: expr) => { + $(#[$m])* #[test] fn $fn_name() { use arrow_gpu_array::GPU_DEVICE; @@ -216,7 +217,8 @@ macro_rules! test_float_array_op { } } }; - ($fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $operation_dyn: ident, $input_1: expr, $input_2: expr, $output: expr) => { + ($(#[$m:meta])* $fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $operation_dyn: ident, $input_1: expr, $input_2: expr, $output: expr) => { + $(#[$m])* #[test] fn $fn_name() { use arrow_gpu_array::GPU_DEVICE; diff --git a/docs/src/kernels/math.md b/docs/src/kernels/math.md index 329f077..e92f1df 100644 --- a/docs/src/kernels/math.md +++ b/docs/src/kernels/math.md @@ -1,11 +1,11 @@ # Math related kernels -| | absolute | sqrt | exp | exp2 | log | log2 | cbrt | -|-|-|-|-|-|-|-| -| Int8 | | | -| Int16 | | | -| Int32 | | | -| UInt8 | | | -| UInt16 | | | -| UInt32 | | | -| Float32 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | \ No newline at end of file +| | absolute | sqrt | exp | exp2 | log | log2 | cbrt | power | +|-|-|-|-|-|-|-|-|-| +| Int8 | | | | | | | | | +| Int16 | | | | | | | | | +| Int32 | | | | | | | | ✓ | +| UInt8 | | | | | | | | | +| UInt16 | | | | | | | | | +| UInt32 | | | | | | | | | +| Float32 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | \ No newline at end of file