diff --git a/Cargo.toml b/Cargo.toml
index 433989d..20db4dc 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,4 +24,7 @@ pollster = "0.3.0"
 bytemuck = "1.15.0"
 futures-intrusive = "0.5.0"
 log =  { version =  "0.4.21" }
-hashbrown = "0.14.3"
\ No newline at end of file
+hashbrown = "0.14.3"
+
+[profile.dev]
+debug="line-tables-only"
\ No newline at end of file
diff --git a/crates/array/src/array/boolean_gpu.rs b/crates/array/src/array/boolean_gpu.rs
index d4a4934..7bd0195 100644
--- a/crates/array/src/array/boolean_gpu.rs
+++ b/crates/array/src/array/boolean_gpu.rs
@@ -85,7 +85,6 @@ impl BooleanArrayGPU {
 
     pub fn raw_values(&self) -> Option<Vec<bool>> {
         let result = self.gpu_device.retrive_data(&self.data);
-        let result: Vec<u8> = bytemuck::cast_slice(&result).to_vec();
         let mut bool_result = Vec::<bool>::with_capacity(self.len);
         for i in 0..self.len {
             bool_result.push(BooleanBufferBuilder::is_set_in_slice(&result, i))
@@ -211,11 +210,18 @@ mod tests {
     #[test]
     fn test_boolean_values() {
         let gpu_device = GpuDevice::new();
-        let values = vec![Some(true), Some(true), Some(false), None];
+        let mut values = vec![Some(true), Some(true), Some(false), None];
+        for i in 0..100 {
+            values.extend_from_within(0..4);
+        }
         let array = BooleanArrayGPU::from_optional_slice(&values, Arc::new(gpu_device));
 
-        let raw_values = array.raw_values().unwrap();
-        assert_eq!(raw_values, vec![true, true, false, false]);
+        let mut raw_value = vec![true, true, false, false];
+        for i in 0..100 {
+            raw_value.extend_from_within(0..4);
+        }
+
+        assert_eq!(array.raw_values().unwrap(), raw_value);
 
         let gpu_values = array.values();
         assert_eq!(gpu_values, values);
diff --git a/crates/math/compute_shaders/f32/floatbinary.wgsl b/crates/math/compute_shaders/f32/floatbinary.wgsl
new file mode 100644
index 0000000..07cb9d1
--- /dev/null
+++ b/crates/math/compute_shaders/f32/floatbinary.wgsl
@@ -0,0 +1,18 @@
+@group(0)
+@binding(0)
+var<storage, read> input_values: array<f32>;
+
+@group(0)
+@binding(1)
+var<storage, read> power_values: array<f32>;
+
+@group(0)
+@binding(2)
+var<storage, read_write> output_values: array<f32>;
+
+
+@compute
+@workgroup_size(256)
+fn power_(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    output_values[global_id.x] = pow(input_values[global_id.x], power_values[global_id.x]);
+}
\ No newline at end of file
diff --git a/crates/math/compute_shaders/i32/binary.wgsl b/crates/math/compute_shaders/i32/binary.wgsl
new file mode 100644
index 0000000..c2fe206
--- /dev/null
+++ b/crates/math/compute_shaders/i32/binary.wgsl
@@ -0,0 +1,29 @@
+@group(0)
+@binding(0)
+var<storage, read> input_values: array<i32>;
+
+@group(0)
+@binding(1)
+var<storage, read_write> power_values: array<i32>;
+
+@group(0)
+@binding(2)
+var<storage, read_write> output_values: array<i32>;
+
+@compute
+@workgroup_size(256)
+fn power_(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    var input_value = input_values[global_id.x];
+    var result = 1;
+    if power_values[global_id.x] >= 0 {
+        for (var i = 0; i < power_values[global_id.x]; i++) {
+            result *= input_value;
+        }
+    } else {
+        for (var i = 0; i < abs(power_values[global_id.x]); i++) {
+            result /= input_value;
+        }
+    }
+
+    output_values[global_id.x] = result;
+}
\ No newline at end of file
diff --git a/crates/math/src/f32.rs b/crates/math/src/f32.rs
index fedbc72..3c8df96 100644
--- a/crates/math/src/f32.rs
+++ b/crates/math/src/f32.rs
@@ -3,9 +3,11 @@ use arrow_gpu_array::gpu_utils::*;
 use std::sync::*;
 use wgpu::Buffer;
 
+use crate::MathBinaryType;
 use crate::{FloatMathUnaryType, MathUnaryType};
 
 const FLOATUNARY_SHADER: &str = include_str!("../compute_shaders/f32/floatunary.wgsl");
+const FLOATBINARY_SHADER: &str = include_str!("../compute_shaders/f32/floatbinary.wgsl");
 
 impl MathUnaryType for f32 {
     type OutputType = Float32ArrayGPU;
@@ -29,6 +31,28 @@ impl MathUnaryType for f32 {
     }
 }
 
+impl MathBinaryType for f32 {
+    type OutputType = Float32ArrayGPU;
+
+    const SHADER: &'static str = FLOATBINARY_SHADER;
+    const BUFFER_SIZE_MULTIPLIER: u64 = 1;
+
+    fn create_new(
+        data: Arc<Buffer>,
+        device: Arc<GpuDevice>,
+        len: usize,
+        null_buffer: Option<NullBitBufferGpu>,
+    ) -> Self::OutputType {
+        Float32ArrayGPU {
+            data,
+            gpu_device: device,
+            phantom: std::marker::PhantomData,
+            len,
+            null_buffer,
+        }
+    }
+}
+
 impl FloatMathUnaryType for f32 {
     type OutputType = Float32ArrayGPU;
     const SHADER: &'static str = FLOATUNARY_SHADER;
@@ -210,4 +234,67 @@ mod tests {
             (-3.0f32).log2()
         ]
     );
+
+    test_float_array_op!(
+        #[cfg_attr(
+            any(target_os = "macos", target_os = "linux"),
+            ignore = "-x ** 0.0 is returning different values based on OS"
+        )]
+        test_f32_power,
+        Float32ArrayGPU,
+        Float32ArrayGPU,
+        Float32ArrayGPU,
+        power,
+        power_dyn,
+        vec![
+            Some(1.0f32),
+            Some(-1.0f32),
+            Some(10.0f32),
+            Some(-10.0f32),
+            Some(3.0),
+            Some(-1.0f32),
+            None,
+            None,
+            Some(f32::NAN),
+            Some(f32::INFINITY),
+            Some(f32::NEG_INFINITY),
+            Some(f32::NEG_INFINITY),
+            Some(f32::INFINITY),
+            Some(f32::NAN),
+        ],
+        vec![
+            Some(0.0f32),
+            Some(0.0),
+            Some(0.0),
+            Some(0.0),
+            Some(2.0),
+            None,
+            Some(3.0),
+            None,
+            Some(f32::NAN),
+            Some(f32::INFINITY),
+            Some(f32::NEG_INFINITY),
+            Some(f32::INFINITY),
+            Some(f32::NEG_INFINITY),
+            Some(3.0),
+        ],
+        vec![
+            Some(1.0f32.powf(0.0)),
+            // TODO fixeme gpu -1.0 ** 0.0 gives NAN instead of 1.0
+            Some(f32::NAN),
+            Some(10.0f32.powf(0.0)),
+            // TODO fixeme gpu -10.0 ** 0.0 gives NAN instead of 1.0
+            Some(f32::NAN),
+            Some(3.0f32.powf(2.0)),
+            None,
+            None,
+            None,
+            Some(f32::NAN),
+            Some(f32::INFINITY),
+            Some(f32::NAN),
+            Some(f32::NAN),
+            Some(0.0),
+            Some(f32::NAN),
+        ]
+    );
 }
diff --git a/crates/math/src/i32.rs b/crates/math/src/i32.rs
index 8ec740c..cae32db 100644
--- a/crates/math/src/i32.rs
+++ b/crates/math/src/i32.rs
@@ -6,9 +6,10 @@ use arrow_gpu_array::{
 };
 use wgpu::Buffer;
 
-use crate::MathUnaryType;
+use crate::{MathBinaryType, MathUnaryType};
 
 const I32UNARY_SHADER: &str = include_str!("../compute_shaders/i32/unary.wgsl");
+const I32BINARY_SHADER: &str = include_str!("../compute_shaders/i32/binary.wgsl");
 
 impl MathUnaryType for i32 {
     type OutputType = Int32ArrayGPU;
@@ -32,11 +33,33 @@ impl MathUnaryType for i32 {
     }
 }
 
+impl MathBinaryType for i32 {
+    type OutputType = Int32ArrayGPU;
+
+    const SHADER: &'static str = I32BINARY_SHADER;
+    const BUFFER_SIZE_MULTIPLIER: u64 = 1;
+
+    fn create_new(
+        data: Arc<Buffer>,
+        device: Arc<GpuDevice>,
+        len: usize,
+        null_buffer: Option<NullBitBufferGpu>,
+    ) -> Self::OutputType {
+        Int32ArrayGPU {
+            data,
+            gpu_device: device,
+            phantom: std::marker::PhantomData,
+            len,
+            null_buffer,
+        }
+    }
+}
+
 #[cfg(test)]
 mod test {
     use crate::*;
-    use arrow_gpu_test_macros::test_unary_op;
     use arrow_gpu_array::array::Int32ArrayGPU;
+    use arrow_gpu_test_macros::*;
 
     test_unary_op!(
         test_i32_abs,
@@ -47,4 +70,46 @@ mod test {
         abs_dyn,
         vec![0, 1, 2, 3, 4]
     );
+
+    test_array_op!(
+        test_i32_power,
+        Int32ArrayGPU,
+        Int32ArrayGPU,
+        Int32ArrayGPU,
+        power,
+        power_dyn,
+        vec![
+            Some(0i32),
+            Some(-1),
+            Some(-2),
+            Some(-3),
+            Some(-4),
+            Some(-2),
+            None,
+            Some(1),
+            None
+        ],
+        vec![
+            Some(0i32),
+            Some(-1),
+            Some(2),
+            Some(3),
+            Some(1),
+            Some(-2),
+            Some(1),
+            None,
+            None
+        ],
+        vec![
+            Some(1i32),
+            Some(-1),
+            Some(4),
+            Some(-27),
+            Some(-4),
+            Some(0),
+            None,
+            None,
+            None
+        ]
+    );
 }
diff --git a/crates/math/src/lib.rs b/crates/math/src/lib.rs
index cd1afbf..99efd55 100644
--- a/crates/math/src/lib.rs
+++ b/crates/math/src/lib.rs
@@ -16,6 +16,7 @@ const EXP_ENTRY_POINT: &str = "exp_";
 const EXP2_ENTRY_POINT: &str = "exp2_";
 const LOG_ENTRY_POINT: &str = "log_";
 const LOG2_ENTRY_POINT: &str = "log2_";
+const POWER_ENTRY_POINT: &str = "power_";
 
 macro_rules! default_impl {
     ($self: ident, $fn: ident) => {
@@ -24,6 +25,12 @@ macro_rules! default_impl {
         pipeline.finish();
         return output;
     };
+    ($self: ident, $other: ident, $fn: ident) => {
+        let mut pipeline = ArrowComputePipeline::new($self.get_gpu_device(), None);
+        let output = Self::$fn(&$self, $other, &mut pipeline);
+        pipeline.finish();
+        return output;
+    };
 }
 
 pub trait MathUnaryPass: ArrayUtils {
@@ -49,6 +56,29 @@ pub trait MathUnaryType {
     ) -> Self::OutputType;
 }
 
+pub trait MathBinaryPass: ArrayUtils + Sized {
+    type OutputType;
+
+    fn power(&self, other: &Self) -> Self::OutputType {
+        default_impl!(self, other, power_op);
+    }
+
+    fn power_op(&self, other: &Self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType;
+}
+
+pub trait MathBinaryType {
+    type OutputType;
+    const SHADER: &'static str;
+    const BUFFER_SIZE_MULTIPLIER: u64;
+
+    fn create_new(
+        data: Arc<Buffer>,
+        device: Arc<GpuDevice>,
+        len: usize,
+        null_buffer: Option<NullBitBufferGpu>,
+    ) -> Self::OutputType;
+}
+
 pub trait FloatMathUnary: ArrayUtils {
     type OutputType;
     fn sqrt(&self) -> Self::OutputType {
@@ -91,7 +121,7 @@ pub trait FloatMathUnaryType {
     ) -> Self::OutputType;
 }
 
-macro_rules! apply_unary_function_op {
+macro_rules! apply_function_op {
     ($self: ident, $trait_name:ident, $entry_point: ident, $pipeline: ident) => {
         let dispatch_size = $self
             .data
@@ -111,6 +141,34 @@ macro_rules! apply_unary_function_op {
             &mut $pipeline.encoder,
         );
 
+        return <T as $trait_name>::create_new(
+            Arc::new(new_buffer),
+            $self.gpu_device.clone(),
+            $self.len,
+            new_null_buffer,
+        );
+    };
+    ($self: ident, $other: ident, $trait_name:ident, $entry_point: ident, $pipeline: ident) => {
+        let dispatch_size = $self
+            .data
+            .size()
+            .div_ceil(<T as ArrowPrimitiveType>::ITEM_SIZE)
+            .div_ceil(256) as u32;
+
+        let new_buffer = $pipeline.apply_binary_function(
+            &$self.data,
+            &$other.data,
+            $self.data.size() * <T as $trait_name>::BUFFER_SIZE_MULTIPLIER,
+            T::SHADER,
+            $entry_point,
+            dispatch_size,
+        );
+        let new_null_buffer = NullBitBufferGpu::merge_null_bit_buffer_op(
+            &$self.null_buffer,
+            &$other.null_buffer,
+            $pipeline,
+        );
+
         return <T as $trait_name>::create_new(
             Arc::new(new_buffer),
             $self.gpu_device.clone(),
@@ -124,7 +182,15 @@ impl<T: MathUnaryType + ArrowPrimitiveType> MathUnaryPass for PrimitiveArrayGpu<
     type OutputType = T::OutputType;
 
     fn abs_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, MathUnaryType, ABS_ENTRY_POINT, pipeline);
+        apply_function_op!(self, MathUnaryType, ABS_ENTRY_POINT, pipeline);
+    }
+}
+
+impl<T: MathBinaryType + ArrowPrimitiveType> MathBinaryPass for PrimitiveArrayGpu<T> {
+    type OutputType = T::OutputType;
+
+    fn power_op(&self, other: &Self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
+        apply_function_op!(self, other, MathBinaryType, POWER_ENTRY_POINT, pipeline);
     }
 }
 
@@ -132,31 +198,31 @@ impl<T: FloatMathUnaryType + ArrowPrimitiveType> FloatMathUnary for PrimitiveArr
     type OutputType = T::OutputType;
 
     fn sqrt_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, FloatMathUnaryType, SQRT_ENTRY_POINT, pipeline);
+        apply_function_op!(self, FloatMathUnaryType, SQRT_ENTRY_POINT, pipeline);
     }
 
     fn cbrt_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, FloatMathUnaryType, CBRT_ENTRY_POINT, pipeline);
+        apply_function_op!(self, FloatMathUnaryType, CBRT_ENTRY_POINT, pipeline);
     }
 
     fn exp_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, FloatMathUnaryType, EXP_ENTRY_POINT, pipeline);
+        apply_function_op!(self, FloatMathUnaryType, EXP_ENTRY_POINT, pipeline);
     }
 
     fn exp2_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, FloatMathUnaryType, EXP2_ENTRY_POINT, pipeline);
+        apply_function_op!(self, FloatMathUnaryType, EXP2_ENTRY_POINT, pipeline);
     }
 
     fn log_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, FloatMathUnaryType, LOG_ENTRY_POINT, pipeline);
+        apply_function_op!(self, FloatMathUnaryType, LOG_ENTRY_POINT, pipeline);
     }
 
     fn log2_op(&self, pipeline: &mut ArrowComputePipeline) -> Self::OutputType {
-        apply_unary_function_op!(self, FloatMathUnaryType, LOG2_ENTRY_POINT, pipeline);
+        apply_function_op!(self, FloatMathUnaryType, LOG2_ENTRY_POINT, pipeline);
     }
 }
 
-macro_rules! dyn_fn {
+macro_rules! dyn_unary_fn {
     ($([$dyn: ident, $dyn_op: ident, $array_op: ident, $($arr:ident),* ]),*) => {
         $(
             pub fn $dyn(data: &ArrowArrayGPU) -> ArrowArrayGPU {
@@ -176,7 +242,7 @@ macro_rules! dyn_fn {
     }
 }
 
-dyn_fn!(
+dyn_unary_fn!(
     [abs_dyn, abs_op_dyn, abs_op, Float32ArrayGPU, Int32ArrayGPU],
     [sqrt_dyn, sqrt_op_dyn, sqrt_op, Float32ArrayGPU],
     [cbrt_dyn, cbrt_op_dyn, cbrt_op, Float32ArrayGPU],
@@ -185,3 +251,36 @@ dyn_fn!(
     [log_dyn, log_op_dyn, log_op, Float32ArrayGPU],
     [log2_dyn, log2_op_dyn, log2_op, Float32ArrayGPU]
 );
+
+macro_rules! dyn_binary_fn {
+    ($([$dyn: ident, $dyn_op: ident, $array_op: ident, $($arr:ident),* ]),*) => {
+        $(
+            pub fn $dyn(input1: &ArrowArrayGPU, input2: &ArrowArrayGPU) -> ArrowArrayGPU {
+                let mut pipeline = ArrowComputePipeline::new(input1.get_gpu_device(), None);
+                let result = $dyn_op(input1, input2, &mut pipeline);
+                pipeline.finish();
+                result
+            }
+
+            pub fn $dyn_op(input1: &ArrowArrayGPU, input2: &ArrowArrayGPU, pipeline: &mut ArrowComputePipeline) -> ArrowArrayGPU {
+                match (input1, input2) {
+                    $((ArrowArrayGPU::$arr(arr_1), ArrowArrayGPU::$arr(arr_2)) => arr_1.$array_op(arr_2, pipeline).into(),)+
+                    _ => panic!(
+                        "Operation {} not supported for type {:?} {:?}",
+                        stringify!($function),
+                        input1.get_dtype(),
+                        input2.get_dtype(),
+                    ),
+                }
+            }
+        )+
+    }
+}
+
+dyn_binary_fn!([
+    power_dyn,
+    power_op_dyn,
+    power_op,
+    Int32ArrayGPU,
+    Float32ArrayGPU
+]);
diff --git a/crates/routines/src/bool.rs b/crates/routines/src/bool.rs
index a46ae82..44bc74a 100644
--- a/crates/routines/src/bool.rs
+++ b/crates/routines/src/bool.rs
@@ -23,7 +23,7 @@ pub(crate) fn take_bool(
         data,
         &indexes.data,
         indexes.len as u64,
-        32,
+        (indexes.len.div_ceil(32) as u64) * 4,
         TAKE_SHADER,
         "take",
         pipeline,
diff --git a/crates/test_macros/src/lib.rs b/crates/test_macros/src/lib.rs
index 49b8b9d..f6876ae 100644
--- a/crates/test_macros/src/lib.rs
+++ b/crates/test_macros/src/lib.rs
@@ -197,7 +197,8 @@ macro_rules! test_unary_op_float {
 
 #[macro_export]
 macro_rules! test_float_array_op {
-    ($fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $input_1: expr, $input_2: expr, $output: expr) => {
+    ($(#[$m:meta])* $fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $input_1: expr, $input_2: expr, $output: expr) => {
+        $(#[$m])*
         #[test]
         fn $fn_name() {
             use arrow_gpu_array::GPU_DEVICE;
@@ -216,7 +217,8 @@ macro_rules! test_float_array_op {
             }
         }
     };
-    ($fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $operation_dyn: ident, $input_1: expr, $input_2: expr, $output: expr) => {
+    ($(#[$m:meta])* $fn_name: ident, $operand1_type: ident, $operand2_type: ident, $output_type: ident, $operation: ident, $operation_dyn: ident, $input_1: expr, $input_2: expr, $output: expr) => {
+        $(#[$m])*
         #[test]
         fn $fn_name() {
             use arrow_gpu_array::GPU_DEVICE;
diff --git a/docs/src/kernels/math.md b/docs/src/kernels/math.md
index 329f077..e92f1df 100644
--- a/docs/src/kernels/math.md
+++ b/docs/src/kernels/math.md
@@ -1,11 +1,11 @@
 # Math related kernels
 
-| | absolute | sqrt | exp | exp2 | log | log2 | cbrt |
-|-|-|-|-|-|-|-|
-| Int8 | | |
-| Int16 | | |
-| Int32 |  | |
-| UInt8 | | |
-| UInt16 | | |
-| UInt32 |  | |
-| Float32 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
\ No newline at end of file
+| | absolute | sqrt | exp | exp2 | log | log2 | cbrt | power |
+|-|-|-|-|-|-|-|-|-|
+| Int8 | | | | | | | | |
+| Int16 | | | | | | | | |
+| Int32 | | | | | | | | ✓ |
+| UInt8 | | | | | | | | |
+| UInt16 | | | | | | | | |
+| UInt32 | | | | | | | | |
+| Float32 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
\ No newline at end of file