diff --git a/tripy/tests/integration/conftest.py b/tripy/tests/integration/conftest.py new file mode 100644 index 000000000..1229219f2 --- /dev/null +++ b/tripy/tests/integration/conftest.py @@ -0,0 +1,61 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2024-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest + +import tripy as tp + + +@pytest.fixture(params=["compile", "eager"]) +def eager_or_compiled(request): + def wrapper(func, *args, **kwargs): + def get_input_info(x: tp.Tensor): + return tp.InputInfo(list(map(int, x.shape)), dtype=x.dtype) + + if request.param == "eager": + return func(*args, **kwargs) + + assert request.param == "compile" + + compile_args = [] + for arg in args: + # We don't want to feed DimensionSize as a dynamic input to the compiler (https://github.com/NVIDIA/TensorRT-Incubator/issues/65). + if isinstance(arg, tp.Tensor) and not isinstance(arg, tp.DimensionSize): + compile_args.append(get_input_info(arg)) + else: + compile_args.append(arg) + compile_args = tuple(compile_args) + + compile_kwargs = dict( + ( + k, + ((get_input_info(v) if isinstance(v, tp.Tensor) and not isinstance(v, tp.DimensionSize) else v)), + ) + for k, v in kwargs.items() + ) + + compiled_func = tp.compile(func, args=compile_args, kwargs=compile_kwargs) + + tensor_args = tuple(x for x in args if isinstance(x, tp.Tensor) and not isinstance(x, tp.DimensionSize)) + + tensor_kwargs = { + k: v for k, v in kwargs.items() if isinstance(v, tp.Tensor) and not isinstance(v, tp.DimensionSize) + } + + return compiled_func(*tensor_args, **tensor_kwargs) + + return wrapper diff --git a/tripy/tests/integration/test_batchnorm.py b/tripy/tests/integration/test_batchnorm.py index 37f6cbf82..89a4c6715 100644 --- a/tripy/tests/integration/test_batchnorm.py +++ b/tripy/tests/integration/test_batchnorm.py @@ -26,7 +26,7 @@ class TestBatchNorm: @pytest.mark.parametrize("torch_dtype, tp_dtype", DTYPES) @pytest.mark.parametrize("input_shape", [(2, 2, 2, 2)]) - def test_batchnorm_accuracy(self, torch_dtype, tp_dtype, input_shape): + def test_batchnorm_accuracy(self, torch_dtype, tp_dtype, input_shape, eager_or_compiled): eps = 1e-5 num_features = input_shape[1] # Number of channels in the input tensor batchnorm = torch.nn.BatchNorm2d(num_features=num_features, eps=eps, dtype=torch_dtype) @@ -45,7 +45,7 @@ def test_batchnorm_accuracy(self, torch_dtype, tp_dtype, input_shape): input = torch.randn(input_shape, dtype=torch_dtype).to("cuda") tp_input = tp.Tensor(input, dtype=tp_dtype) - output = tp_batchnorm(tp_input) + output = eager_or_compiled(tp_batchnorm, tp_input) batchnorm.to("cuda").eval() with torch.no_grad(): diff --git a/tripy/tests/integration/test_cast.py b/tripy/tests/integration/test_cast.py index 3e5902924..634373237 100644 --- a/tripy/tests/integration/test_cast.py +++ b/tripy/tests/integration/test_cast.py @@ -30,54 +30,53 @@ class TestCast: [ (np.int32, np.float32), (np.float32, np.int32), - 
(np.int64, np.float32), - (np.float32, np.int64), - (np.int64, np.int32), - (np.int64, np.int8), (np.int32, np.int8), (np.float32, np.int8), - (np.int8, np.int64), (np.int8, np.int32), (np.int8, np.float32), # important to test conversion into bool because default StableHLO semantics # are simply to truncate to i1, which is not desirable (np.float32, bool), (np.int32, bool), - (np.int64, bool), # requires a dequantization first # TODO(#219): Dequantize fails with dynamic shapes # (np.int8, bool), ], ) - def test_cast(self, input_dtype, target_dtype): + def test_cast(self, input_dtype, target_dtype, eager_or_compiled): tp_input_dtype = NUMPY_TO_TRIPY[input_dtype] tp_target_dtype = NUMPY_TO_TRIPY[target_dtype] # TODO(#222): Integer casts with negative numbers fail in many cases input_tensor = tp.Tensor([0, 1, 2], dtype=tp_input_dtype) np_input = cp.from_dlpack(input_tensor).get() - output = tp.cast(input_tensor, tp_target_dtype) + output = eager_or_compiled(tp.cast, input_tensor, tp_target_dtype) assert np.array_equal(cp.from_dlpack(output).get(), np_input.astype(target_dtype)) # these dtypes don't have analogues in numpy @pytest.mark.parametrize("source_dtype", [pytest.param(tp.float8, marks=skip_if_older_than_sm89), tp.int4]) - def test_cast_quantized_dtypes_into_bool(self, source_dtype): + def test_cast_quantized_dtypes_into_bool(self, source_dtype, eager_or_compiled): # TODO(#223): Using an odd size leads to a strange crash, so can't just use [-1.0, 0.0, 1.0] input_tensor = tp.Tensor([-1.0, 0.0, 0.0, 1.0], dtype=tp.float32) - q = tp.quantize(input_tensor, scale=1.0, dtype=source_dtype) - output = tp.cast(q, tp.bool) + + def func(input): + q = tp.quantize(input, scale=1.0, dtype=source_dtype) + output = tp.cast(q, tp.bool) + return output + + output = eager_or_compiled(func, input_tensor) assert cp.from_dlpack(output).get().tolist() == [True, False, False, True] - @pytest.mark.parametrize("target_dtype", [np.float32, np.int32, np.int64, np.int8]) - def test_cast_from_bool(self, target_dtype): + @pytest.mark.parametrize("target_dtype", [np.float32, np.int32, np.int8]) + def test_cast_from_bool(self, target_dtype, eager_or_compiled): tp_target_dtype = NUMPY_TO_TRIPY[target_dtype] # in principle, it is not important what *specific* values we convert to, # so long as false is mapped to 0 and true to nonzero input_tensor = tp.Tensor([False, True], dtype=tp.bool) np_input = cp.from_dlpack(input_tensor).get() - output = tp.cast(input_tensor, tp_target_dtype) + output = eager_or_compiled(tp.cast, input_tensor, tp_target_dtype) tp_compare_to_zero = cp.from_dlpack(output).get() == 0 np_compare_to_zero = np_input.astype(target_dtype) == 0 diff --git a/tripy/tests/integration/test_concatenate.py b/tripy/tests/integration/test_concatenate.py index 01ea823b5..9df2d2f70 100644 --- a/tripy/tests/integration/test_concatenate.py +++ b/tripy/tests/integration/test_concatenate.py @@ -33,9 +33,9 @@ class TestConcatenate: ([(2, 3, 4)], 0), ], ) - def test_concat(self, tensor_shapes, dim): + def test_concat(self, tensor_shapes, dim, eager_or_compiled): tensors = [tp.ones(shape) for shape in tensor_shapes] - out = tp.concatenate(tensors, dim=dim) + out = eager_or_compiled(tp.concatenate, tensors, dim=dim) assert np.array_equal( cp.from_dlpack(out).get(), np.concatenate([np.ones(shape) for shape in tensor_shapes], axis=dim) ) @@ -44,8 +44,8 @@ def test_concat(self, tensor_shapes, dim): "tensor_shapes, dim", [([(2, 3, 4), (2, 4, 4)], 0), ([(4, 5, 6), (4, 1, 6)], -1)], ) - def test_negative_concat(self, 
tensor_shapes, dim): + def test_negative_concat(self, tensor_shapes, dim, eager_or_compiled): tensors = [tp.ones(shape) for shape in tensor_shapes] with helper.raises(tp.TripyException, match=f"not compatible at non-concat index"): - out = tp.concatenate(tensors, dim=dim) + out = eager_or_compiled(tp.concatenate, tensors, dim=dim) print(out) diff --git a/tripy/tests/integration/test_conv.py b/tripy/tests/integration/test_conv.py index 3f67c6629..078c2890d 100644 --- a/tripy/tests/integration/test_conv.py +++ b/tripy/tests/integration/test_conv.py @@ -75,7 +75,7 @@ class ConvTestCase: @pytest.mark.parametrize("torch_dtype,tp_dtype", DTYPES) class TestConvolution: @pytest.mark.parametrize("test_case", test_cases_1d) - def test_convolution_1d(self, torch_dtype, tp_dtype, test_case): + def test_convolution_1d(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): if not test_case.torch_pad: test_case.torch_pad = 0 if not test_case.stride: @@ -122,7 +122,7 @@ def test_convolution_1d(self, torch_dtype, tp_dtype, test_case): conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype) expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) # FP32 kernel seems to lose some precision, and FP16 needs to be run in FP32 on torch rtol_ = 4e-5 if tp_dtype == tp.float32 else 1e-3 @@ -131,7 +131,7 @@ def test_convolution_1d(self, torch_dtype, tp_dtype, test_case): assert list(output_torch.shape) == list(expected.shape) @pytest.mark.parametrize("test_case", test_cases_2d) - def test_convolution_2d(self, torch_dtype, tp_dtype, test_case): + def test_convolution_2d(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): if not test_case.torch_pad: test_case.torch_pad = 0 if not test_case.stride: @@ -178,7 +178,7 @@ def test_convolution_2d(self, torch_dtype, tp_dtype, test_case): conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype) expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) rtol_ = 2e-7 if tp_dtype == tp.float32 else 1.5e-3 output_torch = torch.from_dlpack(output) @@ -186,7 +186,7 @@ def test_convolution_2d(self, torch_dtype, tp_dtype, test_case): assert list(output_torch.shape) == list(expected.shape) @pytest.mark.parametrize("test_case", test_cases_3d) - def test_convolution_3d(self, torch_dtype, tp_dtype, test_case): + def test_convolution_3d(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): pytest.skip("TODO (#260): Fix accuracy bugs in 3D conv") if not test_case.torch_pad: test_case.torch_pad = 0 @@ -245,14 +245,14 @@ def test_convolution_3d(self, torch_dtype, tp_dtype, test_case): return expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) rtol_ = 2e-4 if tp_dtype == tp.float32 else 1.4e-3 # 3d conv has greater accumulation error output_torch = torch.from_dlpack(output) assert torch.allclose(output_torch, expected, rtol=rtol_) assert list(output_torch.shape) == list(expected.shape) - def test_uneven_padding(self, torch_dtype, tp_dtype): + def test_uneven_padding(self, torch_dtype, tp_dtype, eager_or_compiled): input_torch = torch.arange(200, dtype=torch.float32, device=torch.device("cuda")).reshape(*(2, 4, 5, 5)) input = tp.cast(tp.Tensor(input_torch), tp_dtype) @@ -282,7 +282,7 @@ def test_uneven_padding(self, torch_dtype, tp_dtype): input_torch = torch_pad(input_torch) expected = 
conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) rtol_ = 2e-7 if tp_dtype == tp.float32 else 2e-3 output_torch = torch.from_dlpack(output) diff --git a/tripy/tests/integration/test_conv_transpose.py b/tripy/tests/integration/test_conv_transpose.py index 9cc95f890..2245d024b 100644 --- a/tripy/tests/integration/test_conv_transpose.py +++ b/tripy/tests/integration/test_conv_transpose.py @@ -81,7 +81,7 @@ class ConvTestCase: @pytest.mark.parametrize("torch_dtype,tp_dtype", DTYPES) class TestConvolution: @pytest.mark.parametrize("test_case", test_cases_transpose_1d) - def test_transposed_convolution_1d(self, torch_dtype, tp_dtype, test_case): + def test_transposed_convolution_1d(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): if not test_case.torch_pad: test_case.torch_pad = 0 if not test_case.stride: @@ -129,14 +129,14 @@ def test_transposed_convolution_1d(self, torch_dtype, tp_dtype, test_case): conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype) expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) - rtol_ = 1e-3 + rtol_ = 3e-3 assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_) assert output.shape == list(expected.shape) @pytest.mark.parametrize("test_case", test_cases_transpose_2d) - def test_transposed_convolution_2d(self, torch_dtype, tp_dtype, test_case): + def test_transposed_convolution_2d(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): if not test_case.torch_pad: test_case.torch_pad = 0 if not test_case.stride: @@ -184,14 +184,14 @@ def test_transposed_convolution_2d(self, torch_dtype, tp_dtype, test_case): conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype) expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) rtol_ = 1e-2 assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_) assert output.shape == list(expected.shape) @pytest.mark.parametrize("test_case", test_cases_transpose_3d) - def test_transposed_convolution_3d(self, torch_dtype, tp_dtype, test_case): + def test_transposed_convolution_3d(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): if not test_case.torch_pad: test_case.torch_pad = 0 if not test_case.stride: @@ -239,12 +239,12 @@ def test_transposed_convolution_3d(self, torch_dtype, tp_dtype, test_case): conv_layer.bias = tp.cast(tp.Tensor(conv_layer_torch.bias.data), tp_dtype) expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) rtol_ = 1.3e-6 if tp_dtype == tp.float32 else 1.6e-3 assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_) assert output.shape == list(expected.shape) - def test_transposed_equivalency(self, torch_dtype, tp_dtype): + def test_transposed_equivalency(self, torch_dtype, tp_dtype, eager_or_compiled): input_torch = torch.arange(9, dtype=torch.float32, device=torch.device("cuda")).reshape(*(1, 1, 3, 3)) input = tp.cast(tp.Tensor(input_torch), tp_dtype) @@ -277,8 +277,8 @@ def test_transposed_equivalency(self, torch_dtype, tp_dtype): expected = conv_layer_torch(input_torch).to(torch_dtype) expected_transpose = conv_transpose_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) - output_transpose = conv_transpose_layer(input) + output = eager_or_compiled(conv_layer, input) + output_transpose = 
eager_or_compiled(conv_transpose_layer, input) rtol_ = 2e-7 if tp_dtype == tp.float32 else 9e-4 assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_) @@ -291,7 +291,7 @@ def test_transposed_equivalency(self, torch_dtype, tp_dtype): assert list(expected.shape) == list(expected_transpose.shape) @pytest.mark.parametrize("test_case", test_cases_transpose_downscale) - def test_transposed_downscale(self, torch_dtype, tp_dtype, test_case): + def test_transposed_downscale(self, torch_dtype, tp_dtype, test_case, eager_or_compiled): input_torch = torch.arange(9, dtype=torch.float32, device=torch.device("cuda")).reshape(*(1, 1, 3, 3)) input = tp.cast(tp.Tensor(input_torch), tp_dtype) @@ -320,7 +320,7 @@ def test_transposed_downscale(self, torch_dtype, tp_dtype, test_case): conv_layer.weight = tp.cast(tp.Tensor(conv_layer_torch.weight.data), tp_dtype) expected = conv_layer_torch(input_torch).to(torch_dtype) - output = conv_layer(input) + output = eager_or_compiled(conv_layer, input) rtol_ = 1e-15 if tp_dtype == tp.float32 else 1e-10 assert tp.allclose(output, tp.Tensor(expected), rtol=rtol_) diff --git a/tripy/tests/integration/test_cumsum.py b/tripy/tests/integration/test_cumsum.py index c8f8bbb7e..2360f3eaa 100644 --- a/tripy/tests/integration/test_cumsum.py +++ b/tripy/tests/integration/test_cumsum.py @@ -30,11 +30,10 @@ class TestCumsum: ([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], 0, [[[1, 2], [3, 4]], [[6, 8], [10, 12]]]), ], ) - def test_cumsum(self, data, dim, expected): + def test_cumsum(self, data, dim, expected, eager_or_compiled): inp = tp.Tensor(data, dtype=tp.float32) - out = tp.cumsum(inp, dim=dim) - + out = eager_or_compiled(tp.cumsum, inp, dim=dim) expected = tp.Tensor(expected, dtype=tp.float32) assert tp.allclose(out, expected) assert out.shape == expected.shape diff --git a/tripy/tests/integration/test_dequantize.py b/tripy/tests/integration/test_dequantize.py index f44f3a23b..4924ab9a6 100644 --- a/tripy/tests/integration/test_dequantize.py +++ b/tripy/tests/integration/test_dequantize.py @@ -29,12 +29,16 @@ class TestDequantize: @pytest.mark.parametrize( "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] ) - def test_dequantize_int8_per_tensor(self, dtype): + def test_dequantize_int8_per_tensor(self, dtype, eager_or_compiled): data = [4, 8] input_tp = tp.Tensor(data, dtype=tp.int8) scale = torch.tensor(0.5, dtype=TORCH_DTYPES[dtype]) scale_tp = tp.Tensor(scale, dtype=dtype) - dequantized = tp.dequantize(input_tp, scale_tp, dtype) + + def func(input): + return tp.dequantize(input, scale_tp, dtype) + + dequantized = eager_or_compiled(func, input_tp) expected = torch.tensor(data) * scale output = torch.from_dlpack(dequantized) assert torch.allclose(expected, output.to("cpu")) @@ -42,7 +46,7 @@ def test_dequantize_int8_per_tensor(self, dtype): @pytest.mark.parametrize( "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] ) - def test_dequantize_int8_per_channel(self, dtype): + def test_dequantize_int8_per_channel(self, dtype, eager_or_compiled): # TODO: Fix in #153 if dtype == tp.float16: pytest.skip("TRT does not support fp16->int8 per-channel dequant.") @@ -50,7 +54,11 @@ def test_dequantize_int8_per_channel(self, dtype): input_tp = tp.Tensor(data, dtype=tp.int8) scale = torch.tensor([0.8, 0.9], dtype=TORCH_DTYPES[dtype]) scale_tp = tp.Tensor(scale, dtype=dtype) - dequantized = tp.dequantize(input_tp, scale_tp, dtype, dim=0) + + def func(input): + return tp.dequantize(input, scale_tp, dtype, dim=0) + + 
dequantized = eager_or_compiled(func, input_tp) expected = torch.tensor(data) * scale.reshape((2, 1)) output = torch.from_dlpack(dequantized) assert torch.allclose(expected, output.to("cpu")) diff --git a/tripy/tests/integration/test_expand.py b/tripy/tests/integration/test_expand.py index d2ab402de..09b1fcfca 100644 --- a/tripy/tests/integration/test_expand.py +++ b/tripy/tests/integration/test_expand.py @@ -22,24 +22,24 @@ class TestExpand: - def test_int_sizes(self): + def test_int_sizes(self, eager_or_compiled): input = tp.ones((2, 1)) - out = tp.expand(input, (-1, 2)) + out = eager_or_compiled(tp.expand, input, (-1, 2)) assert np.array_equal(cp.from_dlpack(out).get(), np.ones((2, 2), dtype=np.float32)) - def test_shape_sizes(self): + def test_shape_sizes(self, eager_or_compiled): input = tp.ones((2, 1)) a = tp.ones((2, 4)) - out = tp.expand(input, a.shape) + out = eager_or_compiled(tp.expand, input, a.shape) assert np.array_equal(cp.from_dlpack(out).get(), np.ones((2, 4), dtype=np.float32)) - def test_extra_dims(self): + def test_extra_dims(self, eager_or_compiled): input = tp.ones((2, 1)) - out = tp.expand(input, (1, -1, 2)) + out = eager_or_compiled(tp.expand, input, (1, -1, 2)) assert np.array_equal(cp.from_dlpack(out).get(), np.ones((1, 2, 2), dtype=np.float32)) - def test_mixed_sizes(self): + def test_mixed_sizes(self, eager_or_compiled): input = tp.ones((2, 1, 1)) a = tp.ones((4, 4)) - out = tp.expand(input, (-1, a.shape[0], a.shape[1])) + out = eager_or_compiled(tp.expand, input, (-1, a.shape[0], a.shape[1])) assert np.array_equal(cp.from_dlpack(out).get(), np.ones((2, 4, 4), dtype=np.float32)) diff --git a/tripy/tests/integration/test_flatten.py b/tripy/tests/integration/test_flatten.py index da16c181b..59bc32f57 100644 --- a/tripy/tests/integration/test_flatten.py +++ b/tripy/tests/integration/test_flatten.py @@ -29,29 +29,29 @@ class TestFlatten: ((2, 3, 4, 5), 1, 3, (2, 60)), # Flatten dimensions 1 through 3 ], ) - def test_flatten(self, shape, start_dim, end_dim, expected_shape): + def test_flatten(self, shape, start_dim, end_dim, expected_shape, eager_or_compiled): cp_a = cp.arange(np.prod(shape)).reshape(shape).astype(np.float32) a = tp.Tensor(cp_a) - b = tp.flatten(a, start_dim=start_dim, end_dim=end_dim) + b = eager_or_compiled(tp.flatten, a, start_dim=start_dim, end_dim=end_dim) assert b.shape == list(expected_shape) assert np.array_equal(cp.from_dlpack(b).get(), cp_a.reshape(expected_shape).get()) - def test_flatten_invalid_dims(self): + def test_flatten_invalid_dims(self, eager_or_compiled): shape = (2, 3, 4) with pytest.raises(tp.TripyException, match="Invalid dimensions"): a = tp.ones(shape) # Invalid because end_dim < start_dim - tp.flatten(a, start_dim=2, end_dim=1) + eager_or_compiled(tp.flatten, a, start_dim=2, end_dim=1) - def test_flatten_single_dim(self): + def test_flatten_single_dim(self, eager_or_compiled): shape = (2, 3, 4) a = tp.ones(shape) # Flattening a single dimension should not change the output - b = tp.flatten(a, start_dim=1, end_dim=1) + b = eager_or_compiled(tp.flatten, a, start_dim=1, end_dim=1) assert b.shape == [2, 3, 4] assert np.array_equal(cp.from_dlpack(b).get(), np.ones(shape, dtype=np.float32)) - def test_flatten_with_unknown_dims(self): + def test_flatten_with_unknown_dims(self, eager_or_compiled): a = tp.ones((2, 3, 4, 5)) - b = tp.flatten(a, start_dim=1, end_dim=-1) + b = eager_or_compiled(tp.flatten, a, start_dim=1, end_dim=-1) assert np.array_equal(cp.from_dlpack(b).get(), np.ones((2, 60), dtype=np.float32)) diff --git 
a/tripy/tests/integration/test_flip.py b/tripy/tests/integration/test_flip.py index 8118716d5..ef53f6c1a 100644 --- a/tripy/tests/integration/test_flip.py +++ b/tripy/tests/integration/test_flip.py @@ -26,34 +26,34 @@ class TestFlip: "dims", [0, 1, None, [0, 1], [1, 0], -1, -2, [0, -1], [-2, 1]], ) - def test_flip(self, dims): + def test_flip(self, dims, eager_or_compiled): cp_a = cp.arange(16).reshape((4, 4)).astype(cp.float32) a = tp.Tensor(cp_a, device=tp.device("gpu")) f = tp.flip(a, dims=dims) assert np.array_equal(cp.from_dlpack(f).get(), np.flip(cp_a.get(), axis=dims)) # also ensure that flipping a second time restores the original value - f2 = tp.flip(f, dims=dims) + f2 = eager_or_compiled(tp.flip, f, dims=dims) assert cp.array_equal(cp.from_dlpack(f2), cp_a) - def test_no_op(self): + def test_no_op(self, eager_or_compiled): cp_a = cp.arange(16).reshape((4, 4)).astype(cp.float32) a = tp.Tensor(cp_a, device=tp.device("gpu")) - f = tp.flip(a, dims=[]) + f = eager_or_compiled(tp.flip, a, dims=[]) assert tp.equal(a, f) - def test_zero_rank(self): + def test_zero_rank(self, eager_or_compiled): t = tp.Tensor(1) - f = tp.flip(t) + f = eager_or_compiled(tp.flip, t) assert tp.equal(t, f) @pytest.mark.parametrize( "dims1, dims2", [(0, -2), (1, -1), ([0, 1], None), ([0, 1], [1, 0]), ([0, 1], [-2, -1])], ) - def test_equivalences(self, dims1, dims2): + def test_equivalences(self, dims1, dims2, eager_or_compiled): cp_a = cp.arange(16).reshape((4, 4)).astype(cp.float32) a = tp.Tensor(cp_a, device=tp.device("gpu")) - f1 = tp.flip(a, dims=dims1) - f2 = tp.flip(a, dims=dims2) + f1 = eager_or_compiled(tp.flip, a, dims=dims1) + f2 = eager_or_compiled(tp.flip, a, dims=dims2) assert tp.equal(f1, f2) diff --git a/tripy/tests/integration/test_full.py b/tripy/tests/integration/test_full.py index 9a04c1664..d96885ffa 100644 --- a/tripy/tests/integration/test_full.py +++ b/tripy/tests/integration/test_full.py @@ -22,21 +22,21 @@ class TestFull: - def test_normal_shape(self): - out = tp.full((2, 2), 5.0, tp.float32) + def test_normal_shape(self, eager_or_compiled): + out = eager_or_compiled(tp.full, (2, 2), 5.0, tp.float32) assert np.array_equal(cp.from_dlpack(out).get(), np.full((2, 2), 5.0, np.float32)) - def test_shape_tensor(self): + def test_shape_tensor(self, eager_or_compiled): a = tp.ones((2, 3)) - out = tp.full(a.shape, 5.0, tp.float32) + out = eager_or_compiled(tp.full, a.shape, 5.0, tp.float32) assert np.array_equal(cp.from_dlpack(out).get(), np.full((2, 3), 5.0, np.float32)) - def test_mixed_shape(self): + def test_mixed_shape(self, eager_or_compiled): a = tp.ones((2, 3)) - out = tp.full((a.shape[0], 4), 5.0, tp.float32) + out = eager_or_compiled(tp.full, (a.shape[0], 4), 5.0, tp.float32) assert np.array_equal(cp.from_dlpack(out).get(), np.full((2, 4), 5.0, np.float32)) - def test_value_as_tensor(self): + def test_value_as_tensor(self, eager_or_compiled): a = tp.ones((2, 3)) - out = tp.full((a.shape[0], 4), tp.Tensor(8.0), tp.float32) + out = eager_or_compiled(tp.full, (a.shape[0], 4), tp.Tensor(8.0), tp.float32) assert np.array_equal(cp.from_dlpack(out).get(), np.full((2, 4), 8.0, np.float32)) diff --git a/tripy/tests/integration/test_gather.py b/tripy/tests/integration/test_gather.py index e2f088346..d0e05a118 100644 --- a/tripy/tests/integration/test_gather.py +++ b/tripy/tests/integration/test_gather.py @@ -34,11 +34,11 @@ class TestGatherOp: ((2, 3, 4), 1, (2)), ], ) - def test_gather(self, x_shape, axis, indices): + def test_gather(self, x_shape, axis, indices, eager_or_compiled): x = 
np.arange(np.prod(x_shape)).reshape(x_shape) indices_tp = tp.Tensor(indices) a = tp.Tensor(x) a = tp.cast(a, tp.int32) - out = tp.gather(a, axis, indices_tp) - out.eval() + out = eager_or_compiled(tp.gather, a, axis, indices_tp) + assert np.array_equal(cp.from_dlpack(out).get(), np.take(x, indices, axis)) diff --git a/tripy/tests/integration/test_groupnorm.py b/tripy/tests/integration/test_groupnorm.py index 5f1cd8bc3..d56c15928 100644 --- a/tripy/tests/integration/test_groupnorm.py +++ b/tripy/tests/integration/test_groupnorm.py @@ -29,7 +29,7 @@ class TestGroupNorm: @pytest.mark.parametrize("input_shape", [(1, 10, 2)]) @pytest.mark.parametrize("num_groups", [2, 5]) @pytest.mark.parametrize("num_channels", [10]) - def test_groupnorm_accuracy(self, torch_dtype, tp_dtype, input_shape, num_groups, num_channels): + def test_groupnorm_accuracy(self, torch_dtype, tp_dtype, input_shape, num_groups, num_channels, eager_or_compiled): eps = 1e-5 groupnorm = torch.nn.GroupNorm( num_groups=num_groups, @@ -50,7 +50,7 @@ def test_groupnorm_accuracy(self, torch_dtype, tp_dtype, input_shape, num_groups input = torch.arange(torch.prod(torch.Tensor(input_shape))).reshape(input_shape).to(torch_dtype) tp_input = tp.Tensor(input, dtype=tp_dtype) - output = tp.copy(tp_groupnorm(tp_input), tp.device("cpu")) + output = eager_or_compiled(tp.copy, tp_groupnorm(tp_input), tp.device("cpu")) with torch.no_grad(): expected = groupnorm(input) diff --git a/tripy/tests/integration/test_iota.py b/tripy/tests/integration/test_iota.py index 2df779da2..44cb38ab6 100644 --- a/tripy/tests/integration/test_iota.py +++ b/tripy/tests/integration/test_iota.py @@ -49,17 +49,13 @@ def _compute_ref_iota(self, dtype, shape, dim): "shape, dim", [ ((2, 3), 1), - ((2, 3), None), + ((2, 3), 0), ((2, 3), -1), ((2, 3, 4), 2), ], ) - def test_iota(self, dtype, shape, dim): - if dim: - output = tp.iota(shape, dim, dtype[1]) - else: - output = tp.iota(shape, dtype=dtype[1]) - + def test_iota(self, dtype, shape, dim, eager_or_compiled): + output = eager_or_compiled(tp.iota, shape, dim, dtype[1]) assert np.array_equal(cp.from_dlpack(output).get(), self._compute_ref_iota(dtype[0], shape, dim)) @pytest.mark.parametrize("dtype", DTYPE_PARAMS) @@ -72,11 +68,11 @@ def test_iota(self, dtype, shape, dim): ((2, 3, 4), 2), ], ) - def test_iota_like(self, dtype, shape, dim): + def test_iota_like(self, dtype, shape, dim, eager_or_compiled): if dim: - output = tp.iota_like(tp.ones(shape), dim, dtype[1]) + output = eager_or_compiled(tp.iota_like, tp.ones(shape), dim, dtype[1]) else: - output = tp.iota_like(tp.ones(shape), dtype=dtype[1]) + output = eager_or_compiled(tp.iota_like, tp.ones(shape), dtype=dtype[1]) assert np.array_equal(cp.from_dlpack(output).get(), self._compute_ref_iota(dtype[0], shape, dim)) @@ -98,12 +94,12 @@ def test_negative_no_casting(self, dtype): ): print(out) - def test_iota_from_shape_tensor(self): + def test_iota_from_shape_tensor(self, eager_or_compiled): a = tp.ones((2, 2)) - output = tp.iota(a.shape) + output = eager_or_compiled(tp.iota, a.shape) assert np.array_equal(cp.from_dlpack(output).get(), self._compute_ref_iota("float32", (2, 2), 0)) - def test_iota_from_mixed_seqence(self): + def test_iota_from_mixed_seqence(self, eager_or_compiled): a = tp.ones((2, 2)) - output = tp.iota((3, a.shape[0])) + output = eager_or_compiled(tp.iota, (3, a.shape[0])) assert np.array_equal(cp.from_dlpack(output).get(), self._compute_ref_iota("float32", (3, 2), 0)) diff --git a/tripy/tests/integration/test_layernorm.py 
b/tripy/tests/integration/test_layernorm.py index 088054c39..b1304ae63 100644 --- a/tripy/tests/integration/test_layernorm.py +++ b/tripy/tests/integration/test_layernorm.py @@ -31,7 +31,7 @@ class TestLayerNorm: @pytest.mark.parametrize("torch_dtype, tp_dtype", DTYPES) @pytest.mark.parametrize("input_shape", [(2, 2, 2)]) @pytest.mark.parametrize("normalized_shape", [(2, 2), (2,)]) - def test_layernorm_accuracy(self, torch_dtype, tp_dtype, input_shape, normalized_shape): + def test_layernorm_accuracy(self, torch_dtype, tp_dtype, input_shape, normalized_shape, eager_or_compiled): eps = 1e-5 layernorm = torch.nn.LayerNorm( normalized_shape=normalized_shape, @@ -51,7 +51,7 @@ def test_layernorm_accuracy(self, torch_dtype, tp_dtype, input_shape, normalized input = torch.arange(torch.prod(torch.Tensor(input_shape))).reshape(input_shape).to(torch_dtype) tp_input = tp.Tensor(input, dtype=tp_dtype) - output = tp.copy(tp_layernorm(tp_input), tp.device("cpu")) + output = eager_or_compiled(tp.copy, tp_layernorm(tp_input), tp.device("cpu")) with torch.no_grad(): expected = layernorm(input) diff --git a/tripy/tests/integration/test_linear.py b/tripy/tests/integration/test_linear.py index ff4899a74..137d4a00d 100644 --- a/tripy/tests/integration/test_linear.py +++ b/tripy/tests/integration/test_linear.py @@ -25,7 +25,7 @@ class TestLinear: - def test_linear_module(self): + def test_linear_module(self, eager_or_compiled): class Network(tp.Module): def __init__(self): super().__init__() @@ -41,7 +41,7 @@ def __call__(self, x): cp_a1 = cp.ones((3, 4), dtype=cp.float32) a1 = tp.Tensor(cp_a1, device=tp.device("gpu")) - out = net(a1) + out = eager_or_compiled(net, a1) np_out = cp_a1.get() @ (np_weight.transpose()) + np_bias @@ -84,7 +84,7 @@ def __call__(self, x): @pytest.mark.parametrize("use_input_scale", [False, True]) @pytest.mark.parametrize("quant_dtype", [tp.int8, pytest.param(tp.float8, marks=skip_if_older_than_sm89)]) @pytest.mark.parametrize("weight_quant_dim", [None, 0, 1]) - def test_quant_linear(self, use_input_scale, quant_dtype, weight_quant_dim): + def test_quant_linear(self, use_input_scale, quant_dtype, weight_quant_dim, eager_or_compiled): net = self._create_network(use_input_scale, quant_dtype, weight_quant_dim) np_weight = cp.from_dlpack(net.linear.weight).get() np_bias = cp.from_dlpack(net.linear.bias).get() @@ -96,9 +96,9 @@ def test_quant_linear(self, use_input_scale, quant_dtype, weight_quant_dim): tp.TripyException, match="Unsupported quantization parameters for Linear module.", ): - out = net(a1) + out = eager_or_compiled(net, a1) else: - out = net(a1) + out = eager_or_compiled(net, a1) np_out = cp_a1.get() @ (np_weight.transpose()) + np_bias @@ -114,7 +114,7 @@ def test_quant_linear(self, use_input_scale, quant_dtype, weight_quant_dim): ], ids=["block-wise", "per-tensor", "per-channel-0", "per-channel-1"], ) - def test_quant_linear_int4_weight_only(self, weight_quant_dim, scale): + def test_quant_linear_int4_weight_only(self, weight_quant_dim, scale, eager_or_compiled): scale = tp.Parameter(scale) linear = tp.Linear(4, 8, quant_dtype=tp.int4, weight_quant_dim=weight_quant_dim) @@ -128,7 +128,7 @@ def test_quant_linear_int4_weight_only(self, weight_quant_dim, scale): cp_input = cp.ones((4, 4), dtype=np.float32) input = tp.Tensor(cp_input, device=tp.device("gpu")) - out = linear(input) + out = eager_or_compiled(linear, input) np_out = cp_input.get() @ (np_weight.transpose()) + np_bias diff --git a/tripy/tests/integration/test_matrix_multiplication.py 
b/tripy/tests/integration/test_matrix_multiplication.py index 57731b674..b19e38937 100644 --- a/tripy/tests/integration/test_matrix_multiplication.py +++ b/tripy/tests/integration/test_matrix_multiplication.py @@ -23,23 +23,27 @@ import tripy.common.datatype +def gemm(a, b): + return a @ b + + class TestMatrixMultiplication: - def test_2d_tensors(self): + def test_2d_tensors(self, eager_or_compiled): a_np = np.arange(6).reshape((2, 3)).astype(np.float32) b_np = np.arange(6).reshape((3, 2)).astype(np.float32) a = tp.Tensor(a_np) b = tp.Tensor(b_np) - out = a @ b + out = eager_or_compiled(gemm, a, b) assert tp.allclose(out, tp.Tensor(a_np @ b_np)) - def test_1d_tensors(self): + def test_1d_tensors(self, eager_or_compiled): a_np = np.arange(64).astype(np.float32) # 1D Tensor b_np = np.arange(64).astype(np.float32) # 1D Tensor a = tripy.Tensor(cp.asanyarray(a_np)) b = tripy.Tensor(cp.asanyarray(b_np)) - out = a @ b + out = eager_or_compiled(gemm, a, b) assert tp.allclose(out, tp.Tensor(cp.array(a_np @ b_np)), atol=1e-2) @pytest.mark.parametrize( @@ -53,11 +57,11 @@ def test_1d_tensors(self): ((1, 2, 3), (0, 0, 3, 2)), # Broadcasting batch dims with 0 ], ) - def test_broadcast_gemm(self, shape_a, shape_b): + def test_broadcast_gemm(self, shape_a, shape_b, eager_or_compiled): a_np = np.arange(np.prod(shape_a)).reshape(shape_a).astype(np.float32) b_np = np.arange(np.prod(shape_b)).reshape(shape_b).astype(np.float32) a = tp.Tensor(a_np) b = tp.Tensor(b_np) - out = a @ b + out = eager_or_compiled(gemm, a, b) assert tp.allclose(out, tp.Tensor(a_np @ b_np)) diff --git a/tripy/tests/integration/test_outer.py b/tripy/tests/integration/test_outer.py index 8ba7be979..53f8b5237 100644 --- a/tripy/tests/integration/test_outer.py +++ b/tripy/tests/integration/test_outer.py @@ -19,10 +19,10 @@ class TestOuter: - def test_outer(self): + def test_outer(self, eager_or_compiled): v1 = tp.arange(5, dtype=tp.float32) v2 = tp.arange(4, dtype=tp.float32) - output = tp.outer(v1, v2) + output = eager_or_compiled(tp.outer, v1, v2) t1 = torch.arange(5, dtype=torch.float32) t2 = torch.arange(4, dtype=torch.float32) @@ -30,9 +30,9 @@ def test_outer(self): assert output.shape == list(torch_out.shape) assert tp.allclose(output, tp.Tensor(torch_out)) - def test_empty(self): + def test_empty(self, eager_or_compiled): v1 = tp.Tensor([]) v2 = tp.arange(3, dtype=tp.float32) - output = tp.outer(v1, v2) + output = eager_or_compiled(tp.outer, v1, v2) assert output.shape == [0, 3] diff --git a/tripy/tests/integration/test_pad.py b/tripy/tests/integration/test_pad.py index 8843055ee..578cf4a12 100644 --- a/tripy/tests/integration/test_pad.py +++ b/tripy/tests/integration/test_pad.py @@ -29,19 +29,19 @@ class TestPad: (((1, 2), (2, 3)), 1), ], ) - def test_pad_constant(self, pad, value): + def test_pad_constant(self, pad, value, eager_or_compiled): inp = np.arange(4, dtype=np.int32).reshape((2, 2)) - out = tp.pad(tp.Tensor(inp), pad, value=value) + out = eager_or_compiled(tp.pad, tp.Tensor(inp), pad, value=value) expected = np.pad(inp, pad, constant_values=value) assert np.array_equal(cp.from_dlpack(out).get(), expected) - def test_pad_tensor(self): + def test_pad_tensor(self, eager_or_compiled): inp = np.arange(6, dtype=np.float32).reshape((2, 3)) inp_tp = tp.Tensor(inp) - out = tp.pad(tp.Tensor(inp), ((0, inp_tp.shape[0]), (inp_tp.shape[1], 0))) + out = eager_or_compiled(tp.pad, tp.Tensor(inp), ((0, inp_tp.shape[0]), (inp_tp.shape[1], 0))) expected = np.pad(inp, ((0, 2), (3, 0))) assert np.array_equal(cp.from_dlpack(out).get(), 
expected) diff --git a/tripy/tests/integration/test_pooling.py b/tripy/tests/integration/test_pooling.py index 86dd45a34..1e28f956e 100644 --- a/tripy/tests/integration/test_pooling.py +++ b/tripy/tests/integration/test_pooling.py @@ -32,7 +32,7 @@ class TestPooling: ) @pytest.mark.parametrize("dtype", [tp.float32, tp.float16, tp.int8]) @pytest.mark.parametrize("pool_type", ["max", "avg"]) - def test_pool_2d(self, kernel_dims, stride, padding, dtype, pool_type): + def test_pool_2d(self, kernel_dims, stride, padding, dtype, pool_type, eager_or_compiled): inp_tp = tp.reshape(tp.arange(64, dtype=dtype), (1, 1, 8, 8)) torch_padding = (padding[0][0], padding[1][0]) @@ -40,7 +40,7 @@ def test_pool_2d(self, kernel_dims, stride, padding, dtype, pool_type): pytest.skip("Torch average pool is not implemented for int8") if pool_type == "max": - out = tp.maxpool(inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) + out = eager_or_compiled(tp.maxpool, inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) pool_torch = torch.nn.MaxPool2d(kernel_size=kernel_dims, stride=stride, padding=torch_padding) elif pool_type == "avg": if torch_padding != (0, 0): @@ -48,7 +48,7 @@ def test_pool_2d(self, kernel_dims, stride, padding, dtype, pool_type): "https://github.com/NVIDIA/TensorRT-Incubator/issues/241: Tripy average pool is incorrect when padding != 0." ) - out = tp.avgpool(inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) + out = eager_or_compiled(tp.avgpool, inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) pool_torch = torch.nn.AvgPool2d(kernel_size=kernel_dims, stride=stride, padding=torch_padding) out_torch = torch.from_dlpack(out).to("cpu") @@ -64,7 +64,7 @@ def test_pool_2d(self, kernel_dims, stride, padding, dtype, pool_type): ) @pytest.mark.parametrize("dtype", [tp.float32, tp.float16]) @pytest.mark.parametrize("pool_type", ["max", "avg"]) - def test_pool_3d(self, kernel_dims, stride, padding, dtype, pool_type): + def test_pool_3d(self, kernel_dims, stride, padding, dtype, pool_type, eager_or_compiled): inp_tp = tp.reshape(tp.arange(512, dtype=dtype), (1, 1, 8, 8, 8)) torch_padding = (padding[0][0], padding[1][0], padding[2][0]) @@ -74,10 +74,10 @@ def test_pool_3d(self, kernel_dims, stride, padding, dtype, pool_type): ) if pool_type == "max": - out = tp.maxpool(inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) + out = eager_or_compiled(tp.maxpool, inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) pool_torch = torch.nn.MaxPool3d(kernel_size=kernel_dims, stride=stride, padding=torch_padding) elif pool_type == "avg": - out = tp.avgpool(inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) + out = eager_or_compiled(tp.avgpool, inp_tp, kernel_dims=kernel_dims, stride=stride, padding=padding) pool_torch = torch.nn.AvgPool3d(kernel_size=kernel_dims, stride=stride, padding=torch_padding) out_torch = torch.from_dlpack(out).to("cpu") diff --git a/tripy/tests/integration/test_quantize.py b/tripy/tests/integration/test_quantize.py index 826db83bf..ee458d108 100644 --- a/tripy/tests/integration/test_quantize.py +++ b/tripy/tests/integration/test_quantize.py @@ -30,24 +30,42 @@ class TestQuantize: @pytest.mark.parametrize( "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] ) - def test_quantize_int8_per_tensor(self, dtype): + def test_quantize_int8_per_tensor(self, dtype, eager_or_compiled): input = torch.tensor([1.0, 2.0], dtype=TORCH_DTYPES[dtype]) scale = torch.tensor(0.5, 
dtype=TORCH_DTYPES[dtype]) input_tp = tp.Tensor(input, dtype=dtype) scale_tp = tp.Tensor(scale, dtype=dtype) - quantized = tp.quantize(input_tp, scale_tp, tp.int8) + + def func(input): + return tp.quantize(input, scale_tp, tp.int8) + + quantized = eager_or_compiled(func, input_tp) expected = (input / scale).to(dtype=torch.int8) assert torch.equal(expected, torch.from_dlpack(quantized).to("cpu")) @pytest.mark.parametrize( - "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] + "dtype", + [ + tp.float32, + pytest.param( + tp.float16, + marks=pytest.mark.skip( + reason="Known float16 precision issues due to https://github.com/NVIDIA/TensorRT-Incubator/issues/392" + ), + ), + pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80), + ], ) - def test_quantize_int8_per_channel(self, dtype): + def test_quantize_int8_per_channel(self, dtype, eager_or_compiled): input = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=TORCH_DTYPES[dtype]) scale = torch.tensor([0.2, 0.1], dtype=TORCH_DTYPES[dtype]) input_tp = tp.Tensor(input, dtype=dtype) scale_tp = tp.Tensor(scale, dtype=dtype) - quantized = tp.quantize(input_tp, scale_tp, tp.int8, dim=0) + + def func(input): + return tp.quantize(input, scale_tp, tp.int8, dim=0) + + quantized = eager_or_compiled(func, input_tp) expected = (input / scale.reshape(2, 1)).to(dtype=torch.int8) assert torch.equal(expected, torch.from_dlpack(quantized).to("cpu")) @@ -55,12 +73,16 @@ def test_quantize_int8_per_channel(self, dtype): "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] ) @skip_if_older_than_sm89 - def test_quantize_fp8_per_tensor(self, dtype): + def test_quantize_fp8_per_tensor(self, dtype, eager_or_compiled): input = torch.tensor([1.0, 2.0], dtype=TORCH_DTYPES[dtype]) scale = torch.tensor(0.5, dtype=TORCH_DTYPES[dtype]) input_tp = tp.Tensor(input, dtype=dtype) scale_tp = tp.Tensor(scale, dtype=dtype) - quantized = tp.quantize(input_tp, scale_tp, tp.float8) + + def func(input): + return tp.quantize(input, scale_tp, tp.float8) + + quantized = eager_or_compiled(func, input_tp) assert quantized.dtype == tp.float8 expected = (input / scale).to(dtype=torch.float32) with raises( @@ -74,12 +96,16 @@ def test_quantize_fp8_per_tensor(self, dtype): "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] ) @skip_if_older_than_sm89 - def test_quantize_fp8_per_channel(self, dtype): + def test_quantize_fp8_per_channel(self, dtype, eager_or_compiled): input = torch.tensor([[1.0, 2.0], [3.0, 4.0]], dtype=TORCH_DTYPES[dtype]) scale = torch.tensor([0.2, 0.1], dtype=TORCH_DTYPES[dtype]) input_tp = tp.Tensor(input, dtype=dtype) scale_tp = tp.Tensor(scale, dtype=dtype) - quantized = tp.quantize(input_tp, scale_tp, tp.float8, dim=0) + + def func(input): + return tp.quantize(input, scale_tp, tp.float8, dim=0) + + quantized = eager_or_compiled(func, input_tp) assert quantized.dtype == tp.float8 expected = (input / scale.reshape(2, 1)).to(dtype=torch.float32) with raises( @@ -93,7 +119,7 @@ def test_quantize_fp8_per_channel(self, dtype): "dtype", [tp.float32, tp.float16, pytest.param(tp.bfloat16, marks=skip_if_older_than_sm80)] ) @pytest.mark.parametrize("quant_mode", ["block-wise", "per-tensor", "per-channel-0", "per-channel-1"]) - def test_qdq_int4(self, dtype, quant_mode): + def test_qdq_int4(self, dtype, quant_mode, eager_or_compiled): if quant_mode == "block-wise": dim = None scale = torch.ones((2, 4), dtype=TORCH_DTYPES[dtype]) @@ -109,14 +135,22 @@ def 
test_qdq_int4(self, dtype, quant_mode): input = torch.ones((4, 4), dtype=TORCH_DTYPES[dtype]) input_tp = tp.Tensor(input, dtype=dtype) - scale_tp = tp.Tensor(scale) - quantized = tp.quantize(input_tp, scale_tp, tp.int4, dim) - dequantized = tp.dequantize(quantized, scale_tp, dtype, dim) + + def func(inp): + scale_tp = tp.Tensor(scale) + quantized = tp.quantize(input_tp, scale_tp, tp.int4, dim) + dequantized = tp.dequantize(quantized, scale_tp, dtype, dim) + return dequantized + + dequantized = eager_or_compiled(func, input_tp) assert torch.equal(input, torch.from_dlpack(dequantized).to("cpu")) - def test_non_constant_scale(self): + def test_non_constant_scale(self, eager_or_compiled): input = tp.ones((4, 4)) scale = tp.ones((4,)) - quantized = tp.quantize(input, scale, tp.int8, dim=0) + def func(input): + return tp.quantize(input, scale, tp.int8, dim=0) + + quantized = eager_or_compiled(func, input) assert bool(cp.all(cp.from_dlpack(quantized) == cp.ones((4, 4), dtype=cp.int8))) diff --git a/tripy/tests/integration/test_reduce.py b/tripy/tests/integration/test_reduce.py index 66db0a0f4..bb922675b 100644 --- a/tripy/tests/integration/test_reduce.py +++ b/tripy/tests/integration/test_reduce.py @@ -36,10 +36,10 @@ class TestReduceOp: ((2, 3, 4, 5), (-2, -1), True), ], ) - def test_all(self, x_shape, axis, keepdim): + def test_all(self, x_shape, axis, keepdim, eager_or_compiled): x = np.array([i % 2 == 0 for i in np.arange(np.prod(x_shape))]).reshape(x_shape) a = tp.Tensor(x) - out = tp.all(a, dim=axis, keepdim=keepdim) + out = eager_or_compiled(tp.all, a, dim=axis, keepdim=keepdim) # np.array is necessary to deal with case where x.all returns a numpy scalar (5th case) expected = np.array(x.all(axis=axis, keepdims=keepdim)) assert np.array_equal(np.from_dlpack(tp.copy(out, device=tp.device("cpu"))), expected) @@ -56,10 +56,10 @@ def test_all(self, x_shape, axis, keepdim): ((2, 3, 4, 5), (-2, -1), True), ], ) - def test_any(self, x_shape, axis, keepdim): + def test_any(self, x_shape, axis, keepdim, eager_or_compiled): x = np.array([i % 2 == 0 for i in np.arange(np.prod(x_shape))]).reshape(x_shape) a = tp.Tensor(x) - out = tp.any(a, dim=axis, keepdim=keepdim) + out = eager_or_compiled(tp.any, a, dim=axis, keepdim=keepdim) expected = np.array(x.any(axis=axis, keepdims=keepdim)) assert np.array_equal(np.from_dlpack(tp.copy(out, device=tp.device("cpu"))), expected) @@ -81,11 +81,11 @@ def test_any(self, x_shape, axis, keepdim): ], ) @pytest.mark.parametrize("dtype", [tp.float32, tp.float16]) - def test_mean(self, x_shape, axis, keepdim: bool, dtype): + def test_mean(self, x_shape, axis, keepdim: bool, dtype, eager_or_compiled): np_dtype = np.float32 if dtype == tp.float32 else np.float16 x = np.arange(np.prod(x_shape)).reshape(x_shape).astype(np_dtype) a = tp.Tensor(x, dtype=dtype) - out = tp.mean(a, dim=axis, keepdim=keepdim) + out = eager_or_compiled(tp.mean, a, dim=axis, keepdim=keepdim) expected = tp.Tensor(cp.array(x.mean(axis=axis, keepdims=keepdim))) assert out.shape == expected.shape assert tp.allclose(out, expected, rtol=1e-3, atol=1e-3) @@ -102,10 +102,10 @@ def test_mean(self, x_shape, axis, keepdim: bool, dtype): ((2, 3, 4, 5), (-2, -1), True), ], ) - def test_var(self, x_shape, axis, keepdim: bool): + def test_var(self, x_shape, axis, keepdim: bool, eager_or_compiled): x = np.arange(np.prod(x_shape)).reshape(x_shape).astype(np.float32) a = tp.Tensor(x) - out = tp.var(a, dim=axis, keepdim=keepdim) + out = eager_or_compiled(tp.var, a, dim=axis, keepdim=keepdim) torch_tensor = 
torch.Tensor(x) expected = tp.Tensor(torch_tensor.var(dim=axis, keepdim=keepdim)) assert out.shape == expected.shape @@ -122,10 +122,10 @@ def test_var(self, x_shape, axis, keepdim: bool): ((2, 3, 4), None, True), ], ) - def test_argmax(self, x_shape, axis, keepdim: bool): + def test_argmax(self, x_shape, axis, keepdim: bool, eager_or_compiled): x = np.arange(np.prod(x_shape)).reshape(x_shape).astype(np.float32) a = tp.Tensor(x) - out = tp.argmax(a, dim=axis, keepdim=keepdim) + out = eager_or_compiled(tp.argmax, a, dim=axis, keepdim=keepdim) assert np.array_equal(cp.from_dlpack(out).get(), np.array(x.argmax(axis=axis, keepdims=keepdim))) @pytest.mark.parametrize( @@ -139,8 +139,8 @@ def test_argmax(self, x_shape, axis, keepdim: bool): ((2, 3, 4), None, True), ], ) - def test_argmin(self, x_shape, axis, keepdim: bool): + def test_argmin(self, x_shape, axis, keepdim: bool, eager_or_compiled): x = np.arange(np.prod(x_shape)).reshape(x_shape).astype(np.float32) a = tp.Tensor(x) - out = tp.argmin(a, dim=axis, keepdim=keepdim) + out = eager_or_compiled(tp.argmin, a, dim=axis, keepdim=keepdim) assert np.array_equal(cp.from_dlpack(out).get(), np.array(x.argmin(axis=axis, keepdims=keepdim))) diff --git a/tripy/tests/integration/test_repeat.py b/tripy/tests/integration/test_repeat.py index 89b34ca43..86bc54556 100644 --- a/tripy/tests/integration/test_repeat.py +++ b/tripy/tests/integration/test_repeat.py @@ -30,18 +30,18 @@ class TestRepeat: (0, 1), ], ) - def test_repeat(self, repeats, dim): + def test_repeat(self, repeats, dim, eager_or_compiled): inp = np.arange(4, dtype=np.int32).reshape((2, 2)) - out = tp.repeat(tp.Tensor(inp), repeats, dim) + out = eager_or_compiled(tp.repeat, tp.Tensor(inp), repeats, dim) expected = np.repeat(inp, repeats, dim) assert np.array_equal(np.from_dlpack(tp.copy(out, device=tp.device("cpu"))), expected) - def test_repeat_shape_scalar(self): + def test_repeat_shape_scalar(self, eager_or_compiled): inp = np.arange(4, dtype=np.int32).reshape((2, 2)) s = tp.ones((1, 2)) - out = tp.repeat(tp.Tensor(inp), s.shape[1], 0) + out = eager_or_compiled(tp.repeat, tp.Tensor(inp), repeats=s.shape[1], dim=0) expected = np.repeat(inp, 2, 0) assert np.array_equal(np.from_dlpack(tp.copy(out, device=tp.device("cpu"))), expected) diff --git a/tripy/tests/integration/test_reshape.py b/tripy/tests/integration/test_reshape.py index c30c01501..e7343c6b6 100644 --- a/tripy/tests/integration/test_reshape.py +++ b/tripy/tests/integration/test_reshape.py @@ -31,21 +31,21 @@ class TestReshape: ((2, 4), (1, -1)), # check negative dim ], ) - def test_static_reshape(self, shape, new_shape): + def test_static_reshape(self, shape, new_shape, eager_or_compiled): cp_a = cp.arange(np.prod(shape)).reshape(shape).astype(np.float32) a = tp.Tensor(cp_a, device=tp.device("gpu")) - b = tp.reshape(a, new_shape) + b = eager_or_compiled(tp.reshape, a, new_shape) if -1 in new_shape: new_shape = tuple(np.prod(shape) // -np.prod(new_shape) if d == -1 else d for d in new_shape) assert np.array_equal(cp.from_dlpack(b).get(), cp_a.reshape(new_shape).get()) - def test_reshape_shape_tensor(self): + def test_reshape_shape_tensor(self, eager_or_compiled): a = tp.ones((2, 3, 4)) b = tp.ones((2, 3, 2, 2)) - out = tp.reshape(a, (a.shape[0], a.shape[1], b.shape[2], b.shape[3])) + out = eager_or_compiled(tp.reshape, a, (a.shape[0], a.shape[1], b.shape[2], b.shape[3])) assert np.array_equal(cp.from_dlpack(out).get(), np.ones((2, 3, 2, 2), dtype=np.float32)) - def test_reshape_shape_with_unknown(self): + def 
test_reshape_shape_with_unknown(self, eager_or_compiled): a = tp.ones((2, 3, 4)) - out = tp.reshape(a, (2, a.shape[1], a.shape[2] / 2, -1)) + out = eager_or_compiled(tp.reshape, a, (2, a.shape[1], a.shape[2] / 2, -1)) assert np.array_equal(cp.from_dlpack(out).get(), np.ones((2, 3, 2, 2), dtype=np.float32)) diff --git a/tripy/tests/integration/test_resize.py b/tripy/tests/integration/test_resize.py index f080ef03b..137fb82d8 100644 --- a/tripy/tests/integration/test_resize.py +++ b/tripy/tests/integration/test_resize.py @@ -24,10 +24,14 @@ class TestResize: @pytest.mark.parametrize("mode", ["nearest", "linear", "cubic"]) - def test_scales(self, mode): + def test_scales(self, mode, eager_or_compiled): inp_torch = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4)) inp_tp = tp.Tensor(inp_torch) - out_tp = tp.resize(inp_tp, mode, scales=(1, 1, 2, 2)) + + def resize(inp, mode, scales): + return tp.resize(inp, mode=mode, scales=scales, align_corners=False) + + out_tp = eager_or_compiled(resize, inp_tp, mode=mode, scales=(1, 1, 2, 2)) torch_mode = { "nearest": "nearest", "linear": "bilinear", @@ -39,10 +43,14 @@ def test_scales(self, mode): assert torch.allclose(out_torch, expected) @pytest.mark.parametrize("mode", ["nearest", "linear", "cubic"]) - def test_output_shape(self, mode): + def test_output_shape(self, mode, eager_or_compiled): inp_torch = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4)) inp_tp = tp.Tensor(inp_torch) - out_tp = tp.resize(inp_tp, mode, output_shape=[1, 1, 8, 8]) + + def resize(inp, mode, output_shape): + return tp.resize(inp, mode=mode, output_shape=output_shape, align_corners=False) + + out_tp = eager_or_compiled(resize, inp_tp, mode, output_shape=[1, 1, 8, 8]) torch_mode = { "nearest": "nearest", "linear": "bilinear", diff --git a/tripy/tests/integration/test_sequential.py b/tripy/tests/integration/test_sequential.py index b6ef3e260..1429869cc 100644 --- a/tripy/tests/integration/test_sequential.py +++ b/tripy/tests/integration/test_sequential.py @@ -21,7 +21,7 @@ class TestSequential: - def test_basic_forward_pass_accuracy(self): + def test_basic_forward_pass_accuracy(self, eager_or_compiled): torch_model = torch.nn.Sequential( torch.nn.Linear(1, 3, dtype=torch.float32, device="cuda"), torch.nn.Linear(3, 2, dtype=torch.float32, device="cuda"), @@ -36,7 +36,7 @@ def test_basic_forward_pass_accuracy(self): input_tensor = torch.tensor([[1.0]], dtype=torch.float32, device="cuda") tp_input = tp.Tensor(input_tensor, dtype=tp.float32) - tp_output = tp_model(tp_input) + tp_output = eager_or_compiled(tp_model, tp_input) torch_model.eval() with torch.no_grad(): @@ -45,7 +45,7 @@ def test_basic_forward_pass_accuracy(self): rtol_ = 2e-6 assert torch.allclose(torch.from_dlpack(tp_output), torch_output, rtol=rtol_) - def test_dict_forward_pass_accuracy(self): + def test_dict_forward_pass_accuracy(self, eager_or_compiled): torch_model = torch.nn.Sequential( torch.nn.Linear(1, 3, dtype=torch.float32, device="cuda"), torch.nn.Linear(3, 2, dtype=torch.float32, device="cuda"), @@ -63,7 +63,7 @@ def test_dict_forward_pass_accuracy(self): input_tensor = torch.tensor([[1.0]], dtype=torch.float32, device="cuda") tp_input = tp.Tensor(input_tensor, dtype=tp.float32) - tp_output = tp_model(tp_input) + tp_output = eager_or_compiled(tp_model, tp_input) torch_model.eval() with torch.no_grad(): @@ -74,7 +74,7 @@ def test_dict_forward_pass_accuracy(self): torch.from_dlpack(tp_output), torch_output, rtol=rtol_ ), "Forward pass outputs do not match." 
- def test_nested_forward_pass_accuracy(self): + def test_nested_forward_pass_accuracy(self, eager_or_compiled): torch_model = torch.nn.Sequential( torch.nn.Linear(1, 3, dtype=torch.float32, device="cuda"), torch.nn.Sequential( @@ -97,7 +97,7 @@ def test_nested_forward_pass_accuracy(self): input_tensor = torch.tensor([[1.0]], dtype=torch.float32, device="cuda") tp_input = tp.Tensor(input_tensor, dtype=tp.float32) - tp_output = tp_model(tp_input) + tp_output = eager_or_compiled(tp_model, tp_input) torch_model.eval() with torch.no_grad(): diff --git a/tripy/tests/integration/test_slice.py b/tripy/tests/integration/test_slice.py index 063b0245c..534ac34db 100644 --- a/tripy/tests/integration/test_slice.py +++ b/tripy/tests/integration/test_slice.py @@ -69,25 +69,31 @@ class TestSliceOp: ((5,), lambda t: t[-12:-5:-1]), ], ) - def test_static_slice_op(self, dims_a, slice_func): + def test_static_slice_op(self, dims_a, slice_func, eager_or_compiled): a_cp = cp.arange(np.prod(dims_a)).reshape(dims_a).astype(np.float32) a = tp.Tensor(a_cp, device=tp.device("gpu")) def func(a): return slice_func(a) - out = func(a) + out = eager_or_compiled(func, a) assert np.array_equal(cp.from_dlpack(out).get(), slice_func(a_cp).get()) - def test_slice_as_gather(self): + def test_slice_as_gather(self, eager_or_compiled): x_data = [0, 1, 2] y_data = [3, 4, 5] x = tp.Tensor(x_data) y = tp.Tensor(y_data) + + def slice(y, x): + return y[x] + + output = eager_or_compiled(slice, y, x) + x_cp = cp.array(x_data) y_cp = cp.array(y_data) - assert np.array_equal(cp.from_dlpack(y[x]).get(), y_cp[x_cp].get()) + assert np.array_equal(cp.from_dlpack(output).get(), y_cp[x_cp].get()) x_shape = (2, 2) y_shape = (4, 3, 2) @@ -95,7 +101,9 @@ def test_slice_as_gather(self): y_vol = math.prod(y_shape) x = tp.reshape(tp.arange(x_vol, dtype=tp.int32), x_shape) y = tp.reshape(tp.arange(y_vol), y_shape) + output = eager_or_compiled(slice, y, x) + x_cp = cp.arange(x_vol, dtype=cp.int32).reshape(x_shape) y_cp = cp.arange(y_vol).reshape(y_shape) - assert np.array_equal(cp.from_dlpack(y[x]).get(), y_cp[x_cp].get()) + assert np.array_equal(cp.from_dlpack(output).get(), y_cp[x_cp].get()) diff --git a/tripy/tests/integration/test_split.py b/tripy/tests/integration/test_split.py index 9279c98fb..f6e7ad369 100644 --- a/tripy/tests/integration/test_split.py +++ b/tripy/tests/integration/test_split.py @@ -43,16 +43,21 @@ class TestSplitOp: ((12, 12), (3, 1), lambda t: (t[:, :4], t[:, 4:8], t[:, 8:])), ((12, 12), ([3], 1), lambda t: (t[:, :3], t[:, 3:])), ((12, 12), (4, 0), lambda t: (t[:3, :], t[3:6, :], t[6:9, :], t[9:12, :])), - ((3, 0), (5, 1), lambda t: (t[:, :0], t[:, 0:0], t[:, 0:0], t[:, 0:0], t[:, 0:0])), + pytest.param( + (3, 0), + (5, 1), + lambda t: (t[:, :0], t[:, 0:0], t[:, 0:0], t[:, 0:0], t[:, 0:0]), + marks=pytest.mark.skip(reason="https://github.com/NVIDIA/TensorRT-Incubator/issues/398"), + ), ], ) - def test_split_static(self, dims_a, split_params, reference_slices): + def test_split_static(self, dims_a, split_params, reference_slices, eager_or_compiled): a_cp = cp.arange(np.prod(dims_a)).reshape(dims_a).astype(cp.float32) a = tp.Tensor(a_cp, device=tp.device("gpu")) def func(t): return tp.split(t, split_params[0], split_params[1]) - out = func(a) + out = eager_or_compiled(func, a) reference_out = reference_slices(a_cp) compare_split_results(out, reference_out) diff --git a/tripy/tests/integration/test_stack.py b/tripy/tests/integration/test_stack.py index be1f724b5..796bcc26b 100644 --- a/tripy/tests/integration/test_stack.py +++ 
b/tripy/tests/integration/test_stack.py @@ -33,9 +33,9 @@ class TestStack: ([(2, 3, 4)], 0), ], ) - def test_stack(self, tensor_shapes, dim): + def test_stack(self, tensor_shapes, dim, eager_or_compiled): tensors = [tp.ones(shape) for shape in tensor_shapes] - out = tp.stack(tensors, dim=dim) + out = eager_or_compiled(tp.stack, tensors, dim=dim) # Create numpy arrays for comparison np_tensors = [np.ones(shape) for shape in tensor_shapes] @@ -44,13 +44,13 @@ def test_stack(self, tensor_shapes, dim): assert out.shape == list(expected.shape) assert np.array_equal(cp.from_dlpack(out).get(), expected) - def test_stack_different_ranks(self): + def test_stack_different_ranks(self, eager_or_compiled): tensors = [tp.ones((2, 3)), tp.ones((2, 3, 4))] with raises( tp.TripyException, match="Expected all input tensors to have the same rank.", ): - tp.stack(tensors) + eager_or_compiled(tp.stack, tensors) def test_stack_different_shapes(self): a = tp.ones((2, 3)) diff --git a/tripy/tests/integration/test_unary_elementwise.py b/tripy/tests/integration/test_unary_elementwise.py index e01ca3fff..e89a37d6c 100644 --- a/tripy/tests/integration/test_unary_elementwise.py +++ b/tripy/tests/integration/test_unary_elementwise.py @@ -35,7 +35,7 @@ class TestUnaryElementwise: @pytest.mark.parametrize("tp_func, np_func", [(tp_func, np_func) for tp_func, np_func in _UNARY_OPS.items()]) - def test_op_funcs(self, tp_func, np_func): + def test_op_funcs(self, tp_func, np_func, eager_or_compiled): input = tp.arange(1, 4, dtype=tp.float32) - output = tp_func(input) + output = eager_or_compiled(tp_func, input) assert tp.allclose(output, tp.Tensor(np_func(cp.from_dlpack(input).get()))) diff --git a/tripy/tests/integration/test_unsqueeze.py b/tripy/tests/integration/test_unsqueeze.py index e25d459b1..4402449fc 100644 --- a/tripy/tests/integration/test_unsqueeze.py +++ b/tripy/tests/integration/test_unsqueeze.py @@ -24,13 +24,13 @@ class TestUnsqueezeOp: @pytest.mark.parametrize("axis", [-1, 0, 2]) - def test_unsqueeze_dynamic_op(self, axis): + def test_unsqueeze_dynamic_op(self, axis, eager_or_compiled): def func(a): return tp.unsqueeze(a, dim=axis) inp = np.ones((4, 2, 2, 3), dtype=np.float32) - out = func(tp.Tensor(inp)) + out = eager_or_compiled(func, tp.Tensor(inp)) ref_out = np.expand_dims(inp, axis=axis) assert tp.allclose(out, tp.Tensor(ref_out)) diff --git a/tripy/tests/integration/test_where_op.py b/tripy/tests/integration/test_where_op.py index 36d4839f5..5f37b5724 100644 --- a/tripy/tests/integration/test_where_op.py +++ b/tripy/tests/integration/test_where_op.py @@ -35,19 +35,19 @@ class TestWhereOp: ((0,), (1,), (1,)), # 0 dim in the condition ], ) - def test_where_broadcast_shapes(self, cond, x, y): + def test_where_broadcast_shapes(self, cond, x, y, eager_or_compiled): x = np.arange(np.prod(x)).reshape(x).astype(np.float32) y = np.arange(np.prod(y)).reshape(y).astype(np.float32) t_cond = np.arange(np.prod(cond)).reshape(cond).astype(np.float32) a = Tensor(x) b = Tensor(y) condition = Tensor(t_cond % 2 == 0) - out = tp.where(condition, a, b) + out = eager_or_compiled(tp.where, condition, a, b) assert np.array_equal(cp.from_dlpack(out).get(), np.array(np.where((t_cond % 2 == 0), x, y))) - def test_explicit_condition(self): + def test_explicit_condition(self, eager_or_compiled): select_indices = tp.Tensor([True, False, True, False], dtype=tp.bool) ones = tp.ones((4,), dtype=tp.int32) zeros = tp.zeros((4,), dtype=tp.int32) - w = tp.where(select_indices, ones, zeros) + w = eager_or_compiled(tp.where, select_indices, 
ones, zeros) assert cp.from_dlpack(w).get().tolist() == [1, 0, 1, 0]
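
For reference, below is a minimal standalone sketch (not part of the diff) of the two execution paths that the new `eager_or_compiled` fixture in conftest.py selects between. It reuses the `gemm` helper added in test_matrix_multiplication.py; the shapes and input values are illustrative assumptions, while the `tp.compile`/`tp.InputInfo` usage mirrors the fixture implementation above.

import cupy as cp
import numpy as np

import tripy as tp


def gemm(a, b):
    return a @ b


a = tp.Tensor(np.arange(6, dtype=np.float32).reshape(2, 3))
b = tp.Tensor(np.arange(6, dtype=np.float32).reshape(3, 2))

# "eager" fixture param: the test callable is invoked directly.
eager_out = gemm(a, b)

# "compile" fixture param: each tensor argument is described by a tp.InputInfo
# (static shape plus dtype), the callable is compiled once, and the compiled
# function is then invoked with the original tensors, as in the conftest wrapper.
compiled_gemm = tp.compile(
    gemm,
    args=(
        tp.InputInfo([2, 3], dtype=tp.float32),
        tp.InputInfo([3, 2], dtype=tp.float32),
    ),
)
compiled_out = compiled_gemm(a, b)

# Both paths should produce the same values, which is what the updated tests assert.
assert np.array_equal(cp.from_dlpack(eager_out).get(), cp.from_dlpack(compiled_out).get())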