diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index ed41cae..a6874bf 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -37,7 +37,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
         with:
-          depwarn: error
+          depwarn: yes
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v4
         with:
diff --git a/Project.toml b/Project.toml
index 345acd1..2ccbfa4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,24 +23,24 @@ cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
 
 [compat]
 Aqua = "0.8"
-CUDA = "4.4, 5"
-DiffEqBase = "6.137"
+CUDA = "5.4.2"
+DiffEqBase = "6.151"
 Distributions = "v0.25.107"
-DocStringExtensions = "0.9"
-Flux = "0.13.12, 0.14"
-Functors = "0.4"
+DocStringExtensions = "0.9.3"
+Flux = "0.14.16"
+Functors = "0.4.11"
 LinearAlgebra = "1.10"
 Random = "1.10"
-Reexport = "1"
+Reexport = "1.2.2"
 SafeTestsets = "0.1"
-SciMLSensitivity = "7.49"
+SciMLSensitivity = "7.62"
 SparseArrays = "1.10"
 Statistics = "1.10"
-StochasticDiffEq = "6.63"
+StochasticDiffEq = "6.66"
 Test = "1.10"
-Tracker = "0.2.18"
-Zygote = "0.6.61"
-cuDNN = "1.1"
+Tracker = "0.2.34"
+Zygote = "0.6.70"
+cuDNN = "1.3.2"
 julia = "1.10"
 
 [extras]
diff --git a/docs/Project.toml b/docs/Project.toml
index c6d1296..45b25ee 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,9 +1,10 @@
 [deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 HighDimPDE = "57c578d5-59d4-4db8-a490-a9fc372d19d2"
 
 [compat]
+CUDA = "5"
 Documenter = "1"
 Flux = "0.13, 0.14"
-HighDimPDE = "2"
diff --git a/src/DeepBSDE.jl b/src/DeepBSDE.jl
index c421b21..3bf25c7 100644
--- a/src/DeepBSDE.jl
+++ b/src/DeepBSDE.jl
@@ -137,32 +137,32 @@ function DiffEqBase.solve(prob::ParabolicPDEProblem,
         u = h[end]
         X = h[1:(end - 1)].data
         t_ = eltype(X)(t)
-        _σᵀ∇u = σᵀ∇u([X; t_])' |> collect
-        _f = -f(X, u.data, _σᵀ∇u, p, t_)
-
+        _σᵀ∇u = re2(p)([X; t_])' |> collect
+        _f = -f(X, u.data, _σᵀ∇u, re3(p), t_)
         Tracker.TrackedArray(vcat(μ(X, p, t), [_f]))
     end
 
     function G(h::Tracker.TrackedArray, p, t)
         X = h[1:(end - 1)].data
         t_ = eltype(X)(t)
-        _σᵀ∇u = σᵀ∇u([X; t_])' |> collect
-        Tracker.TrackedArray(vcat(σ(X, p, t), _σᵀ∇u))
+        _σᵀ∇u = re2(p)([X; t_])' |> collect
+        Tracker.TrackedArray(vcat(σ(X, re3(p), t), _σᵀ∇u))
     end
 
     noise = zeros(Float32, d + 1, d)
-    prob = SDEProblem{false}(F, G, [x0; 0.0f0], tspan, p3, noise_rate_prototype = noise)
+    sde_prob = SDEProblem{false}(F, G, [x0; 0.0f0], tspan, p3, noise_rate_prototype = noise)
 
     function neural_sde(init_cond)
-        map(1:trajectories) do j #TODO add Ensemble Simulation
-            predict_ans = Array(solve(prob, sdealg;
-                dt = dt,
-                u0 = init_cond,
-                p = p3,
-                save_everystep = false,
-                sensealg = SciMLSensitivity.TrackerAdjoint(),
-                kwargs...))[:, end]
-            (X, u) = (predict_ans[1:(end - 1)], predict_ans[end])
+        sde_prob = remake(sde_prob, u0 = init_cond)
+        ensemble_prob = EnsembleProblem(sde_prob)
+        sol = solve(ensemble_prob, sdealg, EnsembleSerial();
+            u0 = init_cond, trajectories = trajectories, dt = dt, p = p3,
+            sensealg = SciMLSensitivity.TrackerAdjoint(),
+            save_everystep = false,
+            kwargs...)
+        map(sol) do _sol
+            predict_ans = Array(_sol)
+            (predict_ans[1:(end - 1), end], predict_ans[end, end])
         end
     end
 
@@ -185,7 +185,6 @@ function DiffEqBase.solve(prob::ParabolicPDEProblem,
         verbose && println("Current loss is: $l")
         l < pabstol && Flux.stop()
     end
-    verbose && println("DeepBSDE")
 
     Flux.train!(loss_n_sde, ps, data, opt; cb = cb)
 
diff --git a/test/DeepBSDE.jl b/test/DeepBSDE.jl
index 5684153..c1cd31c 100644
--- a/test/DeepBSDE.jl
+++ b/test/DeepBSDE.jl
@@ -21,16 +21,16 @@ end
     tspan = (0.0f0, 5.0f0)
    dt = 0.5f0 # time step
     d = 1 # number of dimensions
-    m = 10 # number of trajectories (batch size)
+    m = 100 # number of trajectories (batch size)
 
     g(X) = sum(X .^ 2) # terminal condition
-    f(X, u, σᵀ∇u, p, t) = Float32(0.0)
-    μ_f(X, p, t) = zero(X) #Vector d x 1
-    σ_f(X, p, t) = Diagonal(ones(Float32, d)) |> Matrix #Matrix d x d
-    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan, g, f)
+    f(X, u, σᵀ∇u, p, t) = eltype(X)(0.0)
+    μ_f(X, p, t) = X*0.0f0 #Vector d x 1
+    σ_f(X, p, t) = Diagonal(ones(eltype(X), d)) |> Matrix #Matrix d x d
+    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan; g, f)
 
     hls = 10 + d #hidden layer size
-    opt = Flux.Optimise.Adam(0.005) #optimizer
+    opt = Flux.Optimise.Adam(0.05) #optimizer
     #sub-neural network approximating solutions at the desired point
     u0 = Flux.Chain(Dense(d, hls, relu),
         Dense(hls, hls, relu),
@@ -69,7 +69,7 @@ end
     f(X, u, σᵀ∇u, p, t) = Float32(0.0)
     μ_f(X, p, t) = zero(X) #Vector d x 1
     σ_f(X, p, t) = Diagonal(ones(Float32, d)) |> Matrix #Matrix d x d
-    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan, g, f)
+    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan; g, f)
 
     hls = 10 + d #hidden layer size
     opt = Flux.Optimise.Adam(0.005) #optimizer
@@ -113,7 +113,7 @@ end
     g(X) = sum(X .^ 2)
     μ_f(X, p, t) = zero(X) #Vector d x 1
     σ_f(X, p, t) = Diagonal(sigma * X) |> Matrix #Matrix d x d
-    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan, g, f)
+    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan; g, f)
 
     hls = 10 + d #hide layer size
     opt = Flux.Optimise.Adam(0.001)
@@ -153,7 +153,7 @@ end
     f(X, u, σᵀ∇u, p, t) = u .- u .^ 3
     μ_f(X, p, t) = zero(X) #Vector d x 1
     σ_f(X, p, t) = Diagonal(ones(Float32, d)) |> Matrix #Matrix d x d
-    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan, g, f)
+    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan; g, f)
 
     hls = 20 + d #hidden layer size
     opt = Flux.Optimise.Adam(5^-3) #optimizer
@@ -195,7 +195,7 @@ end
     f(X, u, σᵀ∇u, p, t) = -λ * sum(σᵀ∇u .^ 2)
     μ_f(X, p, t) = zero(X) #Vector d x 1 λ
     σ_f(X, p, t) = Diagonal(sqrt(2.0f0) * ones(Float32, d)) #Matrix d x d
-    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan, g, f)
+    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan; g, f)
 
     # TODO: This is a very large neural networks which size must be reduced.
     hls = 256 #hidden layer size
 
@@ -266,7 +266,7 @@ end
     μ_f(X, p, t) = µc * X #Vector d x 1
     σ_f(X, p, t) = σc * Diagonal(X) #Matrix d x d
 
-    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan, g, f)
+    prob = ParabolicPDEProblem(μ_f, σ_f, x0, tspan; g, f)
 
     hls = 256 #hidden layer size
     opt = Flux.Optimise.Adam(0.008) #optimizer
diff --git a/test/runtests.jl b/test/runtests.jl
index e8f7c0d..f9e1c9a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -6,6 +6,7 @@ using SafeTestsets, Test
     @time @safetestset "reflect" include("reflect.jl")
     @time @safetestset "MLP" include("MLP.jl")
     @time @safetestset "Deep Splitting" include("DeepSplitting.jl")
+    @time @safetestset "DeepBSDE" include("DeepBSDE.jl")
     @time @safetestset "MC Sample" include("MCSample.jl")
     @time @safetestset "NNStopping" include("NNStopping.jl")
     @time @safetestset "NNKolmogorov" include("NNKolmogorov.jl")
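
Note on the src/DeepBSDE.jl change: the hand-rolled `map(1:trajectories)` loop is replaced by SciML's ensemble interface (`remake` + `EnsembleProblem` + `trajectories`). Below is a minimal, self-contained sketch of that pattern on a toy scalar SDE, not the package's code: `drift`, `diffusion`, and all numeric values are hypothetical stand-ins.

# Sketch of the remake + EnsembleProblem pattern adopted in the patch.
# `drift`, `diffusion`, and the numbers below are hypothetical, chosen
# only to make the example runnable.
using StochasticDiffEq

drift(u, p, t) = p .* u            # toy drift: exponential decay
diffusion(u, p, t) = 0.1f0 .* u    # toy diagonal noise

prob = SDEProblem(drift, diffusion, [1.0f0], (0.0f0, 1.0f0), -0.5f0)

# remake swaps the initial condition without rebuilding the problem;
# EnsembleProblem then runs `trajectories` independent simulations in
# a single solve call, replacing the manual loop.
prob = remake(prob, u0 = [2.0f0])
ensemble_prob = EnsembleProblem(prob)
sol = solve(ensemble_prob, EM(), EnsembleSerial();
    trajectories = 10, dt = 0.05f0, save_everystep = false)

# With save_everystep = false only the first and last steps are stored,
# so the last column of Array(_sol) is each trajectory's terminal state,
# mirroring the `map(sol) do _sol` block in the patch.
finals = map(_sol -> Array(_sol)[:, end], sol)

`EnsembleSerial()` keeps all trajectories on one thread, matching the serial semantics of the old loop; `EnsembleThreads()` would be the drop-in parallel alternative if the loss remained AD-compatible.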