From f7421f8c9cc620f829b3e3ce86d5ff12a86c1412 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Wed, 24 Jul 2024 17:19:30 +0100 Subject: [PATCH 01/22] slightly more thread safe gc --- src/C/pointers.jl | 2 ++ src/GC/GC.jl | 63 +++++++++++++++++++++++++----------- test/finalize_test_script.jl | 24 ++++++++++++++ 3 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 test/finalize_test_script.jl diff --git a/src/C/pointers.jl b/src/C/pointers.jl index dd0476fc..6faabb60 100644 --- a/src/C/pointers.jl +++ b/src/C/pointers.jl @@ -22,6 +22,8 @@ const CAPI_FUNC_SIGS = Dict{Symbol,Pair{Tuple,Type}}( :PyEval_RestoreThread => (Ptr{Cvoid},) => Cvoid, :PyGILState_Ensure => () => PyGILState_STATE, :PyGILState_Release => (PyGILState_STATE,) => Cvoid, + :PyGILState_GetThisThreadState => () => Ptr{Cvoid}, + :PyGILState_Check => () => Cint, # IMPORT :PyImport_ImportModule => (Ptr{Cchar},) => PyPtr, :PyImport_Import => (PyPtr,) => PyPtr, diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 0d1fa9a8..48e70544 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -39,25 +39,36 @@ Like most PythonCall functions, you must only call this from the main thread. """ function enable() ENABLED[] = true - if !isempty(QUEUE) - C.with_gil(false) do - for ptr in QUEUE - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end - end + if !isempty(QUEUE) && C.PyGILState_Check() == 1 + free_queue() + end + return +end + +function free_queue() + for ptr in QUEUE + if ptr != C.PyNULL + C.Py_DecRef(ptr) end end empty!(QUEUE) - return + nothing +end + +function gc() + if ENABLED[] && C.PyGILState_Check() == 1 + free_queue() + true + else + false + end end function enqueue(ptr::C.PyPtr) if ptr != C.PyNULL && C.CTX.is_initialized - if ENABLED[] - C.with_gil(false) do - C.Py_DecRef(ptr) - end + if ENABLED[] && C.PyGILState_Check() == 1 + C.Py_DecRef(ptr) + isempty(QUEUE) || free_queue() else push!(QUEUE, ptr) end @@ -67,14 +78,13 @@ end function enqueue_all(ptrs) if C.CTX.is_initialized - if ENABLED[] - C.with_gil(false) do - for ptr in ptrs - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end + if ENABLED[] && C.PyGILState_Check() == 1 + for ptr in ptrs + if ptr != C.PyNULL + C.Py_DecRef(ptr) end end + isempty(QUEUE) || free_queue() else append!(QUEUE, ptrs) end @@ -82,4 +92,21 @@ function enqueue_all(ptrs) return end +mutable struct GCHook + function GCHook() + finalizer(_gchook_finalizer, new()) + end +end + +function _gchook_finalizer(x) + gc() + finalizer(_gchook_finalizer, x) + nothing +end + +function __init__() + GCHook() + nothing +end + end # module GC diff --git a/test/finalize_test_script.jl b/test/finalize_test_script.jl new file mode 100644 index 00000000..28d4dd72 --- /dev/null +++ b/test/finalize_test_script.jl @@ -0,0 +1,24 @@ +using PythonCall + +# This would consistently segfault pre-GC-thread-safety +let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end +end + +@show PythonCall.GC.ENABLED[] +@show length(PythonCall.GC.QUEUE) +GC.gc(false) +# with GCHook, the queue should be empty now (the above gc() triggered GCHook to clear the PythonCall QUEUE) +# without GCHook, gc() has no effect on the QUEUE +@show length(PythonCall.GC.QUEUE) +GC.gc(false) +@show length(PythonCall.GC.QUEUE) +GC.gc(false) +@show length(PythonCall.GC.QUEUE) +# with GCHook this is not necessary, GC.gc() is enough +# without GCHook, this is required to free any objects in the PythonCall QUEUE +PythonCall.GC.gc() +@show length(PythonCall.GC.QUEUE) From 3bcd028cd81c5c547f82dcc7cfa49cf7d71c06a9 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Mon, 29 Jul 2024 21:49:01 +0100 Subject: [PATCH 02/22] use Channel not Vector and make disable/enable a no-op --- src/GC/GC.jl | 100 +++++++++++++++++++---------------- test/finalize_test_script.jl | 17 +++--- 2 files changed, 62 insertions(+), 55 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 48e70544..357e98d0 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -3,93 +3,97 @@ Garbage collection of Python objects. -See `disable` and `enable`. +See [`enable`](@ref), [`disable`](@ref) and [`gc`](@ref). """ module GC using ..C: C -const ENABLED = Ref(true) -const QUEUE = C.PyPtr[] +const QUEUE = Channel{C.PyPtr}(Inf) +const HOOK = WeakRef() """ PythonCall.GC.disable() -Disable the PythonCall garbage collector. +Do nothing. -This means that whenever a Python object owned by Julia is finalized, it is not immediately -freed but is instead added to a queue of objects to free later when `enable()` is called. +!!! note -Like most PythonCall functions, you must only call this from the main thread. + Historically this would disable the PythonCall garbage collector. This was required + for safety in multi-threaded code but is no longer needed, so this is now a no-op. """ -function disable() - ENABLED[] = false - return -end +disable() = nothing """ PythonCall.GC.enable() -Re-enable the PythonCall garbage collector. +Do nothing. -This frees any Python objects which were finalized while the GC was disabled, and allows -objects finalized in the future to be freed immediately. +!!! note -Like most PythonCall functions, you must only call this from the main thread. + Historically this would enable the PythonCall garbage collector. This was required + for safety in multi-threaded code but is no longer needed, so this is now a no-op. """ -function enable() - ENABLED[] = true - if !isempty(QUEUE) && C.PyGILState_Check() == 1 - free_queue() - end - return -end +enable() = nothing -function free_queue() - for ptr in QUEUE - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end +""" + PythonCall.GC.gc() + +Free any Python objects waiting to be freed. + +These are objects that were finalized from a thread that was not holding the Python +GIL at the time. + +Like most PythonCall functions, this must only be called from the main thread (i.e. the +thread currently holding the Python GIL.) +""" +function gc() + if C.CTX.is_initialized + unsafe_free_queue() end - empty!(QUEUE) nothing end -function gc() - if ENABLED[] && C.PyGILState_Check() == 1 - free_queue() - true - else - false +function unsafe_free_queue() + if isready(QUEUE) + @lock QUEUE while isready(QUEUE) + ptr = take!(QUEUE) + if ptr != C.PyNULL + C.Py_DecRef(ptr) + end + end end + nothing end function enqueue(ptr::C.PyPtr) if ptr != C.PyNULL && C.CTX.is_initialized - if ENABLED[] && C.PyGILState_Check() == 1 + if C.PyGILState_Check() == 1 C.Py_DecRef(ptr) - isempty(QUEUE) || free_queue() + unsafe_free_queue() else - push!(QUEUE, ptr) + put!(QUEUE, ptr) end end - return + nothing end function enqueue_all(ptrs) - if C.CTX.is_initialized - if ENABLED[] && C.PyGILState_Check() == 1 + if any(ptr -> ptr != C.PYNULL, ptrs) && C.CTX.is_initialized + if C.PyGILState_Check() == 1 for ptr in ptrs if ptr != C.PyNULL C.Py_DecRef(ptr) end end - isempty(QUEUE) || free_queue() + unsafe_free_queue() else - append!(QUEUE, ptrs) + for ptr in ptrs + put!(QUEUE, ptr) + end end end - return + nothing end mutable struct GCHook @@ -99,13 +103,17 @@ mutable struct GCHook end function _gchook_finalizer(x) - gc() - finalizer(_gchook_finalizer, x) + if C.CTX.is_initialized + finalizer(_gchook_finalizer, x) + if isready(QUEUE) && C.PyGILState_Check() == 1 + unsafe_free_queue() + end + end nothing end function __init__() - GCHook() + HOOK.value = GCHook() nothing end diff --git a/test/finalize_test_script.jl b/test/finalize_test_script.jl index 28d4dd72..41efbc79 100644 --- a/test/finalize_test_script.jl +++ b/test/finalize_test_script.jl @@ -8,17 +8,16 @@ let end end -@show PythonCall.GC.ENABLED[] -@show length(PythonCall.GC.QUEUE) -GC.gc(false) +@show isready(PythonCall.GC.QUEUE) +GC.gc() # with GCHook, the queue should be empty now (the above gc() triggered GCHook to clear the PythonCall QUEUE) # without GCHook, gc() has no effect on the QUEUE -@show length(PythonCall.GC.QUEUE) -GC.gc(false) -@show length(PythonCall.GC.QUEUE) -GC.gc(false) -@show length(PythonCall.GC.QUEUE) +@show isready(PythonCall.GC.QUEUE) +GC.gc() +@show isready(PythonCall.GC.QUEUE) +GC.gc() +@show isready(PythonCall.GC.QUEUE) # with GCHook this is not necessary, GC.gc() is enough # without GCHook, this is required to free any objects in the PythonCall QUEUE PythonCall.GC.gc() -@show length(PythonCall.GC.QUEUE) +@show isready(PythonCall.GC.QUEUE) From 8ca05c9ac7bc2beaed7cc86a00a2a7719d201cd2 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Mon, 29 Jul 2024 21:56:30 +0100 Subject: [PATCH 03/22] document GCHook --- src/GC/GC.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 357e98d0..0163cb0f 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -96,6 +96,16 @@ function enqueue_all(ptrs) nothing end +""" + GCHook() + +An immortal object which frees any pending Python objects when Julia's GC runs. + +This works by creating it but not holding any strong reference to it, so it is eligible +to be finalized by Julia's GC. The finalizer empties the PythonCall GC queue if +possible. The finalizer also re-attaches itself, so the object does not actually get +collected and so the finalizer will run again at next GC. +""" mutable struct GCHook function GCHook() finalizer(_gchook_finalizer, new()) From e230ce9a3b8cb5f01e730fd22bb3138c8f5f234b Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Mon, 29 Jul 2024 22:10:27 +0100 Subject: [PATCH 04/22] cannot lock channels on julia 1.6 --- src/GC/GC.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 0163cb0f..771b4050 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -55,12 +55,10 @@ function gc() end function unsafe_free_queue() - if isready(QUEUE) - @lock QUEUE while isready(QUEUE) - ptr = take!(QUEUE) - if ptr != C.PyNULL - C.Py_DecRef(ptr) - end + while isready(QUEUE) + ptr = take!(QUEUE) + if ptr != C.PyNULL + C.Py_DecRef(ptr) end end nothing From a36d7c09314d101cdd60cecaade03cb6840a0b80 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Wed, 31 Jul 2024 19:25:41 +0100 Subject: [PATCH 05/22] revert to using a vector for the queue --- src/GC/GC.jl | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 771b4050..a076f3ed 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -9,7 +9,8 @@ module GC using ..C: C -const QUEUE = Channel{C.PyPtr}(Inf) +const QUEUE = C.PyPtr[] +const QUEUE_LOCK = Threads.SpinLock() const HOOK = WeakRef() """ @@ -55,12 +56,14 @@ function gc() end function unsafe_free_queue() - while isready(QUEUE) - ptr = take!(QUEUE) + lock(QUEUE_LOCK) + for ptr in QUEUE if ptr != C.PyNULL C.Py_DecRef(ptr) end end + empty!(QUEUE) + unlock(QUEUE_LOCK) nothing end @@ -68,9 +71,13 @@ function enqueue(ptr::C.PyPtr) if ptr != C.PyNULL && C.CTX.is_initialized if C.PyGILState_Check() == 1 C.Py_DecRef(ptr) - unsafe_free_queue() + if !isempty(QUEUE) + unsafe_free_queue() + end else - put!(QUEUE, ptr) + lock(QUEUE_LOCK) + push!(QUEUE, ptr) + unlock(QUEUE_LOCK) end end nothing @@ -84,11 +91,13 @@ function enqueue_all(ptrs) C.Py_DecRef(ptr) end end - unsafe_free_queue() - else - for ptr in ptrs - put!(QUEUE, ptr) + if !isempty(QUEUE) + unsafe_free_queue() end + else + lock(QUEUE_LOCK) + append!(QUEUE, ptrs) + unlock(QUEUE_LOCK) end end nothing @@ -113,7 +122,7 @@ end function _gchook_finalizer(x) if C.CTX.is_initialized finalizer(_gchook_finalizer, x) - if isready(QUEUE) && C.PyGILState_Check() == 1 + if !isempty(QUEUE) && C.PyGILState_Check() == 1 unsafe_free_queue() end end From a5a2c96bfa41bbbe814f466f848f754ea67bfc5d Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Wed, 31 Jul 2024 19:44:34 +0100 Subject: [PATCH 06/22] restore test script --- test/finalize_test_script.jl | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/test/finalize_test_script.jl b/test/finalize_test_script.jl index 41efbc79..ecacad9e 100644 --- a/test/finalize_test_script.jl +++ b/test/finalize_test_script.jl @@ -7,17 +7,3 @@ let finalize(obj) end end - -@show isready(PythonCall.GC.QUEUE) -GC.gc() -# with GCHook, the queue should be empty now (the above gc() triggered GCHook to clear the PythonCall QUEUE) -# without GCHook, gc() has no effect on the QUEUE -@show isready(PythonCall.GC.QUEUE) -GC.gc() -@show isready(PythonCall.GC.QUEUE) -GC.gc() -@show isready(PythonCall.GC.QUEUE) -# with GCHook this is not necessary, GC.gc() is enough -# without GCHook, this is required to free any objects in the PythonCall QUEUE -PythonCall.GC.gc() -@show isready(PythonCall.GC.QUEUE) From f021072db70668a60364e51b6a806cdf3f46658d Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 21:32:36 +0100 Subject: [PATCH 07/22] combine queue into a single item --- src/GC/GC.jl | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index a076f3ed..898a7eac 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -9,8 +9,7 @@ module GC using ..C: C -const QUEUE = C.PyPtr[] -const QUEUE_LOCK = Threads.SpinLock() +const QUEUE = (; items = C.PyPtr[], lock = Threads.SpinLock()) const HOOK = WeakRef() """ @@ -56,14 +55,14 @@ function gc() end function unsafe_free_queue() - lock(QUEUE_LOCK) - for ptr in QUEUE + lock(QUEUE.lock) + for ptr in QUEUE.items if ptr != C.PyNULL C.Py_DecRef(ptr) end end - empty!(QUEUE) - unlock(QUEUE_LOCK) + empty!(QUEUE.items) + unlock(QUEUE.lock) nothing end @@ -71,13 +70,13 @@ function enqueue(ptr::C.PyPtr) if ptr != C.PyNULL && C.CTX.is_initialized if C.PyGILState_Check() == 1 C.Py_DecRef(ptr) - if !isempty(QUEUE) + if !isempty(QUEUE.items) unsafe_free_queue() end else - lock(QUEUE_LOCK) - push!(QUEUE, ptr) - unlock(QUEUE_LOCK) + lock(QUEUE.lock) + push!(QUEUE.items, ptr) + unlock(QUEUE.lock) end end nothing @@ -91,13 +90,13 @@ function enqueue_all(ptrs) C.Py_DecRef(ptr) end end - if !isempty(QUEUE) + if !isempty(QUEUE.items) unsafe_free_queue() end else - lock(QUEUE_LOCK) - append!(QUEUE, ptrs) - unlock(QUEUE_LOCK) + lock(QUEUE.lock) + append!(QUEUE.items, ptrs) + unlock(QUEUE.lock) end end nothing @@ -122,7 +121,7 @@ end function _gchook_finalizer(x) if C.CTX.is_initialized finalizer(_gchook_finalizer, x) - if !isempty(QUEUE) && C.PyGILState_Check() == 1 + if !isempty(QUEUE.items) && C.PyGILState_Check() == 1 unsafe_free_queue() end end From 4b3bd65bf89e5edc0746c6e67cfd98528f19c003 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 21:37:12 +0100 Subject: [PATCH 08/22] prefer Fix2 over anonymous function --- src/GC/GC.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 898a7eac..9ac93bcf 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -83,7 +83,7 @@ function enqueue(ptr::C.PyPtr) end function enqueue_all(ptrs) - if any(ptr -> ptr != C.PYNULL, ptrs) && C.CTX.is_initialized + if any(!=(C.PYNULL), ptrs) && C.CTX.is_initialized if C.PyGILState_Check() == 1 for ptr in ptrs if ptr != C.PyNULL From 56aa9bc863498337f55ff720cbeadfec984009ca Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 21:53:02 +0100 Subject: [PATCH 09/22] update docs --- docs/src/faq.md | 30 +++++++++++++++++------------- docs/src/releasenotes.md | 8 ++++++++ src/GC/GC.jl | 2 +- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/docs/src/faq.md b/docs/src/faq.md index 981aa1ed..b51717d0 100644 --- a/docs/src/faq.md +++ b/docs/src/faq.md @@ -4,19 +4,23 @@ No. -Some rules if you are writing multithreaded code: -- Only call Python functions from the first thread. -- You probably also need to call `PythonCall.GC.disable()` on the main thread before any - threaded block of code. Remember to call `PythonCall.GC.enable()` again afterwards. - (This is because Julia finalizers can be called from any thread.) -- Julia intentionally causes segmentation faults as part of the GC safepoint mechanism. - If unhandled, these segfaults will result in termination of the process. To enable signal handling, - set `PYTHON_JULIACALL_HANDLE_SIGNALS=yes` before any calls to import juliacall. This is equivalent - to starting julia with `julia --handle-signals=yes`, the default behavior in Julia. - See discussion [here](https://github.com/JuliaPy/PythonCall.jl/issues/219#issuecomment-1605087024) for more information. -- You may still encounter problems. - -Related issues: [#201](https://github.com/JuliaPy/PythonCall.jl/issues/201), [#202](https://github.com/JuliaPy/PythonCall.jl/issues/202) +However it is safe to use PythonCall with Julia with multiple threads, provided you only +call Python code from the first thread. (Before v0.9.22, tricks such as disabling the +garbage collector were required.) + +From Python, to use JuliaCall with multiple threads you probably need to set +[`PYTHON_JULIACALL_HANDLE_SIGNALS=yes`](@ref julia-config) before importing JuliaCall. +This is because Julia intentionally causes segmentation faults as part of the GC +safepoint mechanism. If unhandled, these segfaults will result in termination of the +process. This is equivalent to starting julia with `julia --handle-signals=yes`, the +default behavior in Julia. See discussion +[here](https://github.com/JuliaPy/PythonCall.jl/issues/219#issuecomment-1605087024) +for more information. + +Related issues: +[#201](https://github.com/JuliaPy/PythonCall.jl/issues/201), +[#202](https://github.com/JuliaPy/PythonCall.jl/issues/202), +[#529](https://github.com/JuliaPy/PythonCall.jl/pull/529) ## Issues when Numpy arrays are expected diff --git a/docs/src/releasenotes.md b/docs/src/releasenotes.md index 500dbf98..33141da8 100644 --- a/docs/src/releasenotes.md +++ b/docs/src/releasenotes.md @@ -1,5 +1,13 @@ # Release Notes +## Unreleased +* Finalizers are now thread-safe, meaning PythonCall now works in the presence of + multi-threaded Julia code. Previously, tricks such as disabling the garbage collector + were required. Python code must still be called on the main thread. +* `GC.disable()` and `GC.enable()` are now a no-op and deprecated since they are no + longer required for thread-safety. These will be removed in v1. +* Adds `GC.gc()`. + ## 0.9.21 (2024-07-20) * `Serialization.serialize` can use `dill` instead of `pickle` by setting the env var `JULIA_PYTHONCALL_PICKLE=dill`. * `numpy.bool_` can now be converted to `Bool` and other number types. diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 9ac93bcf..2a8d53ac 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -3,7 +3,7 @@ Garbage collection of Python objects. -See [`enable`](@ref), [`disable`](@ref) and [`gc`](@ref). +See [`gc`](@ref). """ module GC From 4fdcf310d75c92edd757233e2f5f267a09f6a59a Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 22:02:40 +0100 Subject: [PATCH 10/22] test multithreaded --- .github/workflows/tests-nightly.yml | 1 + .github/workflows/tests.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 6a443463..f73f9a7d 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -38,6 +38,7 @@ jobs: - uses: julia-actions/julia-runtest@v1 env: JULIA_DEBUG: PythonCall + JULIA_NUM_THREADS: '2' - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v1 with: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a3462b48..bc4d52d0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -43,6 +43,7 @@ jobs: uses: julia-actions/julia-runtest@v1 env: JULIA_DEBUG: PythonCall + JULIA_NUM_THREADS: '2' - name: Process coverage uses: julia-actions/julia-processcoverage@v1 - name: Upload coverage to Codecov @@ -82,6 +83,8 @@ jobs: - name: Run tests run: | pytest -s --nbval --cov=pysrc ./pytest/ + env: + PYTHON_JULIACALL_THREADS: '2' - name: Upload coverage to Codecov uses: codecov/codecov-action@v2 env: From 9051769376a142d0811053419d9bbbf2d428d6d9 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 22:13:17 +0100 Subject: [PATCH 11/22] test gc from python --- pytest/test_all.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pytest/test_all.py b/pytest/test_all.py index c6cff009..f94f91f6 100644 --- a/pytest/test_all.py +++ b/pytest/test_all.py @@ -75,3 +75,28 @@ def test_issue_433(): """ ) assert out == 25 + +def test_julia_gc(): + from juliacall import Main as jl + # We make a bunch of python objects with no reference to them, + # then call GC to try to finalize them. + # We want to make sure we don't segfault. + # Here we can (manually) verify that the background task is running successfully, + # by seeing the printout "Python GC (100 items): 0.000000 seconds." + # We also programmatically check things are working by verifying the queue is empty. + # Debugging note: if you get segfaults, then run the tests with + # `PYTHON_JULIACALL_HANDLE_SIGNALS=yes python3 -X faulthandler -m pytest -p no:faulthandler -s --nbval --cov=pysrc ./pytest/` + # in order to recover a bit more information from the segfault. + jl.seval( + """ + using PythonCall, Test + let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end + end + GC.gc() + @test isempty(PythonCall.GC.QUEUE.items) + """ + ) From 13cc34648e8098b227a2d1d3778aa134196ad594 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 22:22:36 +0100 Subject: [PATCH 12/22] add gc tests --- pytest/test_all.py | 1 + test/GC.jl | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/pytest/test_all.py b/pytest/test_all.py index f94f91f6..0916e918 100644 --- a/pytest/test_all.py +++ b/pytest/test_all.py @@ -96,6 +96,7 @@ def test_julia_gc(): finalize(obj) end end + Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) GC.gc() @test isempty(PythonCall.GC.QUEUE.items) """ diff --git a/test/GC.jl b/test/GC.jl index 46409041..93454100 100644 --- a/test/GC.jl +++ b/test/GC.jl @@ -1 +1,32 @@ -# TODO +@testset "201: GC segfaults" begin + # https://github.com/JuliaPy/PythonCall.jl/issues/201 + # This should not segfault! + cmd = Base.julia_cmd() + path = joinpath(@__DIR__, "finalize_test_script.jl") + p = run(`$cmd -t2 --project $path`) + @test p.exitcode == 0 +end + +@testset "GC.gc()" begin + let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end + end + Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) + PythonCall.GC.gc() + @test isempty(PythonCall.GC.QUEUE.items) +end + +@testset "GC.GCHook" begin + let + pyobjs = map(pylist, 1:100) + Threads.@threads for obj in pyobjs + finalize(obj) + end + end + Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) + GC.gc() + @test isempty(PythonCall.GC.QUEUE.items) +end From 45bc71f40df841d60c00066847e7c672a8ab3b1c Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Thu, 1 Aug 2024 22:36:59 +0100 Subject: [PATCH 13/22] fix test --- pytest/test_all.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytest/test_all.py b/pytest/test_all.py index 0916e918..f94f91f6 100644 --- a/pytest/test_all.py +++ b/pytest/test_all.py @@ -96,7 +96,6 @@ def test_julia_gc(): finalize(obj) end end - Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) GC.gc() @test isempty(PythonCall.GC.QUEUE.items) """ From 4ec7def7a6ae2d54482781e5525646e8196b050c Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 17:53:34 +0100 Subject: [PATCH 14/22] add deprecation warnings --- src/GC/GC.jl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 2a8d53ac..365ab0fe 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -22,7 +22,13 @@ Do nothing. Historically this would disable the PythonCall garbage collector. This was required for safety in multi-threaded code but is no longer needed, so this is now a no-op. """ -disable() = nothing +function disable() + Base.depwarn( + "disabling the PythonCall GC is no longer needed for thread-safety", + :disable, + ) + nothing +end """ PythonCall.GC.enable() @@ -34,7 +40,13 @@ Do nothing. Historically this would enable the PythonCall garbage collector. This was required for safety in multi-threaded code but is no longer needed, so this is now a no-op. """ -enable() = nothing +function enable() + Base.depwarn( + "disabling the PythonCall GC is no longer needed for thread-safety", + :enable, + ) + nothing +end """ PythonCall.GC.gc() From eb6b9f0dc584ef2d34b71ac348e533ac52f6ae3e Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 17:54:49 +0100 Subject: [PATCH 15/22] safer locking (plus explanatory comments) --- src/GC/GC.jl | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 365ab0fe..98908b5d 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -67,28 +67,36 @@ function gc() end function unsafe_free_queue() - lock(QUEUE.lock) - for ptr in QUEUE.items - if ptr != C.PyNULL - C.Py_DecRef(ptr) + Base.@lock QUEUE.lock begin + for ptr in QUEUE.items + if ptr != C.PyNULL + C.Py_DecRef(ptr) + end end + empty!(QUEUE.items) end - empty!(QUEUE.items) - unlock(QUEUE.lock) nothing end function enqueue(ptr::C.PyPtr) + # If the ptr is NULL there is nothing to free. + # If C.CTX.is_initialized is false then the Python interpreter hasn't started yet + # or has been finalized; either way attempting to free will cause an error. if ptr != C.PyNULL && C.CTX.is_initialized if C.PyGILState_Check() == 1 + # If the current thread holds the GIL, then we can immediately free. C.Py_DecRef(ptr) + # We may as well also free any other enqueued objects. if !isempty(QUEUE.items) unsafe_free_queue() end else - lock(QUEUE.lock) - push!(QUEUE.items, ptr) - unlock(QUEUE.lock) + # Otherwise we push the pointer onto the queue to be freed later, either: + # (a) If a future Python object is finalized on the thread holding the GIL + # in the branch above. + # (b) If the GCHook() object below is finalized in an ordinary GC. + # (c) If the user calls PythonCall.GC.gc(). + Base.@lock QUEUE.lock push!(QUEUE.items, ptr) end end nothing @@ -106,9 +114,7 @@ function enqueue_all(ptrs) unsafe_free_queue() end else - lock(QUEUE.lock) - append!(QUEUE.items, ptrs) - unlock(QUEUE.lock) + Base.@lock QUEUE.lock append!(QUEUE.items, ptrs) end end nothing From a68015ecc145aa32eea71ccc8149258e68fe9303 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 17:55:06 +0100 Subject: [PATCH 16/22] ref of weakref --- src/GC/GC.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 98908b5d..e7e992a6 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -10,7 +10,7 @@ module GC using ..C: C const QUEUE = (; items = C.PyPtr[], lock = Threads.SpinLock()) -const HOOK = WeakRef() +const HOOK = Ref{WeakRef}() """ PythonCall.GC.disable() @@ -147,7 +147,7 @@ function _gchook_finalizer(x) end function __init__() - HOOK.value = GCHook() + HOOK[] = WeakRef(GCHook()) nothing end From ab560ac94c9026d120d8398a4f05eea22470a455 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 18:03:00 +0100 Subject: [PATCH 17/22] SpinLock -> ReentrantLock --- src/GC/GC.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index e7e992a6..3b25b760 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -9,7 +9,7 @@ module GC using ..C: C -const QUEUE = (; items = C.PyPtr[], lock = Threads.SpinLock()) +const QUEUE = (; items = C.PyPtr[], lock = ReentrantLock()) const HOOK = Ref{WeakRef}() """ From cd4db5c2f47fdf300916f68836de888784cf067c Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 18:34:06 +0100 Subject: [PATCH 18/22] SpinLock -> ReentrantLock --- src/GC/GC.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GC/GC.jl b/src/GC/GC.jl index 3b25b760..e7e992a6 100644 --- a/src/GC/GC.jl +++ b/src/GC/GC.jl @@ -9,7 +9,7 @@ module GC using ..C: C -const QUEUE = (; items = C.PyPtr[], lock = ReentrantLock()) +const QUEUE = (; items = C.PyPtr[], lock = Threads.SpinLock()) const HOOK = Ref{WeakRef}() """ From 31cd57da9db661da87339ee346c21bee454ddbe5 Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 21:05:36 +0100 Subject: [PATCH 19/22] typo: testset -> testitem --- test/GC.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/GC.jl b/test/GC.jl index 93454100..f53bfec5 100644 --- a/test/GC.jl +++ b/test/GC.jl @@ -1,4 +1,4 @@ -@testset "201: GC segfaults" begin +@testitem "201: GC segfaults" begin # https://github.com/JuliaPy/PythonCall.jl/issues/201 # This should not segfault! cmd = Base.julia_cmd() @@ -7,7 +7,7 @@ @test p.exitcode == 0 end -@testset "GC.gc()" begin +@testitem "GC.gc()" begin let pyobjs = map(pylist, 1:100) Threads.@threads for obj in pyobjs @@ -19,7 +19,7 @@ end @test isempty(PythonCall.GC.QUEUE.items) end -@testset "GC.GCHook" begin +@testitem "GC.GCHook" begin let pyobjs = map(pylist, 1:100) Threads.@threads for obj in pyobjs From 73f7eb8fa1d96c98434d60ddf50f54ddcd031a7e Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 21:06:03 +0100 Subject: [PATCH 20/22] delete redundant test --- test/GC.jl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/GC.jl b/test/GC.jl index f53bfec5..3691a315 100644 --- a/test/GC.jl +++ b/test/GC.jl @@ -1,12 +1,3 @@ -@testitem "201: GC segfaults" begin - # https://github.com/JuliaPy/PythonCall.jl/issues/201 - # This should not segfault! - cmd = Base.julia_cmd() - path = joinpath(@__DIR__, "finalize_test_script.jl") - p = run(`$cmd -t2 --project $path`) - @test p.exitcode == 0 -end - @testitem "GC.gc()" begin let pyobjs = map(pylist, 1:100) From 2a54ca9b85e0cc8280b58f162dce0b7968c5ca9c Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 21:20:43 +0100 Subject: [PATCH 21/22] remove out of date comment --- pytest/test_all.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytest/test_all.py b/pytest/test_all.py index f94f91f6..10f78462 100644 --- a/pytest/test_all.py +++ b/pytest/test_all.py @@ -81,8 +81,6 @@ def test_julia_gc(): # We make a bunch of python objects with no reference to them, # then call GC to try to finalize them. # We want to make sure we don't segfault. - # Here we can (manually) verify that the background task is running successfully, - # by seeing the printout "Python GC (100 items): 0.000000 seconds." # We also programmatically check things are working by verifying the queue is empty. # Debugging note: if you get segfaults, then run the tests with # `PYTHON_JULIACALL_HANDLE_SIGNALS=yes python3 -X faulthandler -m pytest -p no:faulthandler -s --nbval --cov=pysrc ./pytest/` From ca64d21ff2af861db344d9736f28f8666ab5af9d Mon Sep 17 00:00:00 2001 From: Christopher Doris Date: Fri, 2 Aug 2024 21:40:53 +0100 Subject: [PATCH 22/22] comment erroneous test --- test/GC.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/GC.jl b/test/GC.jl index 3691a315..2467f694 100644 --- a/test/GC.jl +++ b/test/GC.jl @@ -5,7 +5,9 @@ finalize(obj) end end - Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) + # The GC sometimes actually frees everything before this line. + # We can uncomment this line if we GIL.@release the above block once we have it. + # Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) PythonCall.GC.gc() @test isempty(PythonCall.GC.QUEUE.items) end @@ -17,7 +19,9 @@ end finalize(obj) end end - Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) + # The GC sometimes actually frees everything before this line. + # We can uncomment this line if we GIL.@release the above block once we have it. + # Threads.nthreads() > 1 && @test !isempty(PythonCall.GC.QUEUE.items) GC.gc() @test isempty(PythonCall.GC.QUEUE.items) end