Thread local fallback weak bag #2844

Merged · 7 commits · Feb 27, 2022
58 changes: 58 additions & 0 deletions benchmarks/src/main/scala/cats/effect/benchmarks/ThreadLocalBenchmark.scala
@@ -0,0 +1,58 @@
/*
* Copyright 2020-2022 Typelevel
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cats.effect.benchmarks

import cats.effect.unsafe.FiberMonitor

import org.openjdk.jmh.annotations._

import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration._

import java.util.concurrent.TimeUnit

/**
* To run the benchmark from within sbt:
*
* jmh:run -i 10 -wi 10 -f 2 -t 1 cats.effect.benchmarks.ThreadLocalBenchmark
*
* Which means "10 iterations", "10 warm-up iterations", "2 forks", "1 thread". Please note that
* benchmarks should be usually executed at least in 10 iterations (as a rule of thumb), but
* more is better.
*/
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.Throughput))
@OutputTimeUnit(TimeUnit.SECONDS)
class ThreadLocalBenchmark {

final implicit val executionContext: ExecutionContext = ExecutionContext.global

@Param(Array("2000"))
var size: Int = _

@Benchmark
def contention() = {
val monitor = new FiberMonitor(null)

def future(): Future[Unit] = Future {
monitor.monitorSuspended(null)
()
}

Await.result(Future.sequence(List.fill(size)(future())), Duration.Inf)
}
}
5 changes: 4 additions & 1 deletion build.sbt
@@ -451,7 +451,10 @@ lazy val core = crossProject(JSPlatform, JVMPlatform)
// introduced by #2769, Simplify the transfer of WorkerThread data structures when blocking
// changes to `cats.effect.unsafe` package private code
ProblemFilters.exclude[MissingClassProblem]("cats.effect.unsafe.WorkerThread$"),
ProblemFilters.exclude[MissingClassProblem]("cats.effect.unsafe.WorkerThread$Data")
ProblemFilters.exclude[MissingClassProblem]("cats.effect.unsafe.WorkerThread$Data"),
// introduced by #2844, Thread local fallback weak bag
// changes to `cats.effect.unsafe` package private code
ProblemFilters.exclude[MissingClassProblem]("cats.effect.unsafe.SynchronizedWeakBag")
) ++ {
if (isDotty.value) {
// Scala 3 specific exclusions
43 changes: 25 additions & 18 deletions core/jvm/src/main/scala/cats/effect/unsafe/FiberMonitor.scala
@@ -18,11 +18,12 @@ package cats.effect
package unsafe

import cats.effect.tracing.TracingConstants
import cats.effect.unsafe.ref.WeakReference

import scala.collection.mutable
import scala.concurrent.ExecutionContext

import java.util.concurrent.ThreadLocalRandom
import java.util.concurrent.ConcurrentLinkedQueue

/**
* A slightly more involved implementation of an unordered bag used for tracking asynchronously
@@ -49,16 +50,8 @@ private[effect] final class FiberMonitor(
private[this] val compute: WorkStealingThreadPool
) extends FiberMonitorShared {

private[this] val size: Int = Runtime.getRuntime().availableProcessors() << 2
private[this] val bags: Array[SynchronizedWeakBag[IOFiber[_]]] = new Array(size)

{
var i = 0
while (i < size) {
bags(i) = new SynchronizedWeakBag()
i += 1
}
}
private[this] final val Bags = FiberMonitor.Bags
private[this] final val BagReferences = FiberMonitor.BagReferences

/**
* Registers a suspended fiber.
@@ -145,18 +138,21 @@
else ()

private[this] def monitorFallback(fiber: IOFiber[_]): WeakBag.Handle = {
val rnd = ThreadLocalRandom.current()
val idx = rnd.nextInt(size)
bags(idx).insert(fiber)
val bag = Bags.get()
val handle = bag.insert(fiber)
bag.synchronizationPoint.lazySet(true)
handle
}

private[this] def foreignFibers(): Set[IOFiber[_]] = {
val foreign = mutable.Set.empty[IOFiber[_]]

var i = 0
while (i < size) {
foreign ++= bags(i).toSet
i += 1
BagReferences.iterator().forEachRemaining { bagRef =>
val bag = bagRef.get()
if (bag ne null) {
val _ = bag.synchronizationPoint.get()
foreign ++= bag.toSet
}
}

foreign.toSet
@@ -172,4 +168,15 @@ private[effect] object FiberMonitor {
new FiberMonitor(null)
}
}

private[FiberMonitor] final val Bags: ThreadLocal[WeakBag[IOFiber[_]]] =
ThreadLocal.withInitial { () =>
val bag = new WeakBag[IOFiber[_]]()
BagReferences.offer(new WeakReference(bag))
Member:

@vasilmkd sorry, I had a follow-up question about this change.

Is it possible that we could lose track of suspended fibers, if the threads that they were suspended from no longer exist? Is that even a realistic situation 😆

Member Author:

That's a possibility, yes. The change was made with the intention that having an already inaccurate reporting mechanism remain that way is better than a memory leak. If people disagree, PRs are welcome.

Member:

That's fair, thanks.

Probably over-complicated, but I wonder if we could use a PhantomReference to "evacuate" the contents of the bag when its owning thread gets GCed.

Btw, since the WSTP also dynamically adds/removes threads, how is this problem handled there?
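
A minimal sketch of that evacuation idea, assuming a dedicated daemon thread and stand-in types; none of these names come from the PR, and Bag here is a strongly-referencing stand-in for the real WeakBag[IOFiber[_]]:

import java.lang.ref.{PhantomReference, ReferenceQueue}
import java.util.concurrent.ConcurrentLinkedQueue

object Evacuation {
  type Bag = ConcurrentLinkedQueue[AnyRef] // stand-in for WeakBag[IOFiber[_]]

  // Pairs a per-thread bag with a phantom reference to its owning thread,
  // keeping the bag strongly reachable after the thread is collected.
  private final class OwnerRef(owner: Thread, val bag: Bag, q: ReferenceQueue[Thread])
      extends PhantomReference[Thread](owner, q)

  private val expired = new ReferenceQueue[Thread]
  private val live = new ConcurrentLinkedQueue[OwnerRef] // keeps OwnerRefs reachable
  private val overflow: Bag = new ConcurrentLinkedQueue[AnyRef]

  // Called once per thread, e.g. from ThreadLocal.withInitial.
  def register(bag: Bag): Unit = {
    live.offer(new OwnerRef(Thread.currentThread(), bag, expired))
    ()
  }

  // Daemon thread: block until an owning thread is collected, then move the
  // contents of its bag into a global overflow bag instead of losing them.
  private val evacuator = new Thread(
    () => {
      while (true) {
        val ref = expired.remove().asInstanceOf[OwnerRef]
        var a = ref.bag.poll()
        while (a ne null) { overflow.offer(a); a = ref.bag.poll() }
        live.remove(ref)
        ()
      }
    },
    "fiber-bag-evacuator")
  evacuator.setDaemon(true)
  evacuator.start()
}

One wrinkle: the overflow bag above holds strong references, so a real implementation would need a weak overflow structure to avoid pinning the fibers it rescues.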

Member Author:

The WSTP does not use this code path. I'm open to exploring Phantom References.

Member:

> The WSTP does not use this code path.

Right :) but it still uses a thread-local fiber bag, right? And the threads may be added/removed as the WSTP resizes itself? So it seems like it's a very similar problem.

Member:

Yeah, I wasn't sure if it's worth it :) Instead of a dedicated thread, is this something we can schedule on the runtime itself?

Member Author (@vasilmkd, Feb 28, 2022):

That's what we had before. It requires solving the mapping of threads to bags, which was done using locking. If we come up with a concurrent weak bag/hash map, then sure. But not even JCTools has that, afaik. It's a big undertaking.

Edit: I misunderstood your comment and answered something completely different.

Member Author (@vasilmkd, Feb 28, 2022):

Scheduling on the runtime requires answering how often to run it, which to me doesn't seem like a good strategy for something considered to be memory-critical. And ReferenceQueue is not too smart of an interface either. You can poll it in a non-blocking way, but when it returns null, when do you try again? The proper way IMO is to block on it and run cleanup on each expiry.
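
For reference, the two draining strategies being contrasted here, with illustrative signatures that are not from the PR:

import java.lang.ref.{Reference, ReferenceQueue}

object Draining {
  // Non-blocking: drain whatever has expired so far. When poll() returns
  // null, the caller must pick an arbitrary interval before trying again.
  def drainOnce[A](q: ReferenceQueue[A])(cleanup: Reference[_ <: A] => Unit): Unit = {
    var ref = q.poll()
    while (ref ne null) {
      cleanup(ref)
      ref = q.poll()
    }
  }

  // Blocking: park until a reference expires and run cleanup exactly once per
  // expiry. There is no interval to tune, but it occupies a dedicated thread.
  def drainForever[A](q: ReferenceQueue[A])(cleanup: Reference[_ <: A] => Unit): Unit =
    while (true) cleanup(q.remove())
}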

Member:

No, it doesn't seem very elegant :) I feel like in practice there must be some reasonable rate at which we can check the ReferenceQueue... if an application is adding/removing threads too fast, its performance would presumably be bounded by other factors anyway. But I don't really know about such things :)

After thinking about this more, it seems like it could be important. A deadlock is exactly the situation in which a dynamically resizing thread pool would start culling threads due to lack of work, and that could cause GC of the fiber bag holding the very fibers that would help diagnose the deadlock.

Member Author:

@djspiewak 👆🏻

bag
}

private[FiberMonitor] final val BagReferences
: ConcurrentLinkedQueue[WeakReference[WeakBag[IOFiber[_]]]] =
new ConcurrentLinkedQueue()
}
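
A note on the synchronizationPoint flag in this diff: each fallback bag is mutated only by its owning thread, using plain writes. The lazySet(true) after an insert is a release store, and the otherwise-discarded get() in foreignFibers is an acquire load, which presumably publishes the owner's earlier plain writes to the snapshotting thread on a best-effort basis (the snapshot may still be slightly stale). A minimal illustration of the pattern, with hypothetical names:

import java.util.concurrent.atomic.AtomicBoolean

// Single-writer structure mirroring the trick above.
final class SingleWriterBox {
  private[this] var value: Int = 0 // plain writes, owner thread only
  private[this] val syncPoint = new AtomicBoolean(true)

  // Owner thread: plain write, then a release store on the flag.
  def put(v: Int): Unit = {
    value = v
    syncPoint.lazySet(true)
  }

  // Foreign thread: an acquire load of the flag first; plain writes that
  // preceded an observed lazySet become visible (best effort, possibly stale).
  def read(): Int = {
    val _ = syncPoint.get()
    value
  }
}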

core/shared/src/main/scala/cats/effect/unsafe/SynchronizedWeakBag.scala: This file was deleted.
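
The deleted file's contents are not shown, but the MiMa exclusion in build.sbt above names the class as cats.effect.unsafe.SynchronizedWeakBag. A plausible reconstruction, assuming it simply guarded a WeakBag with the object's intrinsic lock (the per-bag locking this PR removes), though not necessarily the verbatim original:

package cats.effect.unsafe

// Reconstruction: a WeakBag wrapper whose operations are serialized by the
// intrinsic lock, so multiple threads can safely share one bag.
private final class SynchronizedWeakBag[A <: AnyRef] {
  private[this] val weakBag: WeakBag[A] = new WeakBag()

  def insert(a: A): WeakBag.Handle = synchronized(weakBag.insert(a))

  def toSet: Set[A] = synchronized(weakBag.toSet)
}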

5 changes: 4 additions & 1 deletion core/shared/src/main/scala/cats/effect/unsafe/WeakBag.scala
@@ -21,6 +21,8 @@ import cats.effect.unsafe.ref.{ReferenceQueue, WeakReference}
import scala.annotation.tailrec
import scala.collection.mutable

import java.util.concurrent.atomic.AtomicBoolean

private final class WeakBag[A <: AnyRef] {
import WeakBag._

@@ -29,7 +31,8 @@ private final class WeakBag[A <: AnyRef] {
private[this] val queue: ReferenceQueue[A] = new ReferenceQueue()
private[this] var capacity: Int = 256
private[this] var table: Array[Entry[A]] = new Array(capacity)
private[this] var index = 0
private[this] var index: Int = 0
private[unsafe] val synchronizationPoint: AtomicBoolean = new AtomicBoolean(true)

@tailrec
def insert(a: A): Handle = {