Batch update with singleton/sharding fixes (akkadotnet#3780)
* Introduce 'MemberDowned' member event
Migrated from #25854

* Ignore possible state change while waiting for removal
Migrated from #25274

* mark TryToIdentifySingleton as NoSerializationVerificationNeeded
Migrated from #24442
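For reference, `INoSerializationVerificationNeeded` is the Akka.NET marker interface that exempts a local-only message from the serialize-messages verification enabled in test configs; the sketch below applies the same pattern to a hypothetical message type (not the actual `TryToIdentifySingleton` source).

```csharp
using Akka.Actor;

// Hypothetical local-only message; the marker interface opts it out of the
// serialize-messages/serialize-creators verification used in tests.
internal sealed class TryToIdentifyWorker : INoSerializationVerificationNeeded
{
    public static readonly TryToIdentifyWorker Instance = new TryToIdentifyWorker();
    private TryToIdentifyWorker() { }
}
```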

* Improvements of singleton leaving scenario
Migrated from #25639 (#25710)

Testing of the singleton leaving scenario
Gossip optimization: the Exiting change is now sent to the two oldest members per role
Hardening of the ClusterSingletonManagerIsStuck restart; increase ClusterSingletonManagerIsStuck

* Stop singleton and shards when self MemberDowned
Migrated from #26336 (#26339)

Stop the singleton when self is MemberDowned
  * It's safer to stop the singleton instance early in case of downing,
    instead of waiting for MemberRemoved and trying to hand over.
Stop the ShardRegion when self is MemberDowned
Add an upper bound when waiting for seen in shutdownSelfWhenDown
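A minimal sketch of how user code could observe the new event; the watcher actor and its reaction are illustrative assumptions, not part of this commit.

```csharp
using Akka.Actor;
using Akka.Cluster;

public class DownWatcher : UntypedActor
{
    private readonly Cluster _cluster = Cluster.Get(Context.System);

    // Subscribe to member events, which now include ClusterEvent.MemberDowned.
    protected override void PreStart() =>
        _cluster.Subscribe(Self, ClusterEvent.InitialStateAsEvents, typeof(ClusterEvent.IMemberEvent));

    protected override void PostStop() => _cluster.Unsubscribe(Self);

    protected override void OnReceive(object message)
    {
        if (message is ClusterEvent.MemberDowned downed &&
            downed.Member.UniqueAddress.Equals(_cluster.SelfUniqueAddress))
        {
            // Our own node was downed: react early instead of waiting for
            // MemberRemoved, mirroring what the singleton/region now do.
            Context.Stop(Self);
        }
    }
}
```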

* Discards HandOverToMe in state End to avoid unhandled message warning
Migrated from #26793

* Warn if handOffStopMessage not handled
Migrated from #25648

Stops a shard's entities forcefully if they don't handle the stopMessage
Prints a warning log while stopping those entities
Fixes the version in the backwards-compatibility exclude file and checks that the shard has stopped
Adds documentation for the hand-off timeout
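The forced stop is only a fallback: an entity that does handle the configured handOffStopMessage should still stop itself. A sketch, assuming a custom stop message was registered via the `ClusterSharding.Start` overload that accepts a handOffStopMessage:

```csharp
using Akka.Actor;

// Hypothetical custom stop message registered as the handOffStopMessage
// when the shard region was started.
public sealed class StopEntity
{
    public static readonly StopEntity Instance = new StopEntity();
    private StopEntity() { }
}

public class CounterEntity : UntypedActor
{
    protected override void OnReceive(object message)
    {
        switch (message)
        {
            case StopEntity _:
                // Handle the hand-off stop message; otherwise the shard now
                // logs a warning and stops the entity forcefully after the
                // hand-off timeout.
                Context.Stop(Self);
                break;
            default:
                // ... normal entity behaviour ...
                Unhandled(message);
                break;
        }
    }
}
```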

* Save EntityStarted when StartEntity requested via remembered entities (for validation)
Migrated from #26061
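For context, this path is only active with remembered entities enabled. A sketch of the relevant switch and of requesting an entity start directly, assuming the `ShardRegion.StartEntity` message (per the change above, such a start is now persisted as `EntityStarted`):

```csharp
using Akka.Actor;
using Akka.Cluster.Sharding;
using Akka.Configuration;

public static class RememberedEntities
{
    // Remembered entities must be enabled for StartEntity/EntityStarted bookkeeping.
    public static readonly Config Enabled = ConfigurationFactory.ParseString(
        "akka.cluster.sharding.remember-entities = on");

    // Ask the region to start (and now also record) an entity without sending
    // it a domain message.
    public static void StartAndRemember(IActorRef shardRegion, string entityId) =>
        shardRegion.Tell(new ShardRegion.StartEntity(entityId));
}
```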

* Improve default shard rebalancing algorithm
Migrated from #26012 (#26101)

* Improve default shard rebalancing algorithm

* Use rebalance-threshold=1 because it gives the best distribution,
  and the previous default could result in too large a difference between nodes
* Fixed an off-by-one error: difference > threshold vs >=
* Added more unit tests
* Note that in some cases the result may still not be optimal, stopping more
  shards than necessary, but a different strategy that looks at more than the
  most- and least-loaded regions is out of scope for this issue. In practice
  those cases shouldn't matter much.
* Also note that the rebalance interval is 10 seconds by default,
  so shards will typically start up before the next rebalance tick.
  Rebalancing is intentionally a slow process, to avoid causing instability
  by moving too much at the same time.
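For reference, the settings involved, expressed as a HOCON override matching the keys asserted in the updated specs below:

```csharp
using Akka.Configuration;

// rebalance-threshold = 1 is the new default and gives the most even distribution;
// max-simultaneous-rebalance remains 3, and the rebalance tick runs every 10 seconds
// by default, so shards move gradually.
var shardingConfig = ConfigurationFactory.ParseString(@"
    akka.cluster.sharding.least-shard-allocation-strategy {
        rebalance-threshold = 1
        max-simultaneous-rebalance = 3
    }");
```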

* Always retry sendGracefulShutdownToCoordinator
Migrated from #26214

* I could reproduce the issue locally with debug logging, and it's clear that it's a
  timing issue: the GracefulShutdownReq message goes to dead letters and is not
  retried because the coordinator variable was unset.
* The cluster-sharding-shutdown-region phase of CoordinatedShutdown timed out.
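A sketch of how a graceful exit is typically driven and how the timed-out phase could be given more headroom; the exact phase-timeout key path and the reason-taking `Run` overload are assumptions about the standard CoordinatedShutdown layout in recent Akka.NET versions, not something this commit adds:

```csharp
using System.Threading.Tasks;
using Akka;
using Akka.Actor;
using Akka.Configuration;

public static class ShardingShutdown
{
    // Assumed key path, following the standard coordinated-shutdown phase layout.
    public static readonly Config PhaseTimeoutOverride = ConfigurationFactory.ParseString(
        "akka.coordinated-shutdown.phases.cluster-sharding-shutdown-region.timeout = 30s");

    // Running CoordinatedShutdown drains the shard regions (the phase named above)
    // before the node leaves the cluster.
    public static Task<Done> LeaveGracefully(ActorSystem system) =>
        CoordinatedShutdown.Get(system).Run(CoordinatedShutdown.ClusterLeavingReason.Instance);
}
```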

* Consolidate duplicate persistence sharding function
Migrated from #26451 (#26452)

* API approval

* sharding with ddata specs timing issue fix

* Enable passivate-idle-entity-after by default
Migrated from #25782 (#26819)
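A sketch of tuning or disabling the now-default idle passivation; the interval value here is illustrative, not the shipped default:

```csharp
using Akka.Configuration;

// Passivate entities that haven't received a message for 5 minutes;
// set the value to "off" to restore the old always-alive behaviour.
var passivationConfig = ConfigurationFactory.ParseString(
    "akka.cluster.sharding.passivate-idle-entity-after = 5m");
```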

* persistent shard passivate-idle fix
ddata passivate-idle fix
sharding fixes (part of #26878)

* Removed references to System.ValueTuple
zbynek001 authored and Aaronontheweb committed Jul 21, 2019
1 parent e96d567 commit 125d738
Showing 35 changed files with 1,148 additions and 258 deletions.
@@ -63,6 +63,7 @@ class = ""Akka.Cluster.Sharding.Tests.MemoryJournalShared, Akka.Cluster.Sharding
}}
"))
.WithFallback(Sharding.ClusterSharding.DefaultConfig())
.WithFallback(DistributedData.DistributedData.DefaultConfig())
.WithFallback(Tools.Singleton.ClusterSingletonManager.DefaultConfig())
.WithFallback(MultiNodeClusterSpec.ClusterConfig());

@@ -177,7 +178,7 @@ protected ClusterShardingRememberEntitiesNewExtractorSpec(ClusterShardingRemembe
EnterBarrier("startup");
}
protected bool IsDDataMode { get; }

protected override void AfterTermination()
{
base.AfterTermination();
@@ -63,6 +63,7 @@ class = ""Akka.Cluster.Sharding.Tests.MemoryJournalShared, Akka.Cluster.Sharding
}}
"))
.WithFallback(Sharding.ClusterSharding.DefaultConfig())
.WithFallback(DistributedData.DistributedData.DefaultConfig())
.WithFallback(Tools.Singleton.ClusterSingletonManager.DefaultConfig())
.WithFallback(MultiNodeClusterSpec.ClusterConfig());

@@ -158,7 +159,7 @@ protected ClusterShardingRememberEntitiesSpec(ClusterShardingRememberEntitiesSpe
}

protected bool IsDDataMode { get; }

protected override void AfterTermination()
{
base.AfterTermination();
@@ -77,7 +77,7 @@ protected ClusterShardingSpecConfig(string mode, string entityRecoveryStrategy =
number-of-entities = 1
}}
least-shard-allocation-strategy {{
rebalance-threshold = 2
rebalance-threshold = 1
max-simultaneous-rebalance = 1
}}
distributed-data.durable.lmdb {{
@@ -468,7 +468,7 @@ private void CreateCoordinator()
"coordinator",
TimeSpan.FromSeconds(5),
TimeSpan.FromSeconds(5),
0.1,
0.1,
-1).WithDeploy(Deploy.Local);

Sys.ActorOf(ClusterSingletonManager.Props(
@@ -47,7 +47,7 @@ public void Should_cluster_sharding_settings_have_default_config()
Assert.Equal("akka.cluster.singleton", config.GetString("coordinator-singleton"));
Assert.Equal(string.Empty, config.GetString("use-dispatcher"));

Assert.Equal(10, config.GetInt("least-shard-allocation-strategy.rebalance-threshold"));
Assert.Equal(1, config.GetInt("least-shard-allocation-strategy.rebalance-threshold"));
Assert.Equal(3, config.GetInt("least-shard-allocation-strategy.max-simultaneous-rebalance"));

Assert.Equal("all", config.GetString("entity-recovery-strategy"));
@@ -60,7 +60,7 @@ public void Should_cluster_sharding_settings_have_default_config()
Assert.Equal("singleton", singletonConfig.GetString("singleton-name"));
Assert.Equal(string.Empty, singletonConfig.GetString("role"));
Assert.Equal(TimeSpan.FromSeconds(1), singletonConfig.GetTimeSpan("hand-over-retry-interval"));
Assert.Equal(10, singletonConfig.GetInt("min-number-of-hand-over-retries"));
Assert.Equal(15, singletonConfig.GetInt("min-number-of-hand-over-retries"));
}
}
}
@@ -78,5 +78,40 @@ public void ClusterSharding_must_start_a_region_in_proxy_mode_in_case_of_node_ro

region.Should().BeSameAs(proxy);
}

[Fact]
public void HandOffStopper_must_stop_the_entity_even_if_the_entity_doesnt_handle_handOffStopMessage()
{

var probe = CreateTestProbe();
var shardName = "test";
var emptyHandlerActor = Sys.ActorOf(Props.Create(() => new EmptyHandlerActor()));
var handOffStopper = Sys.ActorOf(
Props.Create(() => new ShardRegion.HandOffStopper(shardName, probe.Ref, new IActorRef[] { emptyHandlerActor }, HandOffStopMessage.Instance, TimeSpan.FromMilliseconds(10)))
);

Watch(emptyHandlerActor);
ExpectTerminated(emptyHandlerActor, TimeSpan.FromSeconds(1));

probe.ExpectMsg(new PersistentShardCoordinator.ShardStopped(shardName), TimeSpan.FromSeconds(1));

Watch(handOffStopper);
ExpectTerminated(handOffStopper, TimeSpan.FromSeconds(1));
}

internal class HandOffStopMessage : INoSerializationVerificationNeeded
{
public static readonly HandOffStopMessage Instance = new HandOffStopMessage();
private HandOffStopMessage()
{
}
}

internal class EmptyHandlerActor : UntypedActor
{
protected override void OnReceive(object message)
{
}
}
}
}
@@ -105,12 +105,15 @@ protected override void BeforeTermination()
/// </summary>
private void PingEntities()
{
_region2.Tell(1, _probe2.Ref);
_probe2.ExpectMsg<int>(10.Seconds()).Should().Be(1);
_region2.Tell(2, _probe2.Ref);
_probe2.ExpectMsg<int>(10.Seconds()).Should().Be(2);
_region2.Tell(3, _probe2.Ref);
_probe2.ExpectMsg<int>(10.Seconds()).Should().Be(3);
AwaitAssert(() =>
{
_region2.Tell(1, _probe2.Ref);
_probe2.ExpectMsg<int>(1.Seconds()).Should().Be(1);
_region2.Tell(2, _probe2.Ref);
_probe2.ExpectMsg<int>(1.Seconds()).Should().Be(2);
_region2.Tell(3, _probe2.Ref);
_probe2.ExpectMsg<int>(1.Seconds()).Should().Be(3);
}, TimeSpan.FromSeconds(10));
}

[Fact]