Skip to content

Commit

Permalink
Merge pull request vespa-engine#32314 from vespa-engine/arnej/add-logserver-memory-feature-flag
Browse files Browse the repository at this point in the history

add feature flag for tuning memory on logserver nodes
  • Loading branch information
Harald Musum authored Sep 3, 2024
2 parents 2764ffd + 045e9c0 commit 6c04a09
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 8 deletions.
1 change: 1 addition & 0 deletions config-model-api/abi-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -1346,6 +1346,7 @@
"public boolean logserverOtelCol()",
"public com.yahoo.config.provision.SharedHosts sharedHosts()",
"public com.yahoo.config.provision.NodeResources$Architecture adminClusterArchitecture()",
"public double logserverNodeMemory()",
"public boolean symmetricPutAndActivateReplicaSelection()",
"public boolean enforceStrictlyIncreasingClusterStateVersions()",
"public boolean distributionConfigFromClusterController()",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ interface FeatureFlags {
@ModelFeatureFlag(owners = {"olaa"}) default boolean logserverOtelCol() { return false; }
@ModelFeatureFlag(owners = {"bratseth"}) default SharedHosts sharedHosts() { return SharedHosts.empty(); }
@ModelFeatureFlag(owners = {"bratseth"}) default Architecture adminClusterArchitecture() { return Architecture.x86_64; }
/** Memory override for logserver nodes, in GiB. The default 0.0 is not applied as an override: only values greater than zero replace the built-in logserver memory sizing. */
@ModelFeatureFlag(owners = {"arnej"}) default double logserverNodeMemory() { return 0.0; }
@ModelFeatureFlag(owners = {"vekterli"}) default boolean symmetricPutAndActivateReplicaSelection() { return false; }
@ModelFeatureFlag(owners = {"vekterli"}) default boolean enforceStrictlyIncreasingClusterStateVersions() { return false; }
@ModelFeatureFlag(owners = {"vekterli"}) default boolean distributionConfigFromClusterController() { return false; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ public class QuotaValidator implements Validator {
public void validate(Context context) {
var zone = context.deployState().zone();
var exclusivity = new Exclusivity(zone, context.deployState().featureFlags().sharedHosts());
var tuning = new CapacityPolicies.Tuning(context.deployState().featureFlags().adminClusterArchitecture(),
context.deployState().featureFlags().logserverNodeMemory());
var capacityPolicies = new CapacityPolicies(zone, exclusivity, context.model().applicationPackage().getApplicationId(),
context.deployState().featureFlags().adminClusterArchitecture());
tuning);
var quota = context.deployState().getProperties().quota();
quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, context.model()));
quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context, capacityPolicies));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,29 @@
*/
public class CapacityPolicies {

/**
 * Adjustable inputs to the capacity policies.
 *
 * @param adminClusterArchitecture the architecture applied to default resources of admin clusters
 * @param logserverMemoryGiB       memory to give logserver nodes, in GiB; a value that is not
 *                                 greater than zero means "no override"
 */
public record Tuning(Architecture adminClusterArchitecture, double logserverMemoryGiB) {

    /**
     * Picks the memory amount to use for a logserver node.
     *
     * @param v the memory amount to fall back to when no override is set
     * @return the override if it is greater than zero, otherwise {@code v}
     */
    double logserverMem(double v) {
        if (logserverMemoryGiB > 0) {
            return logserverMemoryGiB;
        }
        return v;
    }

}

private final Zone zone;
private final Exclusivity exclusivity;
private final ApplicationId applicationId;
private final Architecture adminClusterArchitecture;
private final Tuning tuning;

/**
 * Creates capacity policies from an admin cluster architecture only.
 *
 * The architecture is wrapped in a {@link Tuning} with a logserver memory value of 0.0,
 * and construction is delegated to the constructor taking a {@link Tuning}.
 */
public CapacityPolicies(Zone zone,
                        Exclusivity exclusivity,
                        ApplicationId applicationId,
                        Architecture adminClusterArchitecture) {
    this(zone, exclusivity, applicationId, new Tuning(adminClusterArchitecture, 0.0));
}

public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Tuning tuning) {
this.zone = zone;
this.exclusivity = exclusivity;
this.applicationId = applicationId;
this.adminClusterArchitecture = adminClusterArchitecture;
this.tuning = tuning;
}

public Capacity applyOn(Capacity capacity, boolean exclusive) {
Expand Down Expand Up @@ -92,6 +105,7 @@ public NodeResources specifyFully(NodeResources resources, ClusterSpec clusterSp
}

private NodeResources defaultResources(ClusterSpec clusterSpec) {
var adminClusterArchitecture = tuning.adminClusterArchitecture();
if (clusterSpec.type() == ClusterSpec.Type.admin) {
if (exclusivity.allocation(clusterSpec)) {
return smallestExclusiveResources().with(adminClusterArchitecture);
Expand Down Expand Up @@ -134,14 +148,14 @@ private NodeResources clusterControllerResources(ClusterSpec clusterSpec, Archit

private NodeResources logserverResources(Architecture architecture) {
if (zone.cloud().name() == CloudName.AZURE)
return new NodeResources(2, 4, 50, 0.3);
return new NodeResources(2, tuning.logserverMem(4.0), 50, 0.3);

if (zone.cloud().name() == CloudName.GCP)
return new NodeResources(1, 4, 50, 0.3);
return new NodeResources(1, tuning.logserverMem(4.0), 50, 0.3);

return architecture == Architecture.arm64
? new NodeResources(0.5, 2.5, 50, 0.3)
: new NodeResources(0.5, 2, 50, 0.3);
? new NodeResources(0.5, tuning.logserverMem(2.5), 50, 0.3)
: new NodeResources(0.5, tuning.logserverMem(2.0), 50, 0.3);
}

// The lowest amount of resources that can be exclusive allocated (i.e. a matching host flavor for this exists)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ public static class FeatureFlags implements ModelContext.FeatureFlags {
private final boolean logserverOtelCol;
private final SharedHosts sharedHosts;
private final Architecture adminClusterArchitecture;
private final double logserverNodeMemory;
private final boolean symmetricPutAndActivateReplicaSelection;
private final boolean enforceStrictlyIncreasingClusterStateVersions;
private final boolean launchApplicationAthenzService;
Expand Down Expand Up @@ -258,6 +259,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
this.logserverOtelCol = Flags.LOGSERVER_OTELCOL_AGENT.bindTo(source).with(appId).with(version).value();
this.sharedHosts = PermanentFlags.SHARED_HOST.bindTo(source).with( appId).with(version).value();
this.adminClusterArchitecture = Architecture.valueOf(PermanentFlags.ADMIN_CLUSTER_NODE_ARCHITECTURE.bindTo(source).with(appId).with(version).value());
this.logserverNodeMemory = PermanentFlags.LOGSERVER_NODE_MEMORY.bindTo(source).with(appId).with(version).value();
this.symmetricPutAndActivateReplicaSelection = Flags.SYMMETRIC_PUT_AND_ACTIVATE_REPLICA_SELECTION.bindTo(source).with(appId).with(version).value();
this.enforceStrictlyIncreasingClusterStateVersions = Flags.ENFORCE_STRICTLY_INCREASING_CLUSTER_STATE_VERSIONS.bindTo(source).with(appId).with(version).value();
this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(source).with(appId).with(version).value();
Expand Down Expand Up @@ -314,6 +316,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
@Override public boolean logserverOtelCol() { return logserverOtelCol; }
@Override public SharedHosts sharedHosts() { return sharedHosts; }
@Override public Architecture adminClusterArchitecture() { return adminClusterArchitecture; }
@Override public double logserverNodeMemory() { return logserverNodeMemory; }
@Override public boolean symmetricPutAndActivateReplicaSelection() { return symmetricPutAndActivateReplicaSelection; }
@Override public boolean enforceStrictlyIncreasingClusterStateVersions() { return enforceStrictlyIncreasingClusterStateVersions; }
@Override public boolean distributionConfigFromClusterController() { return distributionConfigFromClusterController; }
Expand Down
6 changes: 6 additions & 0 deletions flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ public class PermanentFlags {
value -> Set.of("any", "arm64", "x86_64").contains(value),
INSTANCE_ID);

// Override of logserver node memory, resolved per application instance (INSTANCE_ID dimension).
// The default 0.0 means "no override": CapacityPolicies.Tuning only uses the flag value
// when it is greater than zero, and otherwise keeps the built-in memory size.
public static final UnboundDoubleFlag LOGSERVER_NODE_MEMORY = defineDoubleFlag(
"logserver-node-memory", 0.0,
"Amount of memory (in GiB) to allocate for logserver nodes",
"Takes effect on allocation from node repository",
INSTANCE_ID);

public static final UnboundListFlag<String> CLOUD_ACCOUNTS = defineListFlag(
"cloud-accounts", List.of(), String.class,
"A list of 12-digit AWS account IDs that are valid for the given tenant",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,13 @@ public CapacityPolicies capacityPoliciesFor(ApplicationId applicationId) {
.bindTo(flagSource)
.with(INSTANCE_ID, applicationId.serializedForm())
.value();
return new CapacityPolicies(zone, exclusivity(), applicationId, Architecture.valueOf(adminClusterNodeArchitecture));
double logserverMemory = PermanentFlags.LOGSERVER_NODE_MEMORY
.bindTo(flagSource)
.with(INSTANCE_ID, applicationId.serializedForm())
.value();
var tuning = new CapacityPolicies.Tuning(Architecture.valueOf(adminClusterNodeArchitecture),
logserverMemory);
return new CapacityPolicies(zone, exclusivity(), applicationId, tuning);
}

/**
Expand Down

0 comments on commit 6c04a09

Please sign in to comment.