Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add feature flag for tuning memory on logserver nodes #32314

Merged
merged 1 commit into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config-model-api/abi-spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -1346,6 +1346,7 @@
"public boolean logserverOtelCol()",
"public com.yahoo.config.provision.SharedHosts sharedHosts()",
"public com.yahoo.config.provision.NodeResources$Architecture adminClusterArchitecture()",
"public double logserverNodeMemory()",
"public boolean symmetricPutAndActivateReplicaSelection()",
"public boolean enforceStrictlyIncreasingClusterStateVersions()",
"public boolean distributionConfigFromClusterController()",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ interface FeatureFlags {
// Each flag below supplies a default value through its default method body.
/** Defaults to {@code false}. */
@ModelFeatureFlag(owners = {"olaa"}) default boolean logserverOtelCol() { return false; }
/** Defaults to {@code SharedHosts.empty()}. */
@ModelFeatureFlag(owners = {"bratseth"}) default SharedHosts sharedHosts() { return SharedHosts.empty(); }
/** Defaults to {@code Architecture.x86_64}. */
@ModelFeatureFlag(owners = {"bratseth"}) default Architecture adminClusterArchitecture() { return Architecture.x86_64; }
/**
 * Memory override for logserver nodes, passed on as {@code logserverMemoryGiB} to
 * {@code CapacityPolicies.Tuning}. Defaults to 0.0; Tuning only applies values greater
 * than zero, so the default leaves the built-in logserver memory unchanged.
 */
@ModelFeatureFlag(owners = {"arnej"}) default double logserverNodeMemory() { return 0.0; }
/** Defaults to {@code false}. */
@ModelFeatureFlag(owners = {"vekterli"}) default boolean symmetricPutAndActivateReplicaSelection() { return false; }
/** Defaults to {@code false}. */
@ModelFeatureFlag(owners = {"vekterli"}) default boolean enforceStrictlyIncreasingClusterStateVersions() { return false; }
/** Defaults to {@code false}. */
@ModelFeatureFlag(owners = {"vekterli"}) default boolean distributionConfigFromClusterController() { return false; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ public class QuotaValidator implements Validator {
public void validate(Context context) {
var zone = context.deployState().zone();
var exclusivity = new Exclusivity(zone, context.deployState().featureFlags().sharedHosts());
var tuning = new CapacityPolicies.Tuning(context.deployState().featureFlags().adminClusterArchitecture(),
context.deployState().featureFlags().logserverNodeMemory());
var capacityPolicies = new CapacityPolicies(zone, exclusivity, context.model().applicationPackage().getApplicationId(),
context.deployState().featureFlags().adminClusterArchitecture());
tuning);
var quota = context.deployState().getProperties().quota();
quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, context.model()));
quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, context, capacityPolicies));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,29 @@
*/
public class CapacityPolicies {

/**
 * Adjustable inputs used when capacity policies pick default node resources.
 *
 * @param adminClusterArchitecture architecture applied to admin cluster node resources
 * @param logserverMemoryGiB       memory override for logserver nodes, in GiB; a value that is
 *                                 not greater than zero means "no override"
 */
public record Tuning(Architecture adminClusterArchitecture,
                     double logserverMemoryGiB)
{
    /**
     * Chooses the memory amount to use for a logserver node.
     *
     * @param v the built-in default to fall back on
     * @return {@code logserverMemoryGiB} when it is greater than zero, otherwise {@code v}
     */
    double logserverMem(double v) {
        if (logserverMemoryGiB > 0) {
            return logserverMemoryGiB;
        }
        return v;
    }
}

private final Zone zone;
private final Exclusivity exclusivity;
private final ApplicationId applicationId;
private final Architecture adminClusterArchitecture;
private final Tuning tuning;

/**
 * Creates capacity policies from an admin cluster architecture alone.
 * Delegates to the Tuning-based constructor with a logserver memory of 0.0, which
 * Tuning.logserverMem treats as "no override" (only values greater than zero are applied).
 */
public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Architecture adminClusterArchitecture) {
this(zone, exclusivity, applicationId, new Tuning(adminClusterArchitecture, 0.0));
}

/**
 * Creates capacity policies, storing the given zone, exclusivity, application id and
 * tuning in this object's fields.
 */
public CapacityPolicies(Zone zone, Exclusivity exclusivity, ApplicationId applicationId, Tuning tuning) {
this.zone = zone;
this.exclusivity = exclusivity;
this.applicationId = applicationId;
// NOTE(review): this constructor has no parameter named adminClusterArchitecture. This
// listing appears to come from a diff view, so the next line is presumably the removed
// assignment that the tuning assignment after it replaces — confirm against the real file.
this.adminClusterArchitecture = adminClusterArchitecture;
this.tuning = tuning;
}

public Capacity applyOn(Capacity capacity, boolean exclusive) {
Expand Down Expand Up @@ -92,6 +105,7 @@ public NodeResources specifyFully(NodeResources resources, ClusterSpec clusterSp
}

private NodeResources defaultResources(ClusterSpec clusterSpec) {
var adminClusterArchitecture = tuning.adminClusterArchitecture();
if (clusterSpec.type() == ClusterSpec.Type.admin) {
if (exclusivity.allocation(clusterSpec)) {
return smallestExclusiveResources().with(adminClusterArchitecture);
Expand Down Expand Up @@ -134,14 +148,14 @@ private NodeResources clusterControllerResources(ClusterSpec clusterSpec, Archit

/**
 * Returns the default node resources for a logserver node. The choice depends on the
 * zone's cloud (AZURE, GCP, or anything else) and, for other clouds, on whether the
 * given architecture is arm64.
 *
 * In the tuning-aware variants the second NodeResources argument is passed through
 * tuning.logserverMem, so an override greater than zero replaces the built-in value
 * (4.0, 2.5 or 2.0).
 *
 * NOTE(review): every line with a literal second argument is directly followed by a
 * variant that routes the same value through tuning.logserverMem. This listing appears
 * to come from a diff view, so these are presumably before/after pairs rather than
 * statements meant to coexist — confirm against the real file.
 */
private NodeResources logserverResources(Architecture architecture) {
if (zone.cloud().name() == CloudName.AZURE)
return new NodeResources(2, 4, 50, 0.3);
return new NodeResources(2, tuning.logserverMem(4.0), 50, 0.3);

if (zone.cloud().name() == CloudName.GCP)
return new NodeResources(1, 4, 50, 0.3);
return new NodeResources(1, tuning.logserverMem(4.0), 50, 0.3);

return architecture == Architecture.arm64
? new NodeResources(0.5, 2.5, 50, 0.3)
: new NodeResources(0.5, 2, 50, 0.3);
? new NodeResources(0.5, tuning.logserverMem(2.5), 50, 0.3)
: new NodeResources(0.5, tuning.logserverMem(2.0), 50, 0.3);
}

// The lowest amount of resources that can be exclusive allocated (i.e. a matching host flavor for this exists)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ public static class FeatureFlags implements ModelContext.FeatureFlags {
private final boolean logserverOtelCol;
private final SharedHosts sharedHosts;
private final Architecture adminClusterArchitecture;
private final double logserverNodeMemory;
private final boolean symmetricPutAndActivateReplicaSelection;
private final boolean enforceStrictlyIncreasingClusterStateVersions;
private final boolean launchApplicationAthenzService;
Expand Down Expand Up @@ -258,6 +259,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
this.logserverOtelCol = Flags.LOGSERVER_OTELCOL_AGENT.bindTo(source).with(appId).with(version).value();
this.sharedHosts = PermanentFlags.SHARED_HOST.bindTo(source).with( appId).with(version).value();
this.adminClusterArchitecture = Architecture.valueOf(PermanentFlags.ADMIN_CLUSTER_NODE_ARCHITECTURE.bindTo(source).with(appId).with(version).value());
this.logserverNodeMemory = PermanentFlags.LOGSERVER_NODE_MEMORY.bindTo(source).with(appId).with(version).value();
this.symmetricPutAndActivateReplicaSelection = Flags.SYMMETRIC_PUT_AND_ACTIVATE_REPLICA_SELECTION.bindTo(source).with(appId).with(version).value();
this.enforceStrictlyIncreasingClusterStateVersions = Flags.ENFORCE_STRICTLY_INCREASING_CLUSTER_STATE_VERSIONS.bindTo(source).with(appId).with(version).value();
this.launchApplicationAthenzService = Flags.LAUNCH_APPLICATION_ATHENZ_SERVICE.bindTo(source).with(appId).with(version).value();
Expand Down Expand Up @@ -314,6 +316,7 @@ public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
// Each accessor below returns the field of the same name.
@Override public boolean logserverOtelCol() { return logserverOtelCol; }
@Override public SharedHosts sharedHosts() { return sharedHosts; }
@Override public Architecture adminClusterArchitecture() { return adminClusterArchitecture; }
/** Returns the PermanentFlags.LOGSERVER_NODE_MEMORY value resolved in the constructor for the application id and version. */
@Override public double logserverNodeMemory() { return logserverNodeMemory; }
@Override public boolean symmetricPutAndActivateReplicaSelection() { return symmetricPutAndActivateReplicaSelection; }
@Override public boolean enforceStrictlyIncreasingClusterStateVersions() { return enforceStrictlyIncreasingClusterStateVersions; }
@Override public boolean distributionConfigFromClusterController() { return distributionConfigFromClusterController; }
Expand Down
6 changes: 6 additions & 0 deletions flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ public class PermanentFlags {
value -> Set.of("any", "arm64", "x86_64").contains(value),
INSTANCE_ID);

// Memory override for logserver nodes, in GiB, defined with the INSTANCE_ID dimension.
// The default of 0.0 is not a real size: CapacityPolicies.Tuning applies the flag only
// when its value is greater than zero and otherwise keeps the built-in logserver memory.
public static final UnboundDoubleFlag LOGSERVER_NODE_MEMORY = defineDoubleFlag(
"logserver-node-memory", 0.0,
"Amount of memory (in GiB) to allocate for logserver nodes",
"Takes effect on allocation from node repository",
INSTANCE_ID);

public static final UnboundListFlag<String> CLOUD_ACCOUNTS = defineListFlag(
"cloud-accounts", List.of(), String.class,
"A list of 12-digit AWS account IDs that are valid for the given tenant",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,13 @@ public CapacityPolicies capacityPoliciesFor(ApplicationId applicationId) {
.bindTo(flagSource)
.with(INSTANCE_ID, applicationId.serializedForm())
.value();
return new CapacityPolicies(zone, exclusivity(), applicationId, Architecture.valueOf(adminClusterNodeArchitecture));
double logserverMemory = PermanentFlags.LOGSERVER_NODE_MEMORY
.bindTo(flagSource)
.with(INSTANCE_ID, applicationId.serializedForm())
.value();
var tuning = new CapacityPolicies.Tuning(Architecture.valueOf(adminClusterNodeArchitecture),
logserverMemory);
return new CapacityPolicies(zone, exclusivity(), applicationId, tuning);
}

/**
Expand Down