Skip to content

Commit

Permalink
[WIP][forge] rust bindings for forge indexer/testnet deployer
Browse files Browse the repository at this point in the history
  • Loading branch information
rustielin committed Sep 6, 2024
1 parent 5d6e1ff commit 48a2dda
Show file tree
Hide file tree
Showing 8 changed files with 370 additions and 27 deletions.
46 changes: 27 additions & 19 deletions testsuite/forge-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ enum OperatorCommand {
SetNodeImageTag(SetNodeImageTag),
/// Clean up an existing cluster
CleanUp(CleanUp),
/// Resize an existing cluster
Resize(Resize),
/// Create a new cluster for testing purposes
Create(Create),
}

#[derive(Parser, Debug)]
Expand Down Expand Up @@ -217,8 +217,8 @@ struct CleanUp {
}

#[derive(Parser, Debug)]
struct Resize {
#[clap(long, help = "The kubernetes namespace to resize")]
struct Create {
#[clap(long, help = "The kubernetes namespace to create in")]
namespace: String,
#[clap(long, default_value_t = 30)]
num_validators: usize,
Expand All @@ -227,13 +227,13 @@ struct Resize {
#[clap(
long,
help = "Override the image tag used for validators",
default_value = "devnet"
default_value = "main"
)]
validator_image_tag: String,
#[clap(
long,
help = "Override the image tag used for testnet-specific components",
default_value = "devnet"
default_value = "main"
)]
testnet_image_tag: String,
#[clap(
Expand All @@ -248,6 +248,8 @@ struct Resize {
connect_directly: bool,
#[clap(long, help = "If set, enables HAProxy for each of the validators")]
enable_haproxy: bool,
#[clap(long, help = "If set, spins up an indexer stack alongside the testnet")]
with_indexer: bool,
}

// common metrics thresholds:
Expand Down Expand Up @@ -421,19 +423,25 @@ fn main() -> Result<()> {
}
Ok(())
},
OperatorCommand::Resize(resize) => {
runtime.block_on(install_testnet_resources(
resize.namespace,
resize.num_validators,
resize.num_fullnodes,
resize.validator_image_tag,
resize.testnet_image_tag,
resize.move_modules_dir,
!resize.connect_directly,
resize.enable_haproxy,
None,
None,
))?;
OperatorCommand::Create(create) => {
let kube_client = runtime.block_on(create_k8s_client())?;
let era = generate_new_era();
let values = ForgeDeployerValues {
profile: DEFAULT_FORGE_DEPLOYER_PROFILE.to_string(),
era,
namespace: create.namespace,
indexer_grpc_values: None,
indexer_processor_values: None,
};
let forge_deployer_manager =
ForgeDeployerManager::from_k8s_client(kube_client, values);
runtime.block_on(forge_deployer_manager.ensure_namespace_prepared())?;
// NOTE: this is generally not going to run from within the cluster, do not perform any operations
// that might require internal DNS resolution to work, such as txn emission directly against the node service IPs.
runtime.block_on(forge_deployer_manager.start(ForgeDeployerType::Testnet))?;
if create.with_indexer {
runtime.block_on(forge_deployer_manager.start(ForgeDeployerType::Indexer))?;
}
Ok(())
},
},
Expand Down
36 changes: 31 additions & 5 deletions testsuite/forge/src/backend/k8s/cluster_helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ pub async fn uninstall_testnet_resources(kube_namespace: String) -> Result<()> {
Ok(())
}

fn generate_new_era() -> String {
pub fn generate_new_era() -> String {
let mut rng = rand::thread_rng();
let r: u8 = rng.gen();
format!("forge{}", r)
Expand Down Expand Up @@ -826,15 +826,41 @@ fn dump_helm_values_to_file(helm_release_name: &str, tmp_dir: &TempDir) -> Resul

#[derive(Error, Debug)]
#[error("{0}")]
enum ApiError {
pub enum ApiError {
RetryableError(String),
FinalError(String),
}

async fn create_namespace(
/// Creates `resource` through `api`, treating an "already exists" conflict as success.
///
/// This is the generic counterpart of `create_namespace`: it works for any k8s
/// resource type `T` and tolerates an HTTP 409 response from the API server.
///
/// # Returns
///
/// The `resource` value that was passed in (not the object echoed back by the API
/// server), both when it was newly created and when it already existed.
///
/// # Errors
///
/// Returns [`ApiError::RetryableError`] for every create failure other than a 409
/// conflict. This includes errors that are not API responses at all; those must not
/// be reported as success, because the resource may not exist.
pub async fn maybe_create_k8s_resource<T>(
    api: Arc<dyn ReadWrite<T>>,
    resource: T,
) -> Result<T, ApiError>
where
    T: kube::Resource + Clone + DeserializeOwned + Debug,
    <T as kube::Resource>::DynamicType: Default,
{
    match api.create(&PostParams::default(), &resource).await {
        Ok(_) => {},
        // 409 Conflict: the resource is already present, so reuse it.
        Err(KubeError::Api(api_err)) if api_err.code == 409 => {
            info!(
                "Resource {} already exists, continuing with it",
                resource.name()
            );
        },
        // Any other failure (API error or otherwise) means the resource cannot be
        // assumed to exist; surface it so the caller can retry.
        Err(err) => {
            return Err(ApiError::RetryableError(format!(
                "Failed to create resource {}: {:?}",
                resource.name(),
                err
            )));
        },
    }
    Ok(resource)
}

pub async fn create_namespace(
namespace_api: Arc<dyn ReadWrite<Namespace>>,
kube_namespace: String,
) -> Result<(), ApiError> {
) -> Result<Namespace, ApiError> {
let kube_namespace_name = kube_namespace.clone();
let namespace = Namespace {
metadata: ObjectMeta {
Expand Down Expand Up @@ -866,7 +892,7 @@ async fn create_namespace(
)));
}
}
Ok(())
Ok(namespace)
}

pub async fn create_management_configmap(
Expand Down
30 changes: 27 additions & 3 deletions testsuite/forge/src/backend/k8s/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ pub mod prometheus;
mod stateful_set;
mod swarm;

use super::{
ForgeDeployerManager, ForgeDeployerType, ForgeDeployerValues, DEFAULT_FORGE_DEPLOYER_PROFILE,
};
use aptos_sdk::crypto::ed25519::ED25519_PRIVATE_KEY_LENGTH;
pub use cluster_helper::*;
pub use constants::*;
Expand Down Expand Up @@ -148,14 +151,14 @@ impl Factory for K8sFactory {

// We return early here if there are not enough PVs to claim.
check_persistent_volumes(
kube_client,
kube_client.clone(),
num_validators.get() + num_fullnodes,
existing_db_tag,
)
.await?;
}
// try installing testnet resources, but clean up if it fails
match install_testnet_resources(
let (new_era, validators, fullnodes) = match install_testnet_resources(
self.kube_namespace.clone(),
num_validators.get(),
num_fullnodes,
Expand All @@ -174,7 +177,28 @@ impl Factory for K8sFactory {
uninstall_testnet_resources(self.kube_namespace.clone()).await?;
bail!(e);
},
}
};

// add an indexer too!

// NOTE: by default, use a deploy profile and no additional configuration values
let values = ForgeDeployerValues {
profile: DEFAULT_FORGE_DEPLOYER_PROFILE.to_string(),
era: new_era.clone().expect("Era not set in created testnet"),
namespace: self.kube_namespace.clone(),
indexer_grpc_values: None,
indexer_processor_values: None,
};

let forge_deployer_manager =
ForgeDeployerManager::from_k8s_client(kube_client.clone(), values);

forge_deployer_manager.ensure_namespace_prepared().await?;
forge_deployer_manager
.start(ForgeDeployerType::Indexer)
.await?;

(new_era, validators, fullnodes)
};

let swarm = K8sSwarm::new(
Expand Down
10 changes: 10 additions & 0 deletions testsuite/forge/src/backend/k8s_deployer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Forge K8s Deployer Backend

This backend manages Forge "deployers", which are k8s jobs that spin up the necessary k8s infrastructure for Forge tests to run.
They mostly involve state management of the Forge namespace, ancillary resources like configmaps, and the deployer jobs themselves.

Forge deployers:

- Each deploy a single "component" of Forge infra, which may depend on other components or resources. For example, an indexer stack relies on a testnet stack already existing
- Can take in customization values via the env var FORGE_DEPLOY_VALUES_JSON
- Have a known values schema but mostly rely on a "profile" that is suitable for most tests and contains sane default values
18 changes: 18 additions & 0 deletions testsuite/forge/src/backend/k8s_deployer/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) Aptos Foundation
// SPDX-License-Identifier: Apache-2.0

pub const FORGE_INDEXER_DEPLOYER_DOCKER_IMAGE_REPO: &str =
"us-docker.pkg.dev/aptos-registry/docker/forge-indexer-deployer";
pub const FORGE_TESTNET_DEPLOYER_DOCKER_IMAGE_REPO: &str =
"us-docker.pkg.dev/aptos-registry/docker/forge-testnet-deployer";

/// The version of the forge deployer image to use.
pub const FORGE_DEPLOYER_IMAGE_TAG: &str = "main"; // default to the latest stable build from the main branch

/// This is the service account name that the deployer will use to deploy the forge components. It may require extra permissions and additional setup
pub const FORGE_DEPLOYER_SERVICE_ACCOUNT_NAME: &str = "forge";

/// This is the environment variable that must be set in the deployer pod to provide the deployer with its customization values as JSON
pub const FORGE_DEPLOYER_VALUES_ENV_VAR_NAME: &str = "FORGE_DEPLOY_VALUES_JSON";

pub const DEFAULT_FORGE_DEPLOYER_PROFILE: &str = "large";
Loading

0 comments on commit 48a2dda

Please sign in to comment.