Skip to content

Commit

Permalink
Advertise bootstrap addresses via maghemite (#1251)
Browse files Browse the repository at this point in the history
Replaces UDP multicast bootstrap peer discovery with ddmd client
  • Loading branch information
jgallagher authored Jun 24, 2022
1 parent e42c225 commit deab7db
Show file tree
Hide file tree
Showing 13 changed files with 263 additions and 470 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions ddm-admin-client/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// The client code in this crate is pulled in via `include!` below rather than
// written by hand; presumably these lints fire on that generated output and
// are silenced for that reason -- TODO confirm.
#![allow(clippy::redundant_closure_call, clippy::needless_lifetimes)]

// Textually include the client source that the build places at
// `$OUT_DIR/ddm-admin-client.rs`.
include!(concat!(env!("OUT_DIR"), "/ddm-admin-client.rs"));

// `types::Ipv6Prefix` is defined by the included file, so `Copy` is
// implemented for it by hand here, letting callers pass prefixes by value.
impl Copy for types::Ipv6Prefix {}
1 change: 1 addition & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ chrono = { version = "0.4", features = [ "serde" ] }
clap = { version = "3.2", features = ["derive"] }
# Only used by the simulated sled agent.
crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "8314eeddd228ec0d76cefa40c4a41d3e2611ac18" }
ddm-admin-client = { path = "../ddm-admin-client" }
dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] }
futures = "0.3.21"
ipnetwork = "0.18"
Expand Down
92 changes: 76 additions & 16 deletions sled-agent/src/bootstrap/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use super::client::Client as BootstrapAgentClient;
use super::config::{Config, BOOTSTRAP_AGENT_PORT};
use super::discovery;
use super::ddm_admin_client::{DdmAdminClient, DdmError};
use super::params::SledAgentRequest;
use super::rss_handle::RssHandle;
use super::server::TrustQuorumMembership;
Expand All @@ -17,18 +17,26 @@ use crate::illumos::dladm::{self, Dladm, PhysicalLink};
use crate::illumos::zone::Zones;
use crate::server::Server as SledServer;
use crate::sp::SpHandle;
use ddm_admin_client::types::Ipv6Prefix;
use omicron_common::address::get_sled_address;
use omicron_common::api::external::{Error as ExternalError, MacAddr};
use omicron_common::backoff::{
internal_service_policy, retry_notify, BackoffError,
};
use slog::Logger;
use std::collections::HashSet;
use std::net::{Ipv6Addr, SocketAddrV6};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use thiserror::Error;
use tokio::sync::Mutex;

/// Initial 16-bit segment of the IPv6 address used for bootstrap addresses.
pub(crate) const BOOTSTRAP_PREFIX: u16 = 0xfdb0;

/// IPv6 prefix length (the `mask` of an advertised prefix) for bootstrap
/// addresses.
pub(crate) const BOOTSTRAP_MASK: u8 = 64;

/// Describes errors which may occur while operating the bootstrap service.
#[derive(Error, Debug)]
pub enum BootstrapError {
Expand All @@ -39,6 +47,9 @@ pub enum BootstrapError {
err: std::io::Error,
},

#[error("Error contacting ddmd: {0}")]
DdmError(#[from] DdmError),

#[error("Error starting sled agent: {0}")]
SledError(String),

Expand All @@ -48,6 +59,9 @@ pub enum BootstrapError {
#[error(transparent)]
TrustQuorum(#[from] TrustQuorumError),

#[error("Error collecting peer addresses: {0}")]
PeerAddresses(String),

#[error("Failed to initialize bootstrap address: {err}")]
BootstrapAddress { err: crate::illumos::zone::EnsureGzAddressError },
}
Expand All @@ -65,7 +79,7 @@ pub(crate) struct Agent {
/// Store the parent log - without "component = BootstrapAgent" - so
/// other launched components can set their own value.
parent_log: Logger,
peer_monitor: discovery::PeerMonitor,
address: Ipv6Addr,

/// Our share of the rack secret, if we have one.
share: Mutex<Option<ShareDistribution>>,
Expand All @@ -86,7 +100,7 @@ fn mac_to_socket_addr(mac: MacAddr) -> SocketAddrV6 {
assert_eq!(6, mac_bytes.len());

let address = Ipv6Addr::new(
0xfdb0,
BOOTSTRAP_PREFIX,
((mac_bytes[0] as u16) << 8) | mac_bytes[1] as u16,
((mac_bytes[2] as u16) << 8) | mac_bytes[3] as u16,
((mac_bytes[4] as u16) << 8) | mac_bytes[5] as u16,
Expand Down Expand Up @@ -158,16 +172,17 @@ impl Agent {
)
.map_err(|err| BootstrapError::BootstrapAddress { err })?;

let peer_monitor = discovery::PeerMonitor::new(&ba_log, address)
.map_err(|err| BootstrapError::Io {
message: format!("Monitoring for peers from {address}"),
err,
})?;
// Start trying to notify ddmd of our bootstrap address so it can
// advertise it to other sleds.
tokio::spawn(advertise_bootstrap_address_via_ddmd(
ba_log.clone(),
address,
));

let agent = Agent {
log: ba_log,
parent_log: log,
peer_monitor,
address,
share: Mutex::new(None),
rss: Mutex::new(None),
sled_agent: Mutex::new(None),
Expand Down Expand Up @@ -284,10 +299,33 @@ impl Agent {
&self,
share: ShareDistribution,
) -> Result<RackSecret, BootstrapError> {
let ddm_admin_client = DdmAdminClient::new(self.log.clone())?;
let rack_secret = retry_notify(
internal_service_policy(),
|| async {
let other_agents = self.peer_monitor.peer_addrs().await;
let other_agents = {
// Manually build up a `HashSet` instead of `.collect()`ing
// so we can log if we see any duplicates.
let mut addrs = HashSet::new();
for addr in ddm_admin_client
.peer_addrs()
.await
.map_err(BootstrapError::DdmError)
.map_err(|err| BackoffError::transient(err))?
{
// We should never see duplicates; that would mean
// maghemite thinks two different sleds have the same
// bootstrap address!
if !addrs.insert(addr) {
let msg = format!("Duplicate peer addresses received from ddmd: {addr}");
error!(&self.log, "{}", msg);
return Err(BackoffError::permanent(
BootstrapError::PeerAddresses(msg),
));
}
}
addrs
};
info!(
&self.log,
"Bootstrap: Communicating with peers: {:?}", other_agents
Expand All @@ -300,7 +338,9 @@ impl Agent {
"Not enough peers to start establishing quorum"
);
return Err(BackoffError::transient(
TrustQuorumError::NotEnoughPeers,
BootstrapError::TrustQuorum(
TrustQuorumError::NotEnoughPeers,
),
));
}
info!(
Expand Down Expand Up @@ -329,14 +369,17 @@ impl Agent {
})
.collect();

// TODO: Parallelize this and keep track of whose shares we've already retrieved and
// don't resend. See https://github.com/oxidecomputer/omicron/issues/514
// TODO: Parallelize this and keep track of whose shares we've
// already retrieved and don't resend. See
// https://github.com/oxidecomputer/omicron/issues/514
let mut shares = vec![share.share.clone()];
for agent in &other_agents {
let share = agent.request_share().await
.map_err(|e| {
info!(&self.log, "Bootstrap: failed to retreive share from peer: {:?}", e);
BackoffError::transient(e.into())
BackoffError::transient(
BootstrapError::TrustQuorum(e.into()),
)
})?;
info!(
&self.log,
Expand All @@ -360,7 +403,9 @@ impl Agent {
// the error returned from `RackSecret::combine_shares`.
// See https://github.com/oxidecomputer/omicron/issues/516
BackoffError::transient(
TrustQuorumError::RackSecretConstructionFailed(e),
BootstrapError::TrustQuorum(
TrustQuorumError::RackSecretConstructionFailed(e),
),
)
})?;
info!(self.log, "RackSecret computed from shares.");
Expand All @@ -386,7 +431,7 @@ impl Agent {
let rss = RssHandle::start_rss(
&self.parent_log,
rss_config.clone(),
self.peer_monitor.observer().await,
self.address,
self.sp.clone(),
// TODO-cleanup: Remove this arg once RSS can discover the trust
// quorum members over the management network.
Expand Down Expand Up @@ -424,6 +469,21 @@ impl Agent {
}
}

async fn advertise_bootstrap_address_via_ddmd(log: Logger, address: Ipv6Addr) {
let prefix = Ipv6Prefix { addr: address, mask: 64 };
retry_notify(internal_service_policy(), || async {
let client = DdmAdminClient::new(log.clone())?;
client.advertise_prefix(prefix).await?;
Ok(())
}, |err, duration| {
info!(
log,
"Failed to notify ddmd of our address (will retry after {duration:?}";
"err" => %err,
);
}).await.unwrap();
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
101 changes: 101 additions & 0 deletions sled-agent/src/bootstrap/ddm_admin_client.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Client to ddmd (the maghemite service running on localhost).
use ddm_admin_client::types::Ipv6Prefix;
use ddm_admin_client::Client;
use slog::Logger;
use std::net::Ipv6Addr;
use std::net::SocketAddr;
use std::net::SocketAddrV6;
use thiserror::Error;

use crate::bootstrap::agent::BOOTSTRAP_MASK;
use crate::bootstrap::agent::BOOTSTRAP_PREFIX;

// Port on which this client expects ddmd's admin API to listen on localhost.
//
// TODO-cleanup Is it okay to hardcode this port number and assume ddmd is bound
// to `::1`, or should we move that into our config?
const DDMD_PORT: u16 = 8000;

/// Errors that can occur while constructing or using a `DdmAdminClient`.
#[derive(Debug, Error)]
pub enum DdmError {
/// Building the underlying `reqwest` HTTP client failed.
#[error("Failed to construct an HTTP client: {0}")]
HttpClient(#[from] reqwest::Error),

/// A request made through the ddmd API client returned an error.
#[error("Failed making HTTP request to ddmd: {0}")]
DdmdApi(#[from] ddm_admin_client::Error<ddm_admin_client::types::Error>),
}

/// Client for ddmd that manages sled discovery: both announcing our own
/// prefix to other sleds and discovering the prefixes those sleds advertise.
#[derive(Clone)]
pub struct DdmAdminClient {
// API client used to issue requests to ddmd.
client: Client,
// Logger used for this wrapper's own log messages.
log: Logger,
}

impl DdmAdminClient {
    /// Creates a new [`DdmAdminClient`] that talks to ddmd over HTTP at
    /// `[::1]:DDMD_PORT`.
    ///
    /// # Errors
    ///
    /// Returns [`DdmError::HttpClient`] if the underlying HTTP client cannot
    /// be built.
    pub fn new(log: Logger) -> Result<Self, DdmError> {
        // The same 60-second limit is applied to connecting and to each
        // request.
        let timeout = std::time::Duration::from_secs(60);
        let http_client = reqwest::ClientBuilder::new()
            .connect_timeout(timeout)
            .timeout(timeout)
            .build()?;

        let ddmd_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, DDMD_PORT, 0, 0);
        let base_url = format!("http://{ddmd_addr}");
        // The API client gets a child logger tagged with the ddmd address;
        // the parent logger is kept for this wrapper's own messages.
        let client_log =
            log.new(o!("DdmAdminClient" => SocketAddr::V6(ddmd_addr)));
        let client =
            Client::new_with_client(&base_url, http_client, client_log);

        Ok(Self { client, log })
    }

    /// Asks ddmd to advertise `prefix` to peer sleds.
    ///
    /// # Errors
    ///
    /// Returns [`DdmError::DdmdApi`] if the request to ddmd fails.
    pub async fn advertise_prefix(
        &self,
        prefix: Ipv6Prefix,
    ) -> Result<(), DdmError> {
        info!(
            self.log, "Sending prefix to ddmd for advertisement";
            "prefix" => ?prefix,
        );
        // TODO-cleanup Why does the generated openapi client require a `&Vec`
        // instead of a `&[]`?
        let to_advertise = vec![prefix];
        self.client.advertise_prefixes(&to_advertise).await?;
        Ok(())
    }

    /// Returns the bootstrap addresses of connected sleds, as derived from
    /// the prefixes ddmd reports.
    ///
    /// Note: These sleds have not yet been verified.
    ///
    /// # Errors
    ///
    /// Returns [`DdmError::DdmdApi`] if the request to ddmd fails.
    pub async fn peer_addrs(
        &self,
    ) -> Result<impl Iterator<Item = Ipv6Addr> + '_, DdmError> {
        let advertised = self.client.get_prefixes().await?.into_inner();
        info!(self.log, "Received prefixes from ddmd"; "prefixes" => ?advertised);

        let bootstrap_addrs =
            advertised.into_iter().filter_map(|(_, peer_prefixes)| {
                // A peer may advertise several bootstrap prefixes; keep only
                // the first one. Connections on the bootstrap network are
                // always authenticated via sprockets, which only needs one
                // address.
                peer_prefixes
                    .into_iter()
                    .filter(|prefix| {
                        prefix.mask == BOOTSTRAP_MASK
                            && prefix.addr.segments()[0] == BOOTSTRAP_PREFIX
                    })
                    .map(|prefix| {
                        // Bootstrap agent IPs always end in ::1; convert the
                        // `BOOTSTRAP_PREFIX::*/BOOTSTRAP_MASK` prefix we
                        // received into that specific address.
                        let mut segments = prefix.addr.segments();
                        segments[7] = 1;
                        Ipv6Addr::from(segments)
                    })
                    .next()
            });

        Ok(bootstrap_addrs)
    }
}
Loading

0 comments on commit deab7db

Please sign in to comment.