Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sled-agent] PUT zones doesn't create datasets (#7006) #7160

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/src/ledger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ impl<T: Ledgerable> Ledger<T> {
warn!(self.log, "Failed to write ledger"; "path" => ?path, "err" => ?e);
failed_paths.push((path.to_path_buf(), e));
} else {
info!(self.log, "Successfully wrote to ledger"; "path" => ?path);
one_successful_write = true;
}
}
Expand Down
18 changes: 17 additions & 1 deletion nexus-sled-agent-shared/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use omicron_common::{
external::{ByteCount, Generation},
internal::shared::{NetworkInterface, SourceNatConfig},
},
disk::DiskVariant,
disk::{DatasetKind, DatasetName, DiskVariant},
zpool_name::ZpoolName,
};
use omicron_uuid_kinds::{DatasetUuid, OmicronZoneUuid};
Expand Down Expand Up @@ -168,6 +168,22 @@ impl OmicronZoneConfig {
pub fn underlay_ip(&self) -> Ipv6Addr {
self.zone_type.underlay_ip()
}

/// If this zone has a transient filesystem, return the dataset's name.
/// Otherwise, return `None`.
pub fn transient_zone_dataset_name(&self) -> Option<DatasetName> {
self.filesystem_pool.as_ref().map(|pool| {
DatasetName::new(
pool.clone(),
DatasetKind::TransientZone {
name: illumos_utils::zone::zone_name(
self.zone_type.kind().zone_prefix(),
Some(self.id),
),
},
)
})
}
}

/// Describes a persistent ZFS dataset associated with an Omicron zone
Expand Down
66 changes: 66 additions & 0 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET};
use sled_storage::manager::StorageHandle;
use slog::Logger;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::net::{IpAddr, Ipv6Addr, SocketAddr};
use std::str::FromStr;
Expand Down Expand Up @@ -275,6 +276,12 @@ pub enum Error {
#[error("Error querying simnet devices")]
Simnet(#[from] GetSimnetError),

#[error("Failed to access storage")]
Storage(#[from] sled_storage::error::Error),

#[error("Missing datasets: {datasets:?}")]
MissingDatasets { datasets: BTreeSet<DatasetName> },

#[error(
"Requested generation ({requested}) is older than current ({current})"
)]
Expand Down Expand Up @@ -3471,6 +3478,50 @@ impl ServiceManager {
Ok(())
}

// Compares the incoming set of zones with the datasets this sled is
// configured to use.
//
// If the requested zones contain any datasets not configured on this sled,
// an error is returned.
async fn check_requested_zone_datasets_exist(
&self,
request: &OmicronZonesConfig,
) -> Result<(), Error> {
// Before we provision these zones, inspect the request, and
// check all the datasets we're trying to make.
let mut requested_datasets = BTreeSet::new();
for zone in &request.zones {
if let Some(dataset) = zone.dataset_name() {
requested_datasets.insert(dataset);
}
if let Some(dataset) = zone.transient_zone_dataset_name() {
requested_datasets.insert(dataset);
}
}

// These datasets are configured to be part of the control plane.
let datasets_config = self.inner.storage.datasets_config_list().await?;
let existing_datasets: BTreeSet<_> = datasets_config
.datasets
.into_values()
.map(|config| config.name)
.collect();

// Check that the request is no larger than the configured set.
//
// (It's fine to have "existing_datasets" not contained in the
// request set).
let missing_datasets: BTreeSet<_> = requested_datasets
.difference(&existing_datasets)
.cloned()
.collect();
if !missing_datasets.is_empty() {
warn!(self.inner.log, "Missing datasets: {missing_datasets:?}");
return Err(Error::MissingDatasets { datasets: missing_datasets });
}
Ok(())
}

// Ensures that only the following Omicron zones are running.
//
// This method strives to be idempotent.
Expand All @@ -3494,6 +3545,8 @@ impl ServiceManager {
new_request: OmicronZonesConfig,
fake_install_dir: Option<&String>,
) -> Result<(), Error> {
self.check_requested_zone_datasets_exist(&new_request).await?;

// Do some data-normalization to ensure we can compare the "requested
// set" vs the "existing set" as HashSets.
let old_zone_configs: HashSet<OmicronZoneConfig> = existing_zones
Expand Down Expand Up @@ -4695,6 +4748,7 @@ mod illumos_tests {
zone::MockZones,
};

use omicron_common::disk::DatasetsConfig;
use omicron_uuid_kinds::OmicronZoneUuid;
use sled_storage::manager_test_harness::StorageManagerTestHarness;
use std::os::unix::process::ExitStatusExt;
Expand Down Expand Up @@ -5040,6 +5094,18 @@ mod illumos_tests {
.await
.expect("Failed to ensure disks");
assert!(!result.has_error(), "{:?}", result);
let result = harness
.handle()
.datasets_ensure(DatasetsConfig {
generation: Generation::new(),
datasets: BTreeMap::new(),
})
.await
.unwrap();
assert!(
!result.has_error(),
"Failed to ensure empty dataset ledger: {result:?}"
);
harness
}

Expand Down
19 changes: 0 additions & 19 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ use crate::metrics::MetricsManager;
use crate::nexus::{
NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask,
};
use crate::params::OmicronZoneTypeExt;
use crate::probe_manager::ProbeManager;
use crate::services::{self, ServiceManager};
use crate::storage_monitor::StorageMonitorHandle;
Expand Down Expand Up @@ -943,24 +942,6 @@ impl SledAgent {
&self,
requested_zones: OmicronZonesConfig,
) -> Result<(), Error> {
// TODO(https://github.com/oxidecomputer/omicron/issues/6043):
// - If these are the set of filesystems, we should also consider
// removing the ones which are not listed here.
// - It's probably worth sending a bulk request to the storage system,
// rather than requesting individual datasets.
for zone in &requested_zones.zones {
let Some(dataset_name) = zone.dataset_name() else {
continue;
};

// First, ensure the dataset exists
let dataset_id = zone.id.into_untyped_uuid();
self.inner
.storage
.upsert_filesystem(dataset_id, dataset_name)
.await?;
}

self.inner
.services
.ensure_all_omicron_zones_persistent(requested_zones, None)
Expand Down
25 changes: 16 additions & 9 deletions sled-storage/src/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,7 @@ impl StorageManager {
let ledger_paths = self.all_omicron_dataset_ledgers().await;
let maybe_ledger =
Ledger::<DatasetsConfig>::new(&log, ledger_paths.clone()).await;
let had_old_ledger = maybe_ledger.is_some();

let mut ledger = match maybe_ledger {
Some(ledger) => {
Expand Down Expand Up @@ -914,12 +915,14 @@ impl StorageManager {
let result = self.datasets_ensure_internal(&log, &config).await;

let ledger_data = ledger.data_mut();
if *ledger_data == config {
return Ok(result);
}
*ledger_data = config;
ledger.commit().await?;

// Commit the ledger to storage if either:
// - No ledger exists in storage, or
// - The ledger has changed
if !had_old_ledger || *ledger_data != config {
*ledger_data = config;
ledger.commit().await?;
}
Ok(result)
}

Expand Down Expand Up @@ -1142,6 +1145,7 @@ impl StorageManager {
ledger_paths.clone(),
)
.await;
let had_old_ledger = maybe_ledger.is_some();

let mut ledger = match maybe_ledger {
Some(ledger) => {
Expand Down Expand Up @@ -1181,11 +1185,14 @@ impl StorageManager {
self.omicron_physical_disks_ensure_internal(&log, &config).await?;

let ledger_data = ledger.data_mut();
if *ledger_data == config {
return Ok(result);

// Commit the ledger to storage if either:
// - No ledger exists in storage, or
// - The ledger has changed
if !had_old_ledger || *ledger_data != config {
*ledger_data = config;
ledger.commit().await?;
}
*ledger_data = config;
ledger.commit().await?;

Ok(result)
}
Expand Down
Loading