bug: project delete #1762

Merged (3 commits) on May 7, 2024
Changes from all commits
1 change: 1 addition & 0 deletions backends/src/test_utils/mod.rs
@@ -1,2 +1,3 @@
pub mod gateway;
pub mod provisioner;
pub mod resource_recorder;
51 changes: 51 additions & 0 deletions backends/src/test_utils/provisioner.rs
@@ -0,0 +1,51 @@
use std::net::{Ipv4Addr, SocketAddr};

use async_trait::async_trait;
use portpicker::pick_unused_port;
use shuttle_proto::provisioner::{
provisioner_server::{Provisioner, ProvisionerServer},
DatabaseDeletionResponse, DatabaseRequest, DatabaseResponse, Ping, Pong,
};
use tonic::transport::Server;

struct ProvisionerMock;

#[async_trait]
impl Provisioner for ProvisionerMock {
async fn provision_database(
&self,
_request: tonic::Request<DatabaseRequest>,
) -> Result<tonic::Response<DatabaseResponse>, tonic::Status> {
panic!("no run tests should request a db");
}

async fn delete_database(
&self,
_request: tonic::Request<DatabaseRequest>,
) -> Result<tonic::Response<DatabaseDeletionResponse>, tonic::Status> {
panic!("no run tests should delete a db");
}

async fn health_check(
&self,
_request: tonic::Request<Ping>,
) -> Result<tonic::Response<Pong>, tonic::Status> {
panic!("no run tests should do a health check");
}
}

/// Start a mocked provisioner and return the port it started on
pub async fn get_mocked_provisioner() -> u16 {
let provisioner = ProvisionerMock;

let port = pick_unused_port().unwrap();
let provisioner_addr = SocketAddr::new(Ipv4Addr::UNSPECIFIED.into(), port);
tokio::spawn(async move {
Server::builder()
.add_service(ProvisionerServer::new(provisioner))
.serve(provisioner_addr)
.await
});

port
}
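For context, a rough sketch of how this helper is consumed; the gateway test harness in gateway/src/lib.rs further down does essentially this when it builds its provisioner_uri (the module name, test name, and host.docker.internal address here are illustrative, not part of the PR):

```rust
#[cfg(test)]
mod provisioner_mock_sketch {
    use super::get_mocked_provisioner;

    // Start the mock once and build the URI handed to deployer containers.
    // Any provisioner RPC made during a test hits the panicking stubs above
    // and fails loudly, which is the point of the mock.
    #[tokio::test]
    async fn mocked_provisioner_starts() {
        let provisioner_port = get_mocked_provisioner().await;
        let provisioner_uri = format!("http://host.docker.internal:{provisioner_port}");
        assert!(provisioner_uri.starts_with("http://"));
    }
}
```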
13 changes: 2 additions & 11 deletions backends/src/test_utils/resource_recorder.rs
@@ -22,7 +22,7 @@ impl ResourceRecorder for MockedResourceRecorder {
&self,
request: Request<RecordRequest>,
) -> Result<Response<ResultResponse>, Status> {
println!("recording resources");
println!("recording resources: {request:?}");

let RecordRequest {
project_id,
@@ -149,14 +149,6 @@ impl ResourceRecorder for MockedResourceRecorder {
r#type,
} = request.into_inner();

// Fail to delete a metadata resource if requested
if r#type == "metadata" {
return Ok(Response::new(ResultResponse {
success: false,
message: Default::default(),
}));
}

self.resources.lock().unwrap().retain(|r| {
!(r.project_id == project_id && r.service_id == service_id && r.r#type == r#type)
});
@@ -169,8 +161,7 @@ impl ResourceRecorder for MockedResourceRecorder {
}

/// Start a mocked resource recorder and return the port it started on
/// This mock will function like a normal resource recorder. However, it will always fail to delete metadata resources
/// if any tests need to simulate a failure.
/// This mock will function like a normal resource recorder.
pub async fn get_mocked_resource_recorder() -> u16 {
let resource_recorder = MockedResourceRecorder {
resources: Mutex::new(Vec::new()),
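Net effect of this file: the mock no longer simulates a failing metadata deletion, which is why the api_delete_project_that_has_resources_but_fails_to_remove_them test is removed from gateway/src/api/latest.rs further down.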
2 changes: 1 addition & 1 deletion common/src/models/deployment.rs
@@ -17,7 +17,7 @@ pub const GIT_STRINGS_MAX_LENGTH: usize = 80;
pub const CREATE_SERVICE_BODY_LIMIT: usize = 50_000_000;
const GIT_OPTION_NONE_TEXT: &str = "N/A";

#[derive(Deserialize, Serialize)]
#[derive(Deserialize, Serialize, Debug)]
pub struct Response {
pub id: Uuid,
pub service_id: String,
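The added Debug derive is what lets the gateway capture deployment responses as tracing fields; a minimal sketch with a stand-in struct (not the real model, which has more fields):

```rust
use tracing::debug;

// Stand-in for common::models::deployment::Response.
#[derive(Debug)]
struct Response {
    id: String,
    service_id: String,
}

fn log_deployments(deployments: &[Response]) {
    // Mirrors the new `debug!(?deployments, "got deployments")` line in gateway/src/api/latest.rs.
    debug!(?deployments, "got deployments");
}
```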
89 changes: 32 additions & 57 deletions gateway/src/api/latest.rs
@@ -40,7 +40,7 @@ use tokio::sync::mpsc::Sender;
use tokio::sync::{Mutex, MutexGuard};
use tower::ServiceBuilder;
use tower_http::cors::CorsLayer;
use tracing::{error, field, instrument, trace, Span};
use tracing::{debug, error, field, info, instrument, trace, warn, Span};
use ttl_cache::TtlCache;
use ulid::Ulid;
use uuid::Uuid;
@@ -309,49 +309,30 @@ async fn delete_project(

let project_id = Ulid::from_string(&project.id).expect("stored project id to be a valid ULID");

// Try to startup destroyed, errored or outdated projects
let project_deletable = project.state.is_ready() || project.state.is_stopped();
let current_version: semver::Version = env!("CARGO_PKG_VERSION")
.parse()
.expect("to have a valid semver gateway version");

let version = project
.state
.container()
.and_then(|container_inspect_response| {
container_inspect_response.image.and_then(|inner| {
inner
.strip_prefix("public.ecr.aws/shuttle/deployer:v")
.and_then(|x| x.parse::<semver::Version>().ok())
})
})
// Defaulting to a version that introduced a breaking change.
// This was the last one that introduced it at the present
// moment.
.unwrap_or(semver::Version::new(0, 39, 0));

// We restart the project before deletion everytime
// we detect it is outdated, so that we avoid by default
// breaking changes that can happen on the deployer
// side in the future.
if !project_deletable || version < current_version {
let handle = state
.service
.new_task()
.project(project_name.clone())
.and_then(task::restart(project_id))
.and_then(task::run_until_done())
.send(&state.sender)
.await?;
let handle = state
.service
.new_task()
.project(project_name.clone())
.and_then(task::destroy()) // This destroy might only recover the project from an errored state
.and_then(task::run_until_destroyed())
.and_then(task::restart(project_id))
.and_then(task::run_until_ready())
.and_then(task::destroy())
.and_then(task::run_until_destroyed())
.and_then(task::restart(project_id))
.and_then(task::run_until_ready())
.send(&state.sender)
.await?;

// Wait for the project to be ready
handle.await;
// Wait for the project to be ready
handle.await;

let new_state = state.service.find_project_by_name(&project_name).await?;
let new_state = state.service.find_project_by_name(&project_name).await?;

if !new_state.state.is_ready() {
return Err(ProjectCorrupted.into());
}
if !new_state.state.is_ready() {
warn!(state = ?new_state.state, "failed to restart project");
return Err(ProjectCorrupted.into());
}

let service = state.service.clone();
@@ -360,8 +341,10 @@ async fn delete_project(
let project_caller =
ProjectCaller::new(state.clone(), scoped_user.clone(), req.headers()).await?;

trace!("getting deployments");
// check that a deployment is not running
let mut deployments = project_caller.get_deployment_list().await?;
debug!(?deployments, "got deployments");
deployments.sort_by_key(|d| d.last_update);

// Make sure no deployment is in the building pipeline
@@ -376,6 +359,7 @@ async fn delete_project(
});

if has_bad_state {
warn!("has bad state");
return Err(ProjectHasBuildingDeployment.into());
}

@@ -384,6 +368,7 @@ async fn delete_project(
.filter(|d| d.state == deployment::State::Running);

for running_deployment in running_deployments {
info!(%running_deployment, "stopping running deployment");
let res = project_caller
.stop_deployment(&running_deployment.id)
.await?;
@@ -393,11 +378,13 @@ async fn delete_project(
}
}

trace!("getting resources");
// check if any resources exist
let resources = project_caller.get_resources().await?;
let mut delete_fails = Vec::new();

for resource in resources {
info!(?resource, "deleting resource");
let resource_type = resource.r#type.to_string();
let res = project_caller.delete_resource(&resource_type).await?;

@@ -410,6 +397,7 @@ async fn delete_project(
return Err(ProjectHasResources(delete_fails).into());
}

trace!("deleting container");
let task = service
.new_task()
.project(project_name.clone())
@@ -418,6 +406,7 @@ async fn delete_project(
.await?;
task.await;

trace!("removing project from state");
service.delete_project(&project_name).await?;

Ok(AxumJson("project successfully deleted".to_owned()))
@@ -688,7 +677,7 @@ async fn get_status(
};

// Compute provisioner status.
let provisioner_status = if let Ok(channel) = service.provisioner_host().connect().await {
let provisioner_status = if let Ok(channel) = service.provisioner_uri().connect().await {
let channel = ServiceBuilder::new().service(channel);
let mut provisioner_client = ProvisionerClient::new(channel);
if provisioner_client.health_check(Ping {}).await.is_ok() {
@@ -1736,20 +1725,6 @@ pub mod tests {
);
}

#[test_context(TestProject)]
#[tokio::test]
async fn api_delete_project_that_has_resources_but_fails_to_remove_them(
project: &mut TestProject,
) {
project.deploy("../examples/axum/metadata").await;
project.stop_service().await;

assert_eq!(
project.router_call(Method::DELETE, "/delete").await,
StatusCode::INTERNAL_SERVER_ERROR
);
}

#[test_context(TestProject)]
#[tokio::test]
async fn api_delete_project_that_has_running_deployment(project: &mut TestProject) {
@@ -1767,11 +1742,11 @@ pub mod tests {
project.just_deploy("../examples/axum/hello-world").await;

// Wait a bit for it to progress in the queue
sleep(Duration::from_secs(2)).await;
sleep(Duration::from_secs(10)).await;

assert_eq!(
project.router_call(Method::DELETE, "/delete").await,
StatusCode::BAD_REQUEST
StatusCode::OK
);
}

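In summary, delete_project no longer compares the running deployer image version against the gateway version: every delete now starts with a destroy-and-restart cycle that must leave the project in a ready state, and each subsequent stage (listing deployments, stopping running ones, deleting resources, destroying the container, removing the project record) now emits trace, debug, info, or warn events. The status endpoint reads the new provisioner_uri setting, the test that relied on metadata deletions failing is removed, and the remaining test now waits 10 seconds and expects deleting a freshly deployed project to return 200 OK instead of 400 Bad Request.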
4 changes: 2 additions & 2 deletions gateway/src/args.rs
@@ -60,8 +60,8 @@ pub struct ServiceArgs {
pub prefix: String,
/// The address at which an active runtime container will find
/// the provisioner service
#[arg(long, default_value = "provisioner")]
pub provisioner_host: String,
#[arg(long, default_value = "http://provisioner:8000")]
pub provisioner_uri: String,
/// Address to reach the authentication service at
#[arg(long, default_value = "http://127.0.0.1:8008")]
pub auth_uri: Uri,
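The renamed flag now takes a full URI instead of a bare hostname; a stripped-down sketch of the same clap pattern (a stand-in binary for illustration, not the real gateway, which has many more arguments):

```rust
use clap::Parser;

/// Stripped-down stand-in for gateway's ServiceArgs.
#[derive(Parser, Debug)]
struct ServiceArgs {
    /// The address at which an active runtime container will find the provisioner service
    #[arg(long, default_value = "http://provisioner:8000")]
    provisioner_uri: String,
}

fn main() {
    // e.g. `gateway --provisioner-uri http://host.docker.internal:5000`
    let args = ServiceArgs::parse();
    println!("provisioner at {}", args.provisioner_uri);
}
```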
8 changes: 4 additions & 4 deletions gateway/src/lib.rs
@@ -168,6 +168,7 @@ pub mod tests {
use ring::signature::{self, Ed25519KeyPair, KeyPair};
use shuttle_backends::auth::ConvertResponse;
use shuttle_backends::test_utils::gateway::PermissionsMock;
use shuttle_backends::test_utils::provisioner::get_mocked_provisioner;
use shuttle_backends::test_utils::resource_recorder::get_mocked_resource_recorder;
use shuttle_common::claims::{AccountTier, Claim};
use shuttle_common::models::deployment::DeploymentRequest;
@@ -399,6 +400,7 @@ pub mod tests {
let auth: SocketAddr = format!("0.0.0.0:{auth_port}").parse().unwrap();
let auth_uri: Uri = format!("http://{auth}").parse().unwrap();
let resource_recorder_port = get_mocked_resource_recorder().await;
let provisioner_port = get_mocked_provisioner().await;

let auth_service = AuthService::new(auth);
auth_service
@@ -418,8 +420,6 @@
let network_name =
env::var("SHUTTLE_TESTS_NETWORK").unwrap_or_else(|_| "shuttle_default".to_string());

let provisioner_host = "provisioner".to_string();

let docker_host = "/var/run/docker.sock".to_string();

let args = StartArgs {
Expand All @@ -432,9 +432,9 @@ pub mod tests {
docker_host,
image,
prefix,
provisioner_host,
provisioner_uri: format!("http://host.docker.internal:{provisioner_port}"),
// The started containers need to reach auth on the host.
// For this to work, the firewall should not be blocking traffic on the `SHUTTLE_TEST_NETWORK` interface.
// For this to work, the firewall should not be blocking traffic on the `SHUTTLE_TESTS_NETWORK` interface.
// The following command can be used on NixOs to allow traffic on the interface.
// ```
// sudo iptables -I nixos-fw -i <interface> -j nixos-fw-accept
6 changes: 3 additions & 3 deletions gateway/src/project.rs
@@ -860,7 +860,7 @@ impl ProjectCreating {
let ContainerSettings {
image: default_image,
prefix,
provisioner_host,
provisioner_uri,
auth_uri,
resource_recorder_uri,
extra_hosts,
@@ -902,7 +902,7 @@ impl ProjectCreating {
"--api-address",
format!("0.0.0.0:{RUNTIME_API_PORT}"),
"--provisioner-address",
format!("http://{provisioner_host}:8000"),
provisioner_uri,
"--artifacts-path",
"/opt/shuttle",
"--state",
@@ -1732,7 +1732,7 @@

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ProjectDestroyed {
destroyed: Option<ContainerInspectResponse>,
pub destroyed: Option<ContainerInspectResponse>,
}

#[async_trait]