Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v6.3.2: Fix a deployment directory leak #1793

Merged
merged 11 commits into from
Mar 3, 2024
6 changes: 3 additions & 3 deletions build/TestCommon.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

<ItemGroup>
<!-- Usage: Code coverage collection -->
<PackageReference Include="coverlet.collector" Version="6.0.0">
<PackageReference Include="coverlet.collector" Version="6.0.1">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand All @@ -18,9 +18,9 @@
<!-- Pinned: Be VERY careful about updating https://github.com/moq/moq/issues/1372 -->
<PackageReference Include="Moq" Version="4.20.70" />
<!-- Usage: MSTest execution -->
<PackageReference Include="MSTest.TestAdapter" Version="3.2.1" />
<PackageReference Include="MSTest.TestAdapter" Version="3.2.2" />
<!-- Usage: MSTest asserts etc... -->
<PackageReference Include="MSTest.TestFramework" Version="3.2.1" />
<PackageReference Include="MSTest.TestFramework" Version="3.2.2" />
</ItemGroup>

</Project>
2 changes: 1 addition & 1 deletion build/Version.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<!-- Integration tests will ensure they match across the board -->
<Import Project="WebpanelVersion.props" />
<PropertyGroup>
<TgsCoreVersion>6.3.1</TgsCoreVersion>
<TgsCoreVersion>6.3.2</TgsCoreVersion>
<TgsConfigVersion>5.1.0</TgsConfigVersion>
<TgsApiVersion>10.2.0</TgsApiVersion>
<TgsCommonLibraryVersion>7.0.0</TgsCommonLibraryVersion>
Expand Down
55 changes: 42 additions & 13 deletions src/Tgstation.Server.Host/Components/Deployment/DmbFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

Expand Down Expand Up @@ -178,7 +179,8 @@ public IDmbProvider LockNextDmb(int lockCount)
{
var jobId = nextDmbProvider.CompileJob.Require(x => x.Id);
var incremented = jobLockCounts[jobId] += lockCount;
logger.LogTrace("Compile job {jobId} lock count now: {lockCount}", jobId, incremented);
logger.LogTrace("Compile job {jobId} lock increased by: {increment}", jobId, lockCount);
LogLockCounts();
return nextDmbProvider;
}
}
Expand Down Expand Up @@ -325,14 +327,18 @@ void CleanupAction()
if (!jobLockCounts.TryGetValue(compileJobId, out int value))
{
value = 1;
logger.LogTrace("Initializing lock count for compile job {id}", compileJobId);
jobLockCounts.Add(compileJobId, 1);
}
else
{
logger.LogTrace("FromCompileJob already had a jobLockCounts entry for {id}. Incrementing lock count to {value}.", compileJobId, value);
jobLockCounts[compileJobId] = ++value;
}

providerSubmitted = true;

logger.LogTrace("Compile job {id} lock count now: {lockCount}", compileJobId, value);
LogLockCounts();
return newProvider;
}
}
Expand Down Expand Up @@ -385,7 +391,7 @@ await databaseContextFactory.UseContext(async db =>
await ioManager.CreateDirectory(gameDirectory, cancellationToken);
var directories = await ioManager.GetDirectories(gameDirectory, cancellationToken);
int deleting = 0;
var tasks = directories.Select(async x =>
var tasks = directories.Select<string, ValueTask>(async x =>
{
var nameOnly = ioManager.GetFileName(x);
if (jobUidsToNotErase.Contains(nameOnly))
Expand All @@ -396,17 +402,13 @@ await databaseContextFactory.UseContext(async db =>
++deleting;
await DeleteCompileJobContent(x, cancellationToken);
}
catch (OperationCanceledException)
{
throw;
}
catch (Exception e)
catch (Exception e) when (e is not OperationCanceledException)
{
logger.LogWarning(e, "Error deleting directory {dirName}!", x);
}
}).ToList();
if (deleting > 0)
await Task.WhenAll(tasks);
await ValueTaskExtensions.WhenAll(tasks);
}
#pragma warning restore CA1506

Expand Down Expand Up @@ -435,14 +437,14 @@ async Task WrapThrowableTasks()
// First kill the GitHub deployment
var remoteDeploymentManager = remoteDeploymentManagerFactory.CreateRemoteDeploymentManager(metadata, job);

// DCT: None available
var deploymentJob = remoteDeploymentManager.MarkInactive(job, CancellationToken.None);
var cancellationToken = cleanupCts.Token;
var deploymentJob = remoteDeploymentManager.MarkInactive(job, cancellationToken);

var deleteTask = DeleteCompileJobContent(job.DirectoryName!.Value.ToString(), cleanupCts.Token);
var deleteTask = DeleteCompileJobContent(job.DirectoryName!.Value.ToString(), cancellationToken);

await ValueTaskExtensions.WhenAll(deleteTask, deploymentJob);
}
catch (Exception ex)
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "Error cleaning up compile job {jobGuid}!", job.DirectoryName);
}
Expand All @@ -468,6 +470,8 @@ async Task WrapThrowableTasks()
}
else
logger.LogError("Extra Dispose of DmbProvider for CompileJob {compileJobId}!", jobId);

LogLockCounts();
}
}

Expand All @@ -483,5 +487,30 @@ async ValueTask DeleteCompileJobContent(string directory, CancellationToken canc
await eventConsumer.HandleEvent(EventType.DeploymentCleanup, new List<string> { ioManager.ResolvePath(directory) }, true, cancellationToken);
await ioManager.DeleteDirectory(directory, cancellationToken);
}

/// <summary>
/// Log out the current lock counts to Trace.
/// </summary>
/// <remarks><see cref="jobLockCounts"/> must be locked before calling this function.</remarks>
void LogLockCounts()
{
if (jobLockCounts.Count == 0)
{
logger.LogWarning("No compile jobs registered!");
return;
}

var builder = new StringBuilder();
foreach (var jobId in jobLockCounts.Keys)
{
builder.AppendLine();
builder.Append("\t- ");
builder.Append(jobId);
builder.Append(": ");
builder.Append(jobLockCounts[jobId]);
}

logger.LogTrace("Compile Job Lock Counts:{details}", builder.ToString());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public ValueTask MakeActive(CancellationToken cancellationToken)
}

/// <summary>
/// Should be <see langword="await"/>. before calling <see cref="MakeActive(CancellationToken)"/> to ensure the <see cref="SwappableDmbProvider"/> is ready to instantly swap. Can be called multiple times.
/// Should be <see langword="await"/>ed. before calling <see cref="MakeActive(CancellationToken)"/> to ensure the <see cref="SwappableDmbProvider"/> is ready to instantly swap. Can be called multiple times.
/// </summary>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="Task"/> representing the preparation process.</returns>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,12 @@ protected sealed override async ValueTask DisposeAndNullControllersImpl()

// If we reach this point, we can guarantee PrepServerForLaunch will be called before starting again.
ActiveSwappable = null;
await (pendingSwappable?.DisposeAsync() ?? ValueTask.CompletedTask);
pendingSwappable = null;

if (pendingSwappable != null)
{
await pendingSwappable.DisposeAsync();
pendingSwappable = null;
}

await DrainDeploymentCleanupTasks(true);
}
Expand All @@ -138,8 +142,6 @@ protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(Canc
{
if (pendingSwappable != null)
{
ValueTask RunPrequel() => BeforeApplyDmb(pendingSwappable.CompileJob, cancellationToken);

var needToSwap = !pendingSwappable.Swapped;
var controller = Server!;
if (needToSwap)
Expand All @@ -151,7 +153,6 @@ protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(Canc
// integration test logging will catch this
Logger.LogError(
"The reboot bridge request completed before the watchdog could suspend the server! This can lead to buggy DreamDaemon behaviour and should be reported! To ensure stability, we will need to hard reboot the server");
await RunPrequel();
return MonitorAction.Restart;
}

Expand All @@ -165,7 +166,7 @@ protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(Canc
}
}

var updateTask = RunPrequel();
var updateTask = BeforeApplyDmb(pendingSwappable.CompileJob, cancellationToken);
if (needToSwap)
await PerformDmbSwap(pendingSwappable, cancellationToken);

Expand All @@ -183,7 +184,8 @@ async Task CleanupLingeringDeployment()
currentCompileJobId,
lingeringDeploymentExpirySeconds);

var timeout = AsyncDelayer.Delay(TimeSpan.FromSeconds(lingeringDeploymentExpirySeconds), cancellationToken);
// DCT: A cancel firing here can result in us leaving a dmbprovider undisposed, localDeploymentCleanupGate will always fire in that case
var timeout = AsyncDelayer.Delay(TimeSpan.FromSeconds(lingeringDeploymentExpirySeconds), CancellationToken.None);

var completedTask = await Task.WhenAny(
localDeploymentCleanupGate.Task,
Expand Down
4 changes: 2 additions & 2 deletions src/Tgstation.Server.Host/Tgstation.Server.Host.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
<!-- Usage: GitHub.com interop -->
<PackageReference Include="Octokit" Version="10.0.0" />
<!-- Usage: MYSQL/MariaDB ORM plugin -->
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="8.0.0" />
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="8.0.1" />
<!-- Usage: Discord interop -->
<PackageReference Include="Remora.Discord" Version="2024.1.0" />
<!-- Usage: Rich logger builder -->
Expand All @@ -128,7 +128,7 @@
<!-- Usage: Temporary resolution to compatibility issues with EFCore 7 and .NET 8 -->
<PackageReference Include="System.Security.Permissions" Version="8.0.0" />
<!-- Usage: .DeleteAsync() support for IQueryable<T>s -->
<PackageReference Include="Z.EntityFramework.Plus.EFCore" Version="8.102.1" />
<PackageReference Include="Z.EntityFramework.Plus.EFCore" Version="8.102.1.1" />
</ItemGroup>

<ItemGroup>
Expand Down
Loading