Skip to content

Commit

Permalink
Tar: support GNU numeric format. (#101172)
Browse files Browse the repository at this point in the history
The tar specification stores numeric fields using an octal representation. This limits the range of values that can be stored.

To increase the supported range, a GNU extension defines that when the leading byte is 0xff/0x80 the remaining bytes are a negative/positive big endian formatted value.

When writing under the PAX format, we continue to use only the octal representation in the header fields. The values are overridden using extended attributes.
  • Loading branch information
tmds authored Jun 25, 2024
1 parent afe6fbd commit c5e8f83
Show file tree
Hide file tree
Showing 18 changed files with 539 additions and 343 deletions.
4 changes: 2 additions & 2 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@
<data name="TarEntryFieldExceedsMaxLength" xml:space="preserve">
<value>The field '{0}' exceeds the maximum allowed length for this format.</value>
</data>
<data name="TarSizeFieldTooLargeForEntryFormat" xml:space="preserve">
<value>The value of the size field for the current entry of format '{0}' is greater than the format allows.</value>
<data name="TarFieldTooLargeForEntryFormat" xml:space="preserve">
<value>The value of the field for the current entry of format '{0}' is greater than the format allows.</value>
</data>
<data name="TarExtAttrDisallowedKeyChar" xml:space="preserve">
<value>The extended attribute key '{0}' contains a disallowed '{1}' character.</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ public DateTimeOffset AccessTime
get => _header._aTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
_header._aTime = value;
}
}
Expand All @@ -112,7 +111,6 @@ public DateTimeOffset ChangeTime
get => _header._cTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
_header._cTime = value;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ internal PosixTarEntry(TarEntry other, TarEntryFormat format)
/// </summary>
/// <remarks>Character and block devices are Unix-specific entry types.</remarks>
/// <exception cref="InvalidOperationException">The entry does not represent a block device or a character device.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151 when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
public int DeviceMajor
{
get => _header._devMajor;
Expand All @@ -62,7 +62,10 @@ public int DeviceMajor
}

ArgumentOutOfRangeException.ThrowIfNegative(value);
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
if (FormatIsOctalOnly)
{
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
}

_header._devMajor = value;
}
Expand All @@ -73,7 +76,7 @@ public int DeviceMajor
/// </summary>
/// <remarks>Character and block devices are Unix-specific entry types.</remarks>
/// <exception cref="InvalidOperationException">The entry does not represent a block device or a character device.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151 when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
public int DeviceMinor
{
get => _header._devMinor;
Expand All @@ -85,7 +88,10 @@ public int DeviceMinor
}

ArgumentOutOfRangeException.ThrowIfNegative(value);
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
if (FormatIsOctalOnly)
{
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
}

_header._devMinor = value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ public abstract partial class TarEntry
// Used to access the data section of this entry in an unseekable file
private TarReader? _readerOfOrigin;

// V7 and Ustar store numeric fields using the octal representation only,
// which limits the range of values those formats can hold.
protected bool FormatIsOctalOnly
{
    get
    {
        TarEntryFormat format = _header._format;
        return format == TarEntryFormat.V7 || format == TarEntryFormat.Ustar;
    }
}

// Constructor called when reading a TarEntry from a TarReader.
internal TarEntry(TarHeader header, TarReader readerOfOrigin, TarEntryFormat format)
{
Expand Down Expand Up @@ -92,13 +95,16 @@ public int Gid
/// A timestamp that represents the last time the contents of the file represented by this entry were modified.
/// </summary>
/// <remarks>In Unix platforms, this timestamp is commonly known as <c>mtime</c>.</remarks>
/// <exception cref="ArgumentOutOfRangeException">The specified value is larger than <see cref="DateTimeOffset.UnixEpoch"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException">The specified value is less than <see cref="DateTimeOffset.UnixEpoch"/> when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
public DateTimeOffset ModificationTime
{
get => _header._mTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
if (FormatIsOctalOnly)
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
}
_header._mTime = value;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
return null;
}

long size = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
Debug.Assert(size <= TarHelpers.MaxSizeLength, "size exceeded the max value possible with 11 octal digits. Actual size " + size);
long size = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
if (size < 0)
{
throw new InvalidDataException(SR.Format(SR.TarSizeFieldNegative));
Expand All @@ -384,14 +383,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
// Continue with the rest of the fields that require no special checks
TarHeader header = new(initialFormat,
name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
mode: (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
mode: TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag])
{
_checksum = checksum,
_size = size,
_uid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
_uid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
_linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
};

Expand Down Expand Up @@ -524,10 +523,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice)
{
// Major number for a character device or block device entry.
_devMajor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
_devMajor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));

// Minor number for a character device or block device entry.
_devMinor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
_devMinor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
}
}

Expand All @@ -536,10 +535,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
private void ReadGnuAttributes(Span<byte> buffer)
{
// Convert byte arrays
long aTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
long aTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
_aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime);

long cTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
long cTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
_cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime);

// TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Buffers.Binary;
using System.Buffers.Text;
using System.Collections.Generic;
using System.Diagnostics;
Expand All @@ -15,6 +16,9 @@ namespace System.Formats.Tar
// Writes header attributes of a tar archive entry.
internal sealed partial class TarHeader
{
private const long Octal12ByteFieldMaxValue = (1L << (3 * 11)) - 1; // Max value of 11 octal digits.
private const int Octal8ByteFieldMaxValue = (1 << (3 * 7)) - 1; // Max value of 7 octal digits.

private static ReadOnlySpan<byte> UstarMagicBytes => "ustar\0"u8;
private static ReadOnlySpan<byte> UstarVersionBytes => "00"u8;

Expand Down Expand Up @@ -606,35 +610,22 @@ private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)

if (_mode > 0)
{
checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
checksum += FormatNumeric(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
}

if (_uid > 0)
{
checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
checksum += FormatNumeric(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
}

if (_gid > 0)
{
checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
checksum += FormatNumeric(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
}

if (_size > 0)
{
if (_size <= TarHelpers.MaxSizeLength)
{
checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
}
else if (_format is not TarEntryFormat.Pax)
{
throw new ArgumentException(SR.Format(SR.TarSizeFieldTooLargeForEntryFormat, _format));
}
else
{
// No writing, just verifications
Debug.Assert(_typeFlag is not TarEntryType.ExtendedAttributes and not TarEntryType.GlobalExtendedAttributes);
Debug.Assert(Convert.ToInt64(ExtendedAttributes[PaxEaSize]) > TarHelpers.MaxSizeLength);
}
checksum += FormatNumeric(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
}

checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime));
Expand Down Expand Up @@ -739,12 +730,12 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)

if (_devMajor > 0)
{
checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
checksum += FormatNumeric(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
}

if (_devMinor > 0)
{
checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
checksum += FormatNumeric(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
}

return checksum;
Expand Down Expand Up @@ -916,7 +907,7 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
ExtendedAttributes[PaxEaLinkName] = _linkName;
}

if (_size > TarHelpers.MaxSizeLength)
if (_size > Octal12ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaSize] = _size.ToString();
}
Expand All @@ -925,6 +916,42 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
ExtendedAttributes.Remove(PaxEaSize);
}

if (_uid > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaUid] = _uid.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaUid);
}

if (_gid > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaGid] = _gid.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaGid);
}

if (_devMajor > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaDevMajor] = _devMajor.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaDevMajor);
}

if (_devMinor > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaDevMinor] = _devMinor.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaDevMinor);
}

// Sets the specified string to the dictionary if it's longer than the specified max byte length; otherwise, remove it.
static void TryAddStringField(Dictionary<string, string> extendedAttributes, string key, string? value, int maxLength)
{
Expand Down Expand Up @@ -1022,6 +1049,56 @@ private static int Checksum(ReadOnlySpan<byte> bytes)
return checksum;
}

// Writes the specified value into an 8 byte numeric header field and returns the checksum of the written bytes.
// Values in the range [0, Octal8ByteFieldMaxValue], and every value when the format is PAX, are written as octal.
// Other values are written using the GNU binary representation when the format is GNU;
// for any remaining format an ArgumentException is thrown.
private int FormatNumeric(int value, Span<byte> destination)
{
    Debug.Assert(destination.Length == 8, "8 byte field expected.");

    bool fitsInOctalDigits = value is >= 0 and <= Octal8ByteFieldMaxValue;
    if (fitsInOctalDigits || _format == TarEntryFormat.Pax)
    {
        return FormatOctal(value, destination);
    }

    if (_format != TarEntryFormat.Gnu)
    {
        throw new ArgumentException(SR.Format(SR.TarFieldTooLargeForEntryFormat, _format));
    }

    // GNU binary representation: the field holds a big endian number.
    // Widening the int sign-extends it, so a negative value already starts with a '0xff' byte.
    // Setting the most significant bit makes a positive value start with a '0x80' byte.
    long bigEndianValue = value;
    bigEndianValue |= long.MinValue; // Same bit pattern as 1L << 63.
    BinaryPrimitives.WriteInt64BigEndian(destination, bigEndianValue);
    return Checksum(destination);
}

// Writes the specified value into a 12 byte numeric header field and returns the checksum of the written bytes.
// Values in the range [0, Octal12ByteFieldMaxValue], and every value when the format is PAX, are written as octal.
// Other values are written using the GNU binary representation when the format is GNU;
// for any remaining format an ArgumentException is thrown.
private int FormatNumeric(long value, Span<byte> destination)
{
    Debug.Assert(destination.Length == 12, "12 byte field expected.");

    bool fitsInOctalDigits = value is >= 0 and <= Octal12ByteFieldMaxValue;
    if (fitsInOctalDigits || _format == TarEntryFormat.Pax)
    {
        return FormatOctal(value, destination);
    }

    if (_format != TarEntryFormat.Gnu)
    {
        throw new ArgumentException(SR.Format(SR.TarFieldTooLargeForEntryFormat, _format));
    }

    // GNU binary representation: the first 4 bytes are a marker and sign fill
    // ('0xff' bytes for a negative value, '0x80' followed by zeros for a positive value),
    // and the last 8 bytes are the value as a big endian long.
    const int MarkerLength = 4;
    uint marker = value < 0 ? 0xffffffff : 0x80000000;
    BinaryPrimitives.WriteUInt32BigEndian(destination, marker);
    BinaryPrimitives.WriteInt64BigEndian(destination.Slice(MarkerLength), value);
    return Checksum(destination);
}

// Writes the specified decimal number as a right-aligned octal number and returns its checksum.
private static int FormatOctal(long value, Span<byte> destination)
{
Expand All @@ -1040,11 +1117,11 @@ private static int FormatOctal(long value, Span<byte> destination)
return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination);
}

// Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum.
private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
// Writes the specified DateTimeOffset's Unix time seconds, and returns its checksum.
private int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
{
long unixTimeSeconds = timestamp.ToUnixTimeSeconds();
return FormatOctal(unixTimeSeconds, destination);
return FormatNumeric(unixTimeSeconds, destination);
}

// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ internal static partial class TarHelpers
{
internal const short RecordSize = 512;
internal const int MaxBufferLength = 4096;
internal const long MaxSizeLength = (1L << 33) - 1; // Max value of 11 octal digits = 2^33 - 1 or 8 Gb.

internal const UnixFileMode ValidUnixFileModes =
UnixFileMode.UserRead |
Expand Down Expand Up @@ -215,6 +214,29 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
return entryType;
}

/// <summary>Parses a numeric header field stored either as octal digits or in the GNU binary representation.</summary>
/// <remarks>
/// The tar standard stores numeric fields as octal digits, which limits the range of values a field can hold.
/// A GNU extension widens that range: a leading '0xff' byte marks a negative big endian value and a leading
/// '0x80' byte marks a positive big endian value. Like the 'tar' tool, this representation is accepted for
/// non GNU formats as well.
/// </remarks>
internal static T ParseNumeric<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>, IBinaryInteger<T>
{
    return buffer[0] switch
    {
        // Negative value: the whole field, marker byte included, is read as a signed big endian number.
        0xff => T.ReadBigEndian(buffer, isUnsigned: false),

        // Positive value: the marker byte is skipped and the remaining bytes are read as an unsigned big endian number.
        0x80 => T.ReadBigEndian(buffer.Slice(1), isUnsigned: true),

        // Anything else is handled as the standard octal representation.
        _ => ParseOctal<T>(buffer),
    };
}

/// <summary>Parses a byte span that represents an ASCII string containing a number in octal base.</summary>
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
{
Expand Down
Loading

0 comments on commit c5e8f83

Please sign in to comment.