Skip to content

Commit

Permalink
Simplify TIFF processing by 'shifting' base of indexed reader, rather…
Browse files Browse the repository at this point in the history
… than passing TIFF header offsets around everywhere.
  • Loading branch information
drewnoakes committed Oct 7, 2016
1 parent c6e47ab commit 2281cea
Show file tree
Hide file tree
Showing 10 changed files with 183 additions and 92 deletions.
31 changes: 31 additions & 0 deletions MetadataExtractor.Tests/IO/IndexedReaderTestBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -318,5 +318,36 @@ public void GetByteEof()
reader.GetByte(0);
Assert.Throws<BufferBoundsException>(() => reader.GetByte(1));
}

[Fact]
public void WithShiftedBaseOffset()
{
var reader = CreateReader(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
reader.IsMotorolaByteOrder = false;

Assert.Equal(10, reader.Length);
Assert.Equal(0, reader.GetByte(0));
Assert.Equal(1, reader.GetByte(1));
Assert.Equal(new byte[] { 0, 1 }, reader.GetBytes(0, 2));
Assert.Equal(4, reader.ToUnshiftedOffset(4));

reader = reader.WithShiftedBaseOffset(2);

Assert.False(reader.IsMotorolaByteOrder);
Assert.Equal(8, reader.Length);
Assert.Equal(2, reader.GetByte(0));
Assert.Equal(3, reader.GetByte(1));
Assert.Equal(new byte[] { 2, 3 }, reader.GetBytes(0, 2));
Assert.Equal(6, reader.ToUnshiftedOffset(4));

reader = reader.WithShiftedBaseOffset(2);

Assert.False(reader.IsMotorolaByteOrder);
Assert.Equal(6, reader.Length);
Assert.Equal(4, reader.GetByte(0));
Assert.Equal(5, reader.GetByte(1));
Assert.Equal(new byte[] { 4, 5 }, reader.GetBytes(0, 2));
Assert.Equal(8, reader.ToUnshiftedOffset(4));
}
}
}
6 changes: 3 additions & 3 deletions MetadataExtractor/Formats/Exif/ExifReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public sealed class ExifReader : IJpegSegmentMetadataReader
{
return segments
.Where(segment => segment.Bytes.Length >= JpegSegmentPreamble.Length && Encoding.UTF8.GetString(segment.Bytes, 0, JpegSegmentPreamble.Length) == JpegSegmentPreamble)
.SelectMany(segment => Extract(new ByteArrayReader(segment.Bytes), JpegSegmentPreamble.Length))
.SelectMany(segment => Extract(new ByteArrayReader(segment.Bytes, baseOffset: JpegSegmentPreamble.Length)))
.ToList();
}

Expand All @@ -73,14 +73,14 @@ public sealed class ExifReader : IJpegSegmentMetadataReader
#else
IReadOnlyList<Directory>
#endif
Extract([NotNull] IndexedReader reader, int readerOffset = 0)
Extract([NotNull] IndexedReader reader)
{
var directories = new List<Directory>();

try
{
// Read the TIFF-formatted Exif data
TiffReader.ProcessTiff(reader, new ExifTiffHandler(directories, StoreThumbnailBytes), readerOffset);
TiffReader.ProcessTiff(reader, new ExifTiffHandler(directories, StoreThumbnailBytes));
}
catch (TiffProcessingException e)
{
Expand Down
63 changes: 33 additions & 30 deletions MetadataExtractor/Formats/Exif/ExifTiffHandler.cs

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions MetadataExtractor/Formats/Tiff/DirectoryTiffHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ protected void PushDirectory([NotNull] Directory directory)
public void SetInt32U(int tagId, uint int32U) => CurrentDirectory.Set(tagId, int32U);
public void SetInt32UArray(int tagId, uint[] array) => CurrentDirectory.Set(tagId, array);

public abstract void Completed(IndexedReader reader, int tiffHeaderOffset);
public abstract void Completed(IndexedReader reader);

public abstract bool CustomProcessTag(int tagOffset, ICollection<int> processedIfdOffsets, int tiffHeaderOffset, IndexedReader reader, int tagId, int byteCount);
public abstract bool CustomProcessTag(int tagOffset, ICollection<int> processedIfdOffsets, IndexedReader reader, int tagId, int byteCount);

public abstract bool TryCustomProcessFormat(int tagId, TiffDataFormatCode formatCode, uint componentCount, out long byteCount);

Expand Down
4 changes: 2 additions & 2 deletions MetadataExtractor/Formats/Tiff/ITiffHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ public interface ITiffHandler

void EndingIfd();

void Completed([NotNull] IndexedReader reader, int tiffHeaderOffset);
void Completed([NotNull] IndexedReader reader);

/// <exception cref="System.IO.IOException"/>
bool CustomProcessTag(int tagOffset, [NotNull] ICollection<int> processedIfdOffsets, int tiffHeaderOffset, [NotNull] IndexedReader reader, int tagId, int byteCount);
bool CustomProcessTag(int tagOffset, [NotNull] ICollection<int> processedIfdOffsets, [NotNull] IndexedReader reader, int tagId, int byteCount);

bool TryCustomProcessFormat(int tagId, TiffDataFormatCode formatCode, uint componentCount, out long byteCount);

Expand Down
51 changes: 25 additions & 26 deletions MetadataExtractor/Formats/Tiff/TiffReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ public static class TiffReader
/// <summary>Processes a TIFF data sequence.</summary>
/// <param name="reader">the <see cref="IndexedReader"/> from which the data should be read</param>
/// <param name="handler">the <see cref="ITiffHandler"/> that will coordinate processing and accept read values</param>
/// <param name="tiffHeaderOffset">the offset within <c>reader</c> at which the TIFF header starts</param>
/// <exception cref="TiffProcessingException">if an error occurred during the processing of TIFF data that could not be ignored or recovered from</exception>
/// <exception cref="System.IO.IOException">an error occurred while accessing the required data</exception>
/// <exception cref="TiffProcessingException"/>
public static void ProcessTiff([NotNull] IndexedReader reader, [NotNull] ITiffHandler handler, int tiffHeaderOffset = 0)
public static void ProcessTiff([NotNull] IndexedReader reader, [NotNull] ITiffHandler handler)
{
// Read byte order.
switch (reader.GetInt16(tiffHeaderOffset))
var byteOrder = reader.GetInt16(0);
switch (byteOrder)
{
case 0x4d4d: // MM
reader.IsMotorolaByteOrder = true;
Expand All @@ -53,29 +53,29 @@ public static void ProcessTiff([NotNull] IndexedReader reader, [NotNull] ITiffHa
reader.IsMotorolaByteOrder = false;
break;
default:
throw new TiffProcessingException("Unclear distinction between Motorola/Intel byte ordering: " + reader.GetInt16(tiffHeaderOffset));
throw new TiffProcessingException("Unclear distinction between Motorola/Intel byte ordering: " + reader.GetInt16(0));
}

// Check the next two values for correctness.
int tiffMarker = reader.GetUInt16(2 + tiffHeaderOffset);
int tiffMarker = reader.GetUInt16(2);
handler.SetTiffMarker(tiffMarker);

var firstIfdOffset = reader.GetInt32(4 + tiffHeaderOffset) + tiffHeaderOffset;
var firstIfdOffset = reader.GetInt32(4);

// David Ekholm sent a digital camera image that has this problem
// TODO calling Length should be avoided as it causes IndexedCapturingReader to read to the end of the stream
if (firstIfdOffset >= reader.Length - 1)
{
handler.Warn("First IFD offset is beyond the end of the TIFF data segment -- trying default offset");
// First directory normally starts immediately after the offset bytes, so try that
firstIfdOffset = tiffHeaderOffset + 2 + 2 + 4;
firstIfdOffset = 2 + 2 + 4;
}

var processedIfdOffsets = new HashSet<int>();

ProcessIfd(handler, reader, processedIfdOffsets, firstIfdOffset, tiffHeaderOffset);
ProcessIfd(handler, reader, processedIfdOffsets, firstIfdOffset);

handler.Completed(reader, tiffHeaderOffset);
handler.Completed(reader);
}

/// <summary>Processes a TIFF IFD.</summary>
Expand All @@ -94,21 +94,24 @@ public static void ProcessTiff([NotNull] IndexedReader reader, [NotNull] ITiffHa
/// </remarks>
/// <param name="handler">the <see cref="ITiffHandler"/> that will coordinate processing and accept read values</param>
/// <param name="reader">the <see cref="IndexedReader"/> from which the data should be read</param>
/// <param name="processedIfdOffsets">the set of visited IFD offsets, to avoid revisiting the same IFD in an endless loop</param>
/// <param name="processedGlobalIfdOffsets">the set of visited IFD offsets, to avoid revisiting the same IFD in an endless loop</param>
/// <param name="ifdOffset">the offset within <c>reader</c> at which the IFD data starts</param>
/// <param name="tiffHeaderOffset">the offset within <c>reader</c> at which the TIFF header starts</param>
/// <exception cref="System.IO.IOException">an error occurred while accessing the required data</exception>
public static void ProcessIfd([NotNull] ITiffHandler handler, [NotNull] IndexedReader reader, [NotNull] ICollection<int> processedIfdOffsets, int ifdOffset, int tiffHeaderOffset)
public static void ProcessIfd([NotNull] ITiffHandler handler, [NotNull] IndexedReader reader, [NotNull] ICollection<int> processedGlobalIfdOffsets, int ifdOffset)
{
bool? resetByteOrder = null;
try
{
// check for directories we've already visited to avoid stack overflows when recursive/cyclic directory structures exist
if (processedIfdOffsets.Contains(ifdOffset))
// Check for directories we've already visited to avoid stack overflows when recursive/cyclic directory structures exist.
// Note that we track these offsets in the global frame, not the reader's local frame.
var globalIfdOffset = reader.ToUnshiftedOffset(ifdOffset);
if (processedGlobalIfdOffsets.Contains(globalIfdOffset))
return;

// remember that we've visited this directory so that we don't visit it again later
processedIfdOffsets.Add(ifdOffset);
// Remember that we've visited this directory so that we don't visit it again later
processedGlobalIfdOffsets.Add(globalIfdOffset);

// Validate IFD offset
if (ifdOffset >= reader.Length || ifdOffset < 0)
{
handler.Error("Ignored IFD marked to start outside data segment");
Expand Down Expand Up @@ -181,14 +184,13 @@ public static void ProcessIfd([NotNull] ITiffHandler handler, [NotNull] IndexedR
if (byteCount > 4)
{
// If it's bigger than 4 bytes, the dir entry contains an offset.
var offsetVal = reader.GetInt32(tagOffset + 8);
if (offsetVal + byteCount > reader.Length)
tagValueOffset = reader.GetUInt32(tagOffset + 8);
if (tagValueOffset + byteCount > reader.Length)
{
// Bogus pointer offset and / or byteCount value
handler.Error("Illegal TIFF tag pointer offset");
continue;
}
tagValueOffset = tiffHeaderOffset + offsetVal;
}
else
{
Expand Down Expand Up @@ -219,14 +221,14 @@ public static void ProcessIfd([NotNull] ITiffHandler handler, [NotNull] IndexedR
if (handler.TryEnterSubIfd(tagId))
{
isIfdPointer = true;
var subDirOffset = tiffHeaderOffset + reader.GetUInt32((int)(tagValueOffset + i*4));
ProcessIfd(handler, reader, processedIfdOffsets, (int)subDirOffset, tiffHeaderOffset);
var subDirOffset = reader.GetUInt32((int)(tagValueOffset + i*4));
ProcessIfd(handler, reader, processedGlobalIfdOffsets, (int)subDirOffset);
}
}
}

// If it wasn't an IFD pointer, allow custom tag processing to occur
if (!isIfdPointer && !handler.CustomProcessTag((int)tagValueOffset, processedIfdOffsets, tiffHeaderOffset, reader, tagId, (int)byteCount))
if (!isIfdPointer && !handler.CustomProcessTag((int)tagValueOffset, processedGlobalIfdOffsets, reader, tagId, (int)byteCount))
{
// If no custom processing occurred, process the tag in the standard fashion
ProcessTag(handler, tagId, (int)tagValueOffset, (int)componentCount, formatCode, reader);
Expand All @@ -238,12 +240,9 @@ public static void ProcessIfd([NotNull] ITiffHandler handler, [NotNull] IndexedR
var nextIfdOffset = reader.GetInt32(finalTagOffset);
if (nextIfdOffset != 0)
{
nextIfdOffset += tiffHeaderOffset;

if (nextIfdOffset >= reader.Length)
{
// Last 4 bytes of IFD reference another IFD with an address that is out of bounds
// Note this could have been caused by jhead 1.3 cropping too much
return;
}
else if (nextIfdOffset < ifdOffset)
Expand All @@ -254,7 +253,7 @@ public static void ProcessIfd([NotNull] ITiffHandler handler, [NotNull] IndexedR
}

if (handler.HasFollowerIfd())
ProcessIfd(handler, reader, processedIfdOffsets, nextIfdOffset, tiffHeaderOffset);
ProcessIfd(handler, reader, processedGlobalIfdOffsets, nextIfdOffset);
}
}
finally
Expand Down
23 changes: 17 additions & 6 deletions MetadataExtractor/IO/ByteArrayReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,40 +39,51 @@ public class ByteArrayReader : IndexedReader
{
[NotNull]
private readonly byte[] _buffer;
private readonly int _baseOffset;

public ByteArrayReader([NotNull] byte[] buffer)
public ByteArrayReader([NotNull] byte[] buffer, int baseOffset = 0)
{
if (buffer == null)
throw new ArgumentNullException(nameof(buffer));
if (baseOffset < 0)
throw new ArgumentOutOfRangeException(nameof(baseOffset), "Must be zero or greater.");

_buffer = buffer;
_baseOffset = baseOffset;
}

public override long Length => _buffer.Length;
public override IndexedReader WithShiftedBaseOffset(int shift) => shift == 0 ? this : new ByteArrayReader(_buffer, _baseOffset + shift) { IsMotorolaByteOrder = IsMotorolaByteOrder };

public override int ToUnshiftedOffset(int localOffset) => localOffset + _baseOffset;

public override long Length => _buffer.Length - _baseOffset;

public override byte GetByte(int index)
{
ValidateIndex(index, 1);
return _buffer[index];
return _buffer[index + _baseOffset];
}

protected override void ValidateIndex(int index, int bytesRequested)
{
if (!IsValidIndex(index, bytesRequested))
throw new BufferBoundsException(index, bytesRequested, _buffer.Length);
throw new BufferBoundsException(ToUnshiftedOffset(index), bytesRequested, _buffer.Length);
}

protected override bool IsValidIndex(int index, int bytesRequested)
{
return bytesRequested >= 0 && index >= 0 && index + (long)bytesRequested - 1L < _buffer.Length;
return
bytesRequested >= 0 &&
index >= 0 &&
index + (long)bytesRequested - 1L < Length;
}

public override byte[] GetBytes(int index, int count)
{
ValidateIndex(index, count);

var bytes = new byte[count];
Array.Copy(_buffer, index, bytes, 0, count);
Array.Copy(_buffer, index + _baseOffset, bytes, 0, count);
return bytes;
}
}
Expand Down
41 changes: 37 additions & 4 deletions MetadataExtractor/IO/IndexedCapturingReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ protected override void ValidateIndex(int index, int bytesRequested)
if (!IsValidIndex(index, bytesRequested))
{
if (index < 0)
throw new BufferBoundsException($"Attempt to read from buffer using a negative index ({index}).");
throw new BufferBoundsException($"Attempt to read from buffer using a negative index ({index})");
if (bytesRequested < 0)
throw new BufferBoundsException("Number of requested bytes must be zero or greater.");
throw new BufferBoundsException("Number of requested bytes must be zero or greater");
if ((long)index + bytesRequested - 1 > int.MaxValue)
throw new BufferBoundsException($"Number of requested bytes summed with starting index exceed maximum range of signed 32 bit integers (requested index: {index}, requested count: {bytesRequested}).");
throw new BufferBoundsException($"Number of requested bytes summed with starting index exceed maximum range of signed 32 bit integers (requested index: {index}, requested count: {bytesRequested})");

Debug.Assert(_isStreamFinished);
// TODO test that can continue using an instance of this type after this exception
throw new BufferBoundsException(index, bytesRequested, _streamLength);
throw new BufferBoundsException(ToUnshiftedOffset(index), bytesRequested, _streamLength);
}
}

Expand Down Expand Up @@ -140,6 +140,8 @@ protected override bool IsValidIndex(int index, int bytesRequested)
return true;
}

public override int ToUnshiftedOffset(int localOffset) => localOffset;

public override byte GetByte(int index)
{
ValidateIndex(index, 1);
Expand Down Expand Up @@ -171,5 +173,36 @@ public override byte[] GetBytes(int index, int count)
}
return bytes;
}

public override IndexedReader WithShiftedBaseOffset(int shift) => shift == 0 ? (IndexedReader)this : new ShiftedIndexedCapturingReader(this, shift) { IsMotorolaByteOrder = IsMotorolaByteOrder };

private sealed class ShiftedIndexedCapturingReader : IndexedReader
{
private readonly IndexedCapturingReader _baseReader;
private readonly int _baseOffset;

public ShiftedIndexedCapturingReader(IndexedCapturingReader baseReader, int baseOffset)
{
if (baseOffset < 0)
throw new ArgumentOutOfRangeException(nameof(baseOffset), "Must be zero or greater.");

_baseReader = baseReader;
_baseOffset = baseOffset;
}

public override IndexedReader WithShiftedBaseOffset(int shift) => shift == 0 ? this : new ShiftedIndexedCapturingReader(_baseReader, _baseOffset + shift) { IsMotorolaByteOrder = IsMotorolaByteOrder };

public override int ToUnshiftedOffset(int localOffset) => localOffset + _baseOffset;

public override byte GetByte(int index) => _baseReader.GetByte(_baseOffset + index);

public override byte[] GetBytes(int index, int count) => _baseReader.GetBytes(_baseOffset + index, count);

protected override void ValidateIndex(int index, int bytesRequested) => _baseReader.ValidateIndex(index + _baseOffset, bytesRequested);

protected override bool IsValidIndex(int index, int bytesRequested) => _baseReader.IsValidIndex(index + _baseOffset, bytesRequested);

public override long Length => _baseReader.Length - _baseOffset;
}
}
}
4 changes: 4 additions & 0 deletions MetadataExtractor/IO/IndexedReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ public abstract class IndexedReader
/// </remarks>
public bool IsMotorolaByteOrder { set; get; } = true;

public abstract IndexedReader WithShiftedBaseOffset(int shift);

public abstract int ToUnshiftedOffset(int localOffset);

/// <summary>Gets the byte value at the specified byte <c>index</c>.</summary>
/// <remarks>
/// Implementations must validate <paramref name="index"/> by calling <see cref="ValidateIndex"/>.
Expand Down
Loading

0 comments on commit 2281cea

Please sign in to comment.