Skip to content

Commit

Permalink
Support not finding the Pages dictionary in lenient mode (UglyToad#897)
Browse files Browse the repository at this point in the history
* Support not finding the Pages dictionary in lenient mode and support Kids object not referencing a page object in lenient mode

---------

Co-authored-by: Arnaud TAMAILLON <arnaud.tamaillon@younited-credit.fr>
  • Loading branch information
Greybird and Arnaud TAMAILLON authored Sep 1, 2024
1 parent 1bfd6de commit cf45dcf
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 3 deletions.
35 changes: 35 additions & 0 deletions src/UglyToad.PdfPig.Tests/Integration/CatGeneticsTests.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using Annotations;
using PdfPig.Core;

public class CatGeneticsTests
{
Expand Down Expand Up @@ -39,5 +40,39 @@ public void CanGetAnnotations()
}
}
}

[Fact]
public void CanSupportPageInformationNotFoundInLenientMode()
{
    var documentPath = IntegrationHelpers.GetSpecificTestDocumentPath("pages-indirect-to-null.pdf");

    // Lenient parsing on: the document opens and a single page can be retrieved.
    using (var lenientDocument = PdfDocument.Open(documentPath))
    {
        Assert.Equal(1, lenientDocument.NumberOfPages);

        var firstPage = lenientDocument.GetPage(1);
        Assert.NotNull(firstPage);
    }

    // Lenient parsing off: opening the same file must fail with a format exception.
    var exception = Assert.Throws<PdfDocumentFormatException>(
        () => PdfDocument.Open(documentPath, ParsingOptions.LenientParsingOff));
    Assert.Equal("Pages entry is null", exception.Message);
}

[Fact]
public void CanSupportPageKidsObjectNotBeingAPage()
{
    var documentPath = IntegrationHelpers.GetSpecificTestDocumentPath("pages-kids-not-page.pdf");

    // Opened without explicit parsing options: the document still opens
    // and a single page can be retrieved.
    using (var openedDocument = PdfDocument.Open(documentPath))
    {
        Assert.Equal(1, openedDocument.NumberOfPages);

        var firstPage = openedDocument.GetPage(1);
        Assert.NotNull(firstPage);
    }

    // Lenient parsing off: the unresolved Kids reference must surface as a format exception.
    var exception = Assert.Throws<PdfDocumentFormatException>(
        () => PdfDocument.Open(documentPath, ParsingOptions.LenientParsingOff));
    Assert.Equal(
        "Could not find dictionary associated with reference in pages kids array: 3 0.",
        exception.Message);
}
}
}
Binary file not shown.
Binary file not shown.
10 changes: 7 additions & 3 deletions src/UglyToad.PdfPig/Content/PagesFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,19 @@ private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput,

foreach (var kid in kids.Data)
{
DictionaryToken? kidDictionaryToken = null;
if (!(kid is IndirectReferenceToken kidRef))
{
throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
}

if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken? kidDictionaryToken))
if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out kidDictionaryToken))
{
throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
if (!isLenientParsing)
{
throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
}
}
kidDictionaryToken ??= new DictionaryToken(new Dictionary<NameToken, IToken>());

bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);

Expand Down
12 changes: 12 additions & 0 deletions src/UglyToad.PdfPig/Parser/CatalogFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ public static Catalog Create(IndirectReference rootReference, DictionaryToken di
pagesDictionary = DirectObjectFinder.Get<DictionaryToken>(value, scanner);
}

if (pagesDictionary == null)
{
if (isLenientParsing)
{
pagesDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>());
}
else
{
throw new PdfDocumentFormatException($"Pages entry is null");
}
}

var pages = PagesFactory.Create(pagesReference, pagesDictionary, scanner, pageFactory, log, isLenientParsing);
var namedDestinations = NamedDestinationsProvider.Read(dictionary, scanner, pages, null);

Expand Down

0 comments on commit cf45dcf

Please sign in to comment.