Skip to content

Commit

Permalink
Port IPTC encoding fix from Java library
Browse files Browse the repository at this point in the history
  • Loading branch information
drewnoakes committed May 22, 2023
1 parent 6e92951 commit c874c0e
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
2 changes: 2 additions & 0 deletions MetadataExtractor.Tests/Formats/Iptc/Iso2022ConverterTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ public sealed class Iso2022ConverterTest
public void ConvertEscapeSequenceToEncodingName()
{
    // Candidate escape sequences, named by the bytes that follow ESC (0x1B).
    byte[] escPercentG = { 0x1B, 0x25, 0x47 };              // ESC % G
    byte[] escBulletBytesA = { 0x1B, 0xE2, 0x80, 0xA2, 0x41 }; // ESC, bytes E2 80 A2, then 'A'
    byte[] escMinusA = { 0x1B, 0x2D, 0x41 };                // ESC - A
    byte[] notAnEscapeSequence = { 1, 2, 3, 4 };            // does not start with ESC

    Assert.Equal("UTF-8", Iso2022Converter.ConvertEscapeSequenceToEncodingName(escPercentG));

    // The ESC . A form (0x2E) is left disabled, exactly as in the original test.
    //Assert.Equal("ISO-8859-1", Iso2022Converter.ConvertEscapeSequenceToEncodingName(new byte[] { 0x1B, 0x2E, 0x41 }));

    // Both of these forms are expected to resolve to ISO-8859-1.
    Assert.Equal("ISO-8859-1", Iso2022Converter.ConvertEscapeSequenceToEncodingName(escBulletBytesA));
    Assert.Equal("ISO-8859-1", Iso2022Converter.ConvertEscapeSequenceToEncodingName(escMinusA));

    // Anything unrecognised yields null rather than a guessed encoding name.
    Assert.Null(Iso2022Converter.ConvertEscapeSequenceToEncodingName(notAnEscapeSequence));
}
}
Expand Down
6 changes: 5 additions & 1 deletion MetadataExtractor/Formats/Iptc/Iso2022Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ public static class Iso2022Converter
// The three bytes 0xE2 0x80 0xA2 packed big-endian into a single int. It is compared against
// bytes[1], bytes[2] and bytes[3] of a candidate sequence (combined as b1 << 16 | b2 << 8 | b3).
private const int Dot = 0xe280a2;
// ASCII 'A' (0x41): the final byte of the escape sequences that map to "ISO-8859-1".
private const byte LatinCapitalA = (byte)'A';
// ASCII 'G' (0x47). NOTE(review): presumably the final byte of the UTF-8 sequence ESC % G;
// its use is not visible in this excerpt - confirm.
private const byte LatinCapitalG = (byte)'G';
// ASCII '-' (0x2D): the middle byte of the three-byte sequence ESC - A, which maps to "ISO-8859-1".
private const byte MinusSign = (byte)'-';
// ASCII '%' (0x25). NOTE(review): presumably the middle byte of the UTF-8 sequence ESC % G;
// its use is not visible in this excerpt - confirm.
private const byte PercentSign = (byte)'%';
// The ASCII escape control character (0x1B); the ISO-8859-1 checks require it as the first byte.
private const byte Esc = 0x1B;

Expand All @@ -19,12 +20,15 @@ public static class Iso2022Converter
if (bytes.Length > 3 && bytes[0] == Esc && (bytes[3] | (bytes[2] << 8) | (bytes[1] << 16)) == Dot && bytes[4] == LatinCapitalA)
return "ISO-8859-1";

if (bytes.Length > 2 && bytes[0] == Esc && bytes[1] == MinusSign && bytes[2] == LatinCapitalA)
return "ISO-8859-1";

return null;
}

/// <summary>Attempts to guess the encoding of a string provided as a byte array.</summary>
/// <remarks>
/// Encodings trialled are, in order:
/// Encodings trialed are, in order:
/// <list type="bullet">
/// <item>UTF-8</item>
/// <item>ISO-8859-1</item>
Expand Down

0 comments on commit c874c0e

Please sign in to comment.