Skip to content

Commit

Permalink
Merge pull request #534 from openpreserve/fix/wave-ids-and-offsets
Browse files Browse the repository at this point in the history
WAVE / IFF: Accurate file offsets and new validation checks
  • Loading branch information
carlwilson authored Dec 10, 2019
2 parents 267d8d7 + 4d95d89 commit f062eb5
Show file tree
Hide file tree
Showing 49 changed files with 300 additions and 2,047 deletions.
28 changes: 27 additions & 1 deletion jhove-bbt/scripts/create-1.23-target.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ echo "Executing baseline update"
# Copying baseline for now we're not making any changes
cp -R "${baselineRoot}" "${targetRoot}"

# # Copy valid JP2K files across for new MIX metadata see https://github.com/openpreserve/jhove/pull/445
# Copy valid JP2K files across for new MIX metadata see https://github.com/openpreserve/jhove/pull/445
if [[ -d "${candidateRoot}/examples/modules/JPEG2000-hul" ]]; then
echo "Copying valid JPEG2000 examples."
cp -Rf "${candidateRoot}/examples/modules/JPEG2000-hul" "${targetRoot}/examples/modules/"
Expand All @@ -64,11 +64,37 @@ if [[ -d "${candidateRoot}/errors/modules/JPEG2000-hul" ]]; then
cp -Rf "${candidateRoot}/errors/modules/JPEG2000-hul" "${targetRoot}/errors/modules/"
fi

# Copy WAVE-hul results across from the candidate for the accurate file
# offsets and new validation checks, see https://github.com/openpreserve/jhove/pull/534
# Each directory is only copied when the candidate actually contains it.
if [[ -d "${candidateRoot}/examples/modules/WAVE-hul" ]]; then
echo "Copying valid WAVE examples."
cp -Rf "${candidateRoot}/examples/modules/WAVE-hul" "${targetRoot}/examples/modules/"
fi
if [[ -d "${candidateRoot}/errors/modules/WAVE-hul" ]]; then
echo "Copying WAVE errors."
cp -Rf "${candidateRoot}/errors/modules/WAVE-hul" "${targetRoot}/errors/modules/"
fi

# The commands below rewrite module release numbers and dates in place in the
# result files under ${targetRoot}. Every pattern is anchored, so a file
# whose line does not match exactly is left untouched.

# JPEG2000-hul: release 1.4.1 -> 1.4.2, date 2019-04-17 -> 2019-10-18.
# The XML output handler release is bumped 1.8 -> 1.9 in the same pass.
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/^ <module release="1.4.1">JPEG2000-hul<\/module>$/ <module release="1.4.2">JPEG2000-hul<\/module>/' {} \;
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/^ <outputHandler release="1.8">XML/ <outputHandler release="1.9">XML/' {} \;
find "${targetRoot}" -type f -name "audit-JPEG2000-hul.jhove.xml" -exec sed -i 's/^ <release>1.4.1<\/release>$/ <release>1.4.2<\/release>/' {} \;
find "${targetRoot}" -type f -name "audit-JPEG2000-hul.jhove.xml" -exec sed -i 's/^ <date>2019-04-17<\/date>$/ <date>2019-10-18<\/date>/' {} \;

# WAVE-hul: release 1.7.1 -> 1.8.1, date 2019-04-17 -> 2019-12-10.
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/^ <module release="1.7.1">WAVE-hul<\/module>$/ <module release="1.8.1">WAVE-hul<\/module>/' {} \;
find "${targetRoot}" -type f -name "audit-WAVE-hul.jhove.xml" -exec sed -i 's/^ <release>1.7.1<\/release>$/ <release>1.8.1<\/release>/' {} \;
find "${targetRoot}" -type f -name "audit-WAVE-hul.jhove.xml" -exec sed -i 's/^ <date>2019-04-17<\/date>$/ <date>2019-12-10<\/date>/' {} \;

# AIFF-hul: release 1.5.1 -> 1.6.1, date 2019-04-17 -> 2019-12-10.
# The reportingModule element of every *.aif / *.AIF result is updated too;
# the two spellings are handled separately because -name is case-sensitive.
find "${targetRoot}" -type f -name "*.aif.jhove.xml" -exec sed -i 's/^ <reportingModule release="1.5.1" date="2019-04-17">AIFF-hul<\/reportingModule>$/ <reportingModule release="1.6.1" date="2019-12-10">AIFF-hul<\/reportingModule>/' {} \;
find "${targetRoot}" -type f -name "*.AIF.jhove.xml" -exec sed -i 's/^ <reportingModule release="1.5.1" date="2019-04-17">AIFF-hul<\/reportingModule>$/ <reportingModule release="1.6.1" date="2019-12-10">AIFF-hul<\/reportingModule>/' {} \;
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/^ <module release="1.5.1">AIFF-hul<\/module>$/ <module release="1.6.1">AIFF-hul<\/module>/' {} \;
find "${targetRoot}" -type f -name "audit-AIFF-hul.jhove.xml" -exec sed -i 's/^ <release>1.5.1<\/release>$/ <release>1.6.1<\/release>/' {} \;
find "${targetRoot}" -type f -name "audit-AIFF-hul.jhove.xml" -exec sed -i 's/^ <date>2019-04-17<\/date>$/ <date>2019-12-10<\/date>/' {} \;

# TIFF-hul: release 1.9.1 -> 1.10.1, date 2019-04-17 -> 2019-12-10,
# including the reportingModule element of every *.tif / *.g3 result.
find "${targetRoot}" -type f -name "*.tif.jhove.xml" -exec sed -i 's/^ <reportingModule release="1.9.1" date="2019-04-17">TIFF-hul<\/reportingModule>$/ <reportingModule release="1.10.1" date="2019-12-10">TIFF-hul<\/reportingModule>/' {} \;
find "${targetRoot}" -type f -name "*.g3.jhove.xml" -exec sed -i 's/^ <reportingModule release="1.9.1" date="2019-04-17">TIFF-hul<\/reportingModule>$/ <reportingModule release="1.10.1" date="2019-12-10">TIFF-hul<\/reportingModule>/' {} \;
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/^ <module release="1.9.1">TIFF-hul<\/module>$/ <module release="1.10.1">TIFF-hul<\/module>/' {} \;
find "${targetRoot}" -type f -name "audit-TIFF-hul.jhove.xml" -exec sed -i 's/^ <release>1.9.1<\/release>$/ <release>1.10.1<\/release>/' {} \;
find "${targetRoot}" -type f -name "audit-TIFF-hul.jhove.xml" -exec sed -i 's/^ <date>2019-04-17<\/date>$/ <date>2019-12-10<\/date>/' {} \;

# Delete every <identifiers> element of the TIFF audit that contains the
# "#classf" reference URL, then drop lines consisting of a single space
# (presumably the residue xmlstarlet leaves where the element was - verify).
find "${targetRoot}" -type f -name "audit-TIFF-hul.jhove.xml" -exec xmlstarlet ed --inplace -N 'ns=http://schema.openpreservation.org/ois/xml/ns/jhove' -d '//ns:identifiers[.//ns:identifier//ns:value[text()="http://hul.harvard.edu/jhove/references.html#classf" ]]' {} \;
find "${targetRoot}" -type f -name "audit-TIFF-hul.jhove.xml" -exec sed -i '/^ $/d' {} \;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,14 @@ final class JhoveMessageImpl implements JhoveMessage {
private final String message;
private final String subMessage;

/**
 * Creates a message without a sub-message; delegates to the three-argument
 * constructor with an empty string as the sub-message.
 *
 * @param id      the message identifier
 * @param message the message text
 */
private JhoveMessageImpl(final String id, final String message) {
this(id, message, "");
}

/**
 * Creates a message, storing all three values as given (no validation or
 * defaulting is performed here).
 *
 * @param id         the message identifier
 * @param message    the message text
 * @param subMessage the sub-message text
 */
private JhoveMessageImpl(final String id, final String message, final String subMessage) {
this.id = id;
this.message = message;
this.subMessage = subMessage;
}

/**
 * Creates a message instance from its three parts.
 *
 * The sub-message is passed through to the instance; it must not be
 * dropped, otherwise callers that supply detail text would silently get a
 * message with an empty sub-message.
 *
 * @param id         the message identifier
 * @param message    the message text
 * @param subMessage the sub-message text
 * @return a new message holding exactly the supplied values
 */
static JhoveMessage getInstance(final String id, final String message, final String subMessage) {
    return new JhoveMessageImpl(id, message, subMessage);
}

@Override
Expand Down Expand Up @@ -110,6 +106,4 @@ public boolean equals(Object obj) {
}
return true;
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ public class AiffModule
{ 0X46, 0X4F, 0X52, 0X4D };

/** Module name as reported in JHOVE output. */
private static final String NAME = "AIFF-hul";
/** Module release; declared once so the class has no duplicate field. */
private static final String RELEASE = "1.6.1";
/** Release date as { year, month, day }. */
private static final int [] DATE = { 2019, 12, 10 };
/** Names of the format handled by this module. */
private static final String [] FORMAT = {
    "AIFF", "Audio Interchange File Format"
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,20 @@
*/
public abstract class Chunk {

/** Length of chunk ID fields in bytes */
public static final int ID_LENGTH = 4;

/** Length of chunk size fields in bytes */
public static final int SIZE_LENGTH = 4;

/** Length of chunk headers in bytes */
public static final int HEADER_LENGTH = ID_LENGTH + SIZE_LENGTH;

/** Module supplied to the constructor. */
protected ModuleBase _module;
/** Stream supplied to the constructor. */
protected DataInputStream _dstream;
/** Initialised to the chunk size by the constructor. */
protected long bytesLeft;
/** Chunk size as reported by the chunk header. */
protected long chunkSize;
/** Chunk offset as reported by the chunk header. */
protected long chunkOffset;

/**
* Class constructor.
Expand All @@ -33,9 +43,10 @@ public abstract class Chunk {
public Chunk(ModuleBase module, ChunkHeader hdr, DataInputStream dstrm)
{
    _module = module;
    _dstream = dstrm;
    // Size and offset both come from the already-read header.
    chunkSize = hdr.getSize();
    chunkOffset = hdr.getOffset();
    // Nothing has been consumed yet, so the whole chunk is still left.
    bytesLeft = chunkSize;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,74 +6,116 @@

package edu.harvard.hul.ois.jhove.module.iff;

import edu.harvard.hul.ois.jhove.*;
import java.io.*;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.messages.JhoveMessage;
import edu.harvard.hul.ois.jhove.messages.JhoveMessages;

import java.io.DataInputStream;
import java.io.IOException;

/**
* This class encapsulates an IFF/AIFF chunk header.
*
* @author Gary McGath
*
*/
public class ChunkHeader {

private ModuleBase _module;
private RepInfo _repInfo;
private String _chunkId; // Four-character ID of the chunk
private long _offset; // Offset from the beginning of file
private long _size; // This does not include the 8 bytes of header
private String _chunkID; // 4-character ID of the chunk


/**
* Constructor.
* Constructor.
*
* @param module The module under which the chunk is being read
* @param info The RepInfo object being used by the module
* @param module The module under which the chunk is being read
* @param info The RepInfo object being used by the module
*/
public ChunkHeader (ModuleBase module, RepInfo info)
public ChunkHeader(ModuleBase module, RepInfo info)
{
_module = module;
_repInfo = info;
}



/**
* Reads the header of a chunk. If _chunkID is non-null,
* it's assumed to have already been read.
* Reads and validates the header of a chunk.
*
* If {@code _chunkId} is non-null it's assumed to have already been read.
*/
public boolean readHeader (DataInputStream dstrm) throws IOException
public boolean readHeader(DataInputStream dstrm) throws IOException
{
StringBuffer id = new StringBuffer(4);
for (int i = 0; i < 4; i++) {
int ch = ModuleBase.readUnsignedByte (dstrm, _module);
if (ch < 32) {
String hx = Integer.toHexString (ch);
if (hx.length () < 2) {
hx = "0" + hx;
}
_repInfo.setMessage (new ErrorMessage
(MessageConstants.IFF_HUL_1,
MessageConstants.IFF_HUL_1_SUB.getMessage() + hx,
_module.getNByte ()));
_repInfo.setWellFormed (false);
final int LOWEST_PRINTABLE_ASCII = 32;
final int HIGHEST_PRINTABLE_ASCII = 126;

_offset = _module.getNByte();

boolean idBeginsWithSpace = false;
boolean spacePrecedesPrintableCharacters = false;
StringBuilder id = new StringBuilder(Chunk.ID_LENGTH);

for (int i = 0; i < Chunk.ID_LENGTH; i++) {

boolean printableCharacter = false;
int ch = ModuleBase.readUnsignedByte(dstrm, _module);

// Characters should be in the printable ASCII range
if (ch < LOWEST_PRINTABLE_ASCII || ch > HIGHEST_PRINTABLE_ASCII) {
_repInfo.setMessage(new ErrorMessage(
MessageConstants.IFF_HUL_1,
String.format(
MessageConstants.IFF_HUL_1_SUB.getMessage(),
ch),
_module.getNByte() - 1));
_repInfo.setWellFormed(false);
return false;
}

if (ch == ' ') {
if (i == 0) {
idBeginsWithSpace = true;
}
} else {
printableCharacter = true;
}

if (idBeginsWithSpace && printableCharacter) {
spacePrecedesPrintableCharacters = true;
}

id.append((char) ch);
}
_chunkID = id.toString ();
_size = ModuleBase.readUnsignedInt (dstrm, _module.isBigEndian (), _module);

_chunkId = id.toString();

// Spaces should not precede printable characters
if (spacePrecedesPrintableCharacters) {
JhoveMessage message = JhoveMessages.getMessageInstance(
MessageConstants.IFF_HUL_2.getId(), String.format(
MessageConstants.IFF_HUL_2.getMessage(), _chunkId));
_repInfo.setMessage(new ErrorMessage(message,
_module.getNByte() - Chunk.ID_LENGTH));
_repInfo.setValid(false);
}

_size = ModuleBase.readUnsignedInt(dstrm, _module.isBigEndian(), _module);

return true;
}


/** Sets the chunk type, which is a 4-character code, directly. */
public void setID (String id)
public void setID(String id)
{
_chunkID = id;
_chunkId = id;
}

/** Returns the chunk type, which is a 4-character code */
public String getID ()
public String getID()
{
return _chunkID;
return _chunkId;
}

/** Sets the chunk size */
Expand All @@ -82,9 +124,15 @@ public void setSize(long size)
_size = size;
}

/** Returns the chunk size (excluding the first 8 bytes) */
public long getSize ()
/** Returns the chunk size, which excludes the length of the header. */
public long getSize()
{
return _size;
}

/** Returns the chunk offset in bytes from the beginning of file. */
public long getOffset()
{
return _offset;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,8 @@
/**
 * Message constants for the IFF module. The JhoveMessage constants are
 * looked up by key from the
 * {@code edu.harvard.hul.ois.jhove.module.iff.ErrorMessages} bundle.
 */
public enum MessageConstants {
INSTANCE;
// Declared before the message constants below, which are initialised from it.
private static final JhoveMessageFactory messageFactory = JhoveMessages.getInstance("edu.harvard.hul.ois.jhove.module.iff.ErrorMessages"); //$NON-NLS-1$
/**
 * Info message: prefix to which a chunk ID is presumably appended by the
 * caller (the text ends with "ID: ").
 */
public static final String INF_CHUNK_TYPE_IGNORED = "Ignored chunk type with ID: ";

/**
 * Error message looked up under key "IFF-HUL-1".
 */
public static final JhoveMessage IFF_HUL_1 = messageFactory.getMessage("IFF-HUL-1"); //$NON-NLS-1$
/** Sub-message looked up under key "IFF-HUL-1-SUB". */
public static final JhoveMessage IFF_HUL_1_SUB = messageFactory.getMessage("IFF-HUL-1-SUB"); //$NON-NLS-1$

/** Error message looked up under key "IFF-HUL-2". */
public static final JhoveMessage IFF_HUL_2 = messageFactory.getMessage("IFF-HUL-2"); //$NON-NLS-1$
}
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# IFF-HUL-1-SUB and IFF-HUL-2 are format strings (%02X / %s).
IFF-HUL-1 = Chunk ID character outside printable ASCII range
IFF-HUL-1-SUB = Character = 0x%02X
IFF-HUL-2 = Chunk ID contains space before printable characters: "%s"
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ public class TiffModule extends ModuleBase {
protected Logger _logger;

/** Module name as reported in JHOVE output. */
private static final String NAME = "TIFF-hul";
/** Module release; declared once so the class has no duplicate field. */
private static final String RELEASE = "1.10.1";
/** Release date as { year, month, day }. */
private static final int [] DATE = { 2019, 12, 10 };
/** Names of the format handled by this module. */
private static final String[] FORMAT = { "TIFF", "Tagged Image File Format" };
private static final String COVERAGE = "TIFF 4.0, 5.0, and 6.0; "
+ "TIFF/IT (ISO/DIS 12639:2003), including file types CT, LW, HC, MP, "
Expand Down
Loading

0 comments on commit f062eb5

Please sign in to comment.