Skip to content

Commit

Permalink
[DOXIA-716] Update and unify XMLReader creation and configuration
Browse files Browse the repository at this point in the history
This closes #187
  • Loading branch information
michael-o committed Dec 30, 2023
1 parent a8a8f2a commit e43b200
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,11 @@ public void parse(Reader source, Sink sink, String reference) throws ParseExcept
throw new ParseException("Error reading the model", e);
}

new XmlValidator().validate(content);
XmlValidator validator = new XmlValidator();
validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
validator.setEntityResolver(new CachedFileEntityResolver());

validator.validate(content);

src = new StringReader(content);
}
Expand Down
117 changes: 52 additions & 65 deletions doxia-core/src/main/java/org/apache/maven/doxia/util/XmlValidator.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,26 @@
*/
package org.apache.maven.doxia.util;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.maven.doxia.markup.XmlMarkup;
import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
import org.apache.maven.doxia.parser.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
* A class to validate xml documents.
Expand All @@ -45,18 +47,37 @@
public class XmlValidator {
private static final Logger LOGGER = LoggerFactory.getLogger(XmlValidator.class);

/**
* Doctype pattern i.e. ".*<!DOCTYPE([^>]*)>.*"
* see <a href="http://www.w3.org/TR/REC-xml/#NT-doctypedecl">http://www.w3.org/TR/REC-xml/#NT-doctypedecl</a>.
*/
private static final Pattern PATTERN_DOCTYPE = Pattern.compile(".*" + XmlMarkup.DOCTYPE_START + "([^>]*)>.*");

/** Tag pattern as defined in http://www.w3.org/TR/REC-xml/#NT-Name */
private static final Pattern PATTERN_TAG = Pattern.compile(".*<([A-Za-z][A-Za-z0-9:_.-]*)([^>]*)>.*");

/** Lazily created XMLReader used to validate XML content. */
private XMLReader xmlReader;

private boolean validate = true;
private DefaultHandler defaultHandler;
private EntityResolver entityResolver;

/**
* Tells whether the reader built by this validator is configured to validate.
* The flag defaults to <code>true</code>.
*
* @return the current value of the validate flag.
*/
public boolean isValidate() {
return validate;
}

/**
* Sets the validate flag.
* The flag is read only while the XMLReader is being created; the reader is cached afterwards,
* so set it before the first call to <code>validate</code> or <code>getXmlReader</code>.
*
* @param validate the value passed to the reader's validation features.
*/
public void setValidate(boolean validate) {
this.validate = validate;
}

/**
* Returns the handler that is registered as the error handler of the XMLReader.
*
* @return the configured handler, or <code>null</code> if none has been set.
*/
public DefaultHandler getDefaultHandler() {
return defaultHandler;
}

/**
* Sets the handler that is registered as the error handler when the XMLReader is created.
* The reader is cached once created, so set the handler before the first call to
* <code>validate</code> or <code>getXmlReader</code>.
*
* @param defaultHandler the handler to register as error handler.
*/
// NOTE(review): if no handler is set, null is handed to XMLReader#setErrorHandler - confirm
// that ignoring non-fatal validation errors is acceptable in that case.
public void setDefaultHandler(DefaultHandler defaultHandler) {
this.defaultHandler = defaultHandler;
}

/**
* Returns the entity resolver that is registered on the XMLReader.
*
* @return the configured entity resolver, or <code>null</code> if none has been set.
*/
public EntityResolver getEntityResolver() {
return entityResolver;
}

/**
* Sets the entity resolver that is registered when the XMLReader is created.
* The reader is cached once created, so set the resolver before the first call to
* <code>validate</code> or <code>getXmlReader</code>.
*
* @param entityResolver the entity resolver to register on the reader.
*/
public void setEntityResolver(EntityResolver entityResolver) {
this.entityResolver = entityResolver;
}

/**
* Validate an XML content with SAX.
*
Expand All @@ -65,57 +86,42 @@ public class XmlValidator {
*/
public void validate(String content) throws ParseException {
try {
// 1 if there's a doctype
boolean hasDoctype = false;
Matcher matcher = PATTERN_DOCTYPE.matcher(content);
if (matcher.find()) {
hasDoctype = true;
}

// 2 check for an xmlns instance
boolean hasXsd = false;
matcher = PATTERN_TAG.matcher(content);
if (matcher.find()) {
String value = matcher.group(2);

if (value.contains(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI)) {
hasXsd = true;
}
}

// 3 validate content
getXmlReader(hasXsd && hasDoctype).parse(new InputSource(new StringReader(content)));
} catch (IOException | SAXException e) {
getXmlReader().parse(new InputSource(new StringReader(content)));
} catch (IOException | SAXException | ParserConfigurationException e) {
throw new ParseException("Error validating the model", e);
}
}

/**
* @param hasDtdAndXsd to flag the <code>ErrorHandler</code>.
* @return an xmlReader instance.
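* @throws SAXException if the underlying SAX parser cannot be created or configured
* @throws ParserConfigurationException if a parser satisfying the requested configuration cannot be created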
*/
private XMLReader getXmlReader(boolean hasDtdAndXsd) throws SAXException {
public XMLReader getXmlReader() throws SAXException, ParserConfigurationException {
if (xmlReader == null) {
MessagesErrorHandler errorHandler = new MessagesErrorHandler();

xmlReader = XMLReaderFactory.createXMLReader();
xmlReader.setFeature("http://xml.org/sax/features/validation", true);
xmlReader.setFeature("http://apache.org/xml/features/validation/dynamic", true);
xmlReader.setFeature("http://apache.org/xml/features/validation/schema", true);
xmlReader.setErrorHandler(errorHandler);
xmlReader.setEntityResolver(new CachedFileEntityResolver());
SAXParserFactory parserFactory = SAXParserFactory.newInstance();
parserFactory.setNamespaceAware(true);
SAXParser parser = parserFactory.newSAXParser();
// If both DTD and XSD are provided, force XSD
parser.setProperty(
"http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema");
// Always force language-neutral exception messages for MessagesErrorHandler
parser.setProperty("http://apache.org/xml/properties/locale", Locale.ROOT);
xmlReader = parser.getXMLReader();
xmlReader.setFeature("http://xml.org/sax/features/validation", isValidate());
xmlReader.setFeature("http://apache.org/xml/features/validation/dynamic", isValidate());
xmlReader.setFeature("http://apache.org/xml/features/validation/schema", isValidate());
xmlReader.setErrorHandler(getDefaultHandler());
xmlReader.setEntityResolver(getEntityResolver());
}

((MessagesErrorHandler) xmlReader.getErrorHandler()).setHasDtdAndXsd(hasDtdAndXsd);

return xmlReader;
}

/**
* Convenience class to beautify <code>SAXParseException</code> messages.
*/
private static class MessagesErrorHandler extends DefaultHandler {
public static class MessagesErrorHandler extends DefaultHandler {
private static final int TYPE_UNKNOWN = 0;

private static final int TYPE_WARNING = 1;
Expand All @@ -130,17 +136,6 @@ private static class MessagesErrorHandler extends DefaultHandler {
private static final Pattern ELEMENT_TYPE_PATTERN =
Pattern.compile("Element type \".*\" must be declared.", Pattern.DOTALL);

private boolean hasDtdAndXsd;

private MessagesErrorHandler() {}

/**
* @param hasDtdAndXsd the hasDtdAndXsd to set
*/
protected void setHasDtdAndXsd(boolean hasDtdAndXsd) {
this.hasDtdAndXsd = hasDtdAndXsd;
}

/** {@inheritDoc} */
@Override
public void warning(SAXParseException e) throws SAXException {
Expand All @@ -150,14 +145,6 @@ public void warning(SAXParseException e) throws SAXException {
/** {@inheritDoc} */
@Override
public void error(SAXParseException e) throws SAXException {
// Workaround for Xerces complaints when an XML with XSD needs also a <!DOCTYPE []> to specify entities
// like &nbsp;
// See http://xsd.stylusstudio.com/2001Nov/post08021.htm
if (!hasDtdAndXsd) {
processException(TYPE_ERROR, e);
return;
}

Matcher m = ELEMENT_TYPE_PATTERN.matcher(e.getMessage());
if (!m.find()) {
processException(TYPE_ERROR, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.maven.doxia.util;

import org.apache.commons.io.IOUtils;
import org.apache.maven.doxia.parser.AbstractXmlParser.CachedFileEntityResolver;
import org.codehaus.plexus.testing.PlexusTest;
import org.codehaus.plexus.util.xml.XmlStreamReader;
import org.junit.jupiter.api.Test;
Expand All @@ -35,6 +36,8 @@ public void testValidate() throws Exception {
String xml = IOUtils.toString(new XmlStreamReader(this.getClass().getResourceAsStream("/test.xml")));

XmlValidator validator = new XmlValidator();
validator.setDefaultHandler(new XmlValidator.MessagesErrorHandler());
validator.setEntityResolver(new CachedFileEntityResolver());

validator.validate(xml);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@
*/
package org.apache.maven.doxia.xsd;

import javax.xml.parsers.ParserConfigurationException;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.maven.doxia.util.XmlValidator;
import org.codehaus.plexus.testing.PlexusTest;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
Expand All @@ -38,7 +41,6 @@
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.fail;
Expand Down Expand Up @@ -157,17 +159,19 @@ protected void setValidate(boolean validate) {
private XMLReader getXMLReader() {
if (xmlReader == null) {
try {
xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
xmlReader.setFeature("http://xml.org/sax/features/validation", validate);
xmlReader.setFeature("http://apache.org/xml/features/validation/schema", validate);
xmlReader.setErrorHandler(new MessagesErrorHandler());
xmlReader.setEntityResolver(getEntityResolver());
XmlValidator validator = new XmlValidator();
validator.setValidate(validate);
validator.setDefaultHandler(new MessagesErrorHandler());
validator.setEntityResolver(getEntityResolver());
xmlReader = validator.getXmlReader();
} catch (SAXNotRecognizedException e) {
fail("SAXNotRecognizedException: " + e.getMessage());
} catch (SAXNotSupportedException e) {
fail("SAXNotSupportedException: " + e.getMessage());
} catch (SAXException e) {
fail("SAXException: " + e.getMessage());
} catch (ParserConfigurationException e) {
fail("ParserConfigurationException: " + e.getMessage());
}
}

Expand All @@ -185,10 +189,11 @@ private XMLReader getXMLReader() {
private List<ErrorMessage> parseXML(String content) throws IOException, SAXException {
String xmlContent = addNamespaces(content);

MessagesErrorHandler errorHandler =
(MessagesErrorHandler) getXMLReader().getErrorHandler();
XMLReader xmlReader = getXMLReader();

MessagesErrorHandler errorHandler = (MessagesErrorHandler) xmlReader.getErrorHandler();

getXMLReader().parse(new InputSource(new StringReader(xmlContent)));
xmlReader.parse(new InputSource(new StringReader(xmlContent)));

return errorHandler.getMessages();
}
Expand Down

0 comments on commit e43b200

Please sign in to comment.