codehaus-plexus · belingueres · Jan 25, 2019 · Mar 9, 2019 · hboutemy · Mar 9, 2019
diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java
@@ -2664,7 +2664,8 @@ protected char[] parseEntityRef()
         entityRefName = null;
         posStart = pos;
         char ch = more();
-        StringBuilder sb = new StringBuilder();
+        StringBuilder sb16 = new StringBuilder();
+        StringBuilder sb10 = new StringBuilder();
         if ( ch == '#' )
         {
             // parse character reference
@@ -2679,17 +2680,17 @@ protected char[] parseEntityRef()
                     if ( ch >= '0' && ch <= '9' )
                     {
                         charRef = (char) ( charRef * 16 + ( ch - '0' ) );
-                        sb.append( ch );
+                        sb16.append( ch );
                     }
                     else if ( ch >= 'a' && ch <= 'f' )
                     {
                         charRef = (char) ( charRef * 16 + ( ch - ( 'a' - 10 ) ) );
-                        sb.append( ch );
+                        sb16.append( ch );
                     }
                     else if ( ch >= 'A' && ch <= 'F' )
                     {
                         charRef = (char) ( charRef * 16 + ( ch - ( 'A' - 10 ) ) );
-                        sb.append( ch );
+                        sb16.append( ch );
                     }
                     else if ( ch == ';' )
                     {
@@ -2710,6 +2711,7 @@ else if ( ch >= 'A' && ch <= 'F' )
                     if ( ch >= '0' && ch <= '9' )
                     {
                         charRef = (char) ( charRef * 10 + ( ch - '0' ) );
+                        sb10.append( ch );
                     }
                     else if ( ch == ';' )
                     {
@@ -2724,16 +2726,35 @@ else if ( ch >= 'A' && ch <= 'F' )
                 }
             }
             posEnd = pos - 1;
-            if ( sb.length() > 0 )
+            if ( sb16.length() > 0 )
             {
-                char[] tmp = toChars( Integer.parseInt( sb.toString(), 16 ) );
-                charRefOneCharBuf = tmp;
+                try
+                {
+                    charRefOneCharBuf = toChars( Integer.parseInt( sb16.toString(), 16 ) );
+                }
+                catch ( IllegalArgumentException e )
+                {
+                    throw new XmlPullParserException( "character reference (with hex value " + sb16.toString()
+                        + ") is invalid", this, null );
+                }
+
                 if ( tokenize )
                 {
                     text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
                 }
                 return charRefOneCharBuf;
             }
+
+            try
+            {
+                toChars( Integer.parseInt( sb10.toString(), 10 ) );
+            }
+            catch ( IllegalArgumentException e )
+            {
+                throw new XmlPullParserException( "character reference (with decimal value " + sb10.toString()
+                    + ") is invalid", this, null );
+            }
+
             charRefOneCharBuf[0] = charRef;
             if ( tokenize )
             {
@@ -3996,15 +4017,21 @@ private static boolean isHighSurrogate( char ch )
         return ( MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch );
     }
 
-    private static final int MIN_CODE_POINT = 0x000000;
-
     private static final int MAX_CODE_POINT = 0x10FFFF;
 
     private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
 
+    /**
+     * Check if the value is a valid XML <a href="https://www.w3.org/TR/REC-xml/#NT-Char">Char</a>.
+     *
+     * @param codePoint the numeric value to check
+     * @return true if the value matches the XML Char production, false otherwise.
+     */
     private static boolean isValidCodePoint( int codePoint )
     {
-        return ( MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint );
+        // XML Char production: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+        return codePoint == 0x9 || codePoint == 0xA || codePoint == 0xD || ( 0x20 <= codePoint && codePoint <= 0xD7FF )
+            || ( 0xE000 <= codePoint && codePoint <= 0xFFFD ) || ( 0x10000 <= codePoint && codePoint <= 0X10FFFF );
     }
 
     private static boolean isSupplementaryCodePoint( int codePoint )

diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java
@@ -17,6 +17,8 @@
  */
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -156,6 +158,46 @@ public void testUnicodeEntities()
         assertEquals( XmlPullParser.END_TAG, parser.nextToken() );
     }
 
+    @Test
+    public void testInvalidCharacterReferenceHexa()
+        throws Exception
+    {
+        MXParser parser = new MXParser();
+        String input = "<root>&#x110000;</root>"; // 0x110000 is one past U+10FFFF
+        parser.setInput( new StringReader( input ) );
+        // Tokenizing must throw XmlPullParserException; reaching fail() means the reference was accepted.
+        try
+        {
+            assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+            assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+            fail( "Should fail since &#x110000; is an illegal character reference" );
+        }
+        catch ( XmlPullParserException e )
+        {
+            assertTrue( e.getMessage().contains( "character reference (with hex value 110000) is invalid" ) );
+        }
+    }
+
+    @Test
+    public void testInvalidCharacterReferenceDecimal()
+        throws Exception
+    {
+        MXParser parser = new MXParser();
+        String input = "<root>&#1114112;</root>"; // 1114112 = 0x110000, one past U+10FFFF
+        parser.setInput( new StringReader( input ) );
+        // Tokenizing must throw XmlPullParserException; reaching fail() means the reference was accepted.
+        try
+        {
+            assertEquals( XmlPullParser.START_TAG, parser.nextToken() );
+            assertEquals( XmlPullParser.ENTITY_REF, parser.nextToken() );
+            fail( "Should fail since &#1114112; is an illegal character reference" );
+        }
+        catch ( XmlPullParserException e )
+        {
+            assertTrue( e.getMessage().contains( "character reference (with decimal value 1114112) is invalid" ) );
+        }
+    }
+
     @Test
     public void testProcessingInstruction()
         throws Exception