diff --git a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java index e7c7cc47..e15b33b3 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java +++ b/src/main/java/org/codehaus/plexus/util/xml/XmlReader.java @@ -504,11 +504,8 @@ else if ( bomEnc.equals( UTF_8 ) ) } else if ( bomEnc.equals( UTF_16BE ) || bomEnc.equals( UTF_16LE ) ) { - if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) ) - { - throw new IOException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ) ); - } - if ( xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) + if ( xmlGuessEnc != null && !xmlGuessEnc.equals( bomEnc ) + || xmlEnc != null && !xmlEnc.equals( UTF_16 ) && !xmlEnc.equals( bomEnc ) ) { throw new XmlStreamReaderException( RAW_EX_1.format( new Object[] { bomEnc, xmlGuessEnc, xmlEnc } ), bomEnc, xmlGuessEnc, xmlEnc, is ); diff --git a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java index e9fc1182..60f91c2a 100644 --- a/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java +++ b/src/main/java/org/codehaus/plexus/util/xml/pull/MXParser.java @@ -15,8 +15,9 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; -import org.codehaus.plexus.util.ReaderFactory; import org.codehaus.plexus.util.xml.XmlReader; +import org.codehaus.plexus.util.xml.XmlStreamReader; +import org.codehaus.plexus.util.xml.XmlStreamReaderException; //import java.util.Hashtable; @@ -663,20 +664,6 @@ public void setInput( Reader in ) { reset(); reader = in; - - if ( reader instanceof XmlReader ) { - // encoding already detected - XmlReader xsr = (XmlReader) reader; - fileEncoding = xsr.getEncoding(); - } - else if ( reader instanceof InputStreamReader ) - { - InputStreamReader isr = (InputStreamReader) reader; - if ( isr.getEncoding() != null ) - { - fileEncoding = isr.getEncoding().toUpperCase(); - 
} - } } @Override @@ -692,11 +679,11 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding ) { if ( inputEncoding != null ) { - reader = ReaderFactory.newReader( inputStream, inputEncoding ); + reader = new InputStreamReader( inputStream, inputEncoding ); } else { - reader = ReaderFactory.newXmlReader( inputStream ); + reader = new XmlStreamReader( inputStream, false ); } } catch ( UnsupportedEncodingException une ) @@ -704,6 +691,18 @@ public void setInput( java.io.InputStream inputStream, String inputEncoding ) throw new XmlPullParserException( "could not create reader for encoding " + inputEncoding + " : " + une, this, une ); } + catch ( XmlStreamReaderException e ) + { + if ( "UTF-8".equals( e.getBomEncoding() ) ) + { + throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + e.getXmlEncoding() + " is incompatible", this, e ); + } + if ( e.getBomEncoding() != null && e.getBomEncoding().startsWith( "UTF-16" ) ) + { + throw new XmlPullParserException( "UTF-16 BOM in a " + e.getXmlEncoding() + " encoded file is incompatible", this, e ); + } + throw new XmlPullParserException( "could not create reader : " + e, this, e ); + } catch ( IOException e ) { throw new XmlPullParserException( "could not create reader : " + e, this, e ); @@ -3434,17 +3433,6 @@ private void parseXmlDeclWithVersion( int versionStart, int versionEnd ) // TODO reconcile with setInput encodingName inputEncoding = newString( buf, encodingStart, encodingEnd - encodingStart ); - if ( "UTF8".equals( fileEncoding ) && inputEncoding.toUpperCase().startsWith( "ISO-" ) ) - { - throw new XmlPullParserException( "UTF-8 BOM plus xml decl of " + inputEncoding + " is incompatible", - this, null ); - } - else if ("UTF-16".equals( fileEncoding ) && inputEncoding.equalsIgnoreCase( "UTF-8" )) - { - throw new XmlPullParserException( "UTF-16 BOM plus xml decl of " + inputEncoding + " is incompatible", - this, null ); - } - lastParsedAttr = "encoding"; ch = more(); diff --git 
a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java index cba42b32..e16aa5a2 100644 --- a/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java +++ b/src/test/java/org/codehaus/plexus/util/xml/pull/MXParserTest.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; @@ -968,7 +969,7 @@ public void testXMLDeclVersionEncodingStandaloneNoSpace() * @since 3.4.1 */ @Test - public void testEncodingISO_8859_1setInputReader() + public void testEncodingISO_8859_1_newXmlReader() throws IOException { try ( Reader reader = @@ -994,7 +995,7 @@ public void testEncodingISO_8859_1setInputReader() * @since 3.4.1 */ @Test - public void testEncodingISO_8859_1_setInputStream() + public void testEncodingISO_8859_1_InputStream() throws IOException { try ( InputStream input = @@ -1012,12 +1013,6 @@ public void testEncodingISO_8859_1_setInputStream() } } - private static void assertPosition( int row, int col, MXParser parser ) - { - assertEquals( "Current line", row, parser.getLineNumber() ); - assertEquals( "Current column", col, parser.getColumnNumber() ); - } - /** * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 * @@ -1028,7 +1023,7 @@ private static void assertPosition( int row, int col, MXParser parser ) * @since 3.4.2 */ @Test - public void testEncodingISO_8859_1setStringReader() + public void testEncodingISO_8859_1_StringReader() throws IOException { try ( Reader reader = @@ -1047,6 +1042,93 @@ public void testEncodingISO_8859_1setStringReader() } } + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: Reader generated with ReaderFactory.newReader and the right file encoding. + * + * @throws IOException if IO error. 
+ * + * @since 3.5.2 + */ + @Test + public void testEncodingISO_8859_1_newReader() + throws IOException + { + try ( Reader reader = + ReaderFactory.newReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ), + StandardCharsets.UTF_8.name() ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * Another case of bug #163: InputStream supplied with the right file encoding. + * + * @throws IOException if IO error. + * + * @since 3.5.2 + */ + @Test + public void testEncodingISO_8859_1_InputStream_encoded() throws IOException { + try ( InputStream input = + Files.newInputStream( Paths.get( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( input, StandardCharsets.UTF_8.name() ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + /** + * Issue 163: https://github.com/codehaus-plexus/plexus-utils/issues/163 + * + * @throws IOException if IO error. 
+ * + * @since 3.4.1 + */ + @Test + public void testEncodingUTF8_newXmlReader() + throws IOException + { + try ( Reader reader = + ReaderFactory.newXmlReader( new File( "src/test/resources/xml", "test-encoding-ISO-8859-1.xml" ) ) ) + { + MXParser parser = new MXParser(); + parser.setInput( reader ); + while ( parser.nextToken() != XmlPullParser.END_DOCUMENT ) + ; + assertTrue( true ); + } + catch ( XmlPullParserException e ) + { + fail( "should not raise exception: " + e ); + } + } + + private static void assertPosition( int row, int col, MXParser parser ) + { + assertEquals( "Current line", row, parser.getLineNumber() ); + assertEquals( "Current column", col, parser.getColumnNumber() ); + } + /** *
* Test custom Entity not found.
diff --git a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
index 854fb494..db55fb19 100644
--- a/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
+++ b/src/test/java/org/codehaus/plexus/util/xml/pull/eduni_misc_Test_BjoernHoehrmannviaHST2013_09_18_Test.java
@@ -7,10 +7,11 @@
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
-import java.io.InputStreamReader;
+import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
+import org.codehaus.plexus.util.ReaderFactory;
import org.junit.Before;
import org.junit.Test;
@@ -212,17 +213,16 @@ public void testhst_bh_006()
public void testhst_lhs_007()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
+ try ( InputStream is = new FileInputStream( new File( testResourcesDir, "007.xml" ) ) )
{
- parser.setInput( reader );
+ parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
- fail( "UTF-8 BOM plus xml decl of iso-8859-1 incompatible" );
+ fail( "UTF-8 BOM plus xml decl of ISO-8859-1 incompatible" );
}
catch ( XmlPullParserException e )
{
- assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of iso-8859-1 is incompatible" ) );
+ assertTrue( e.getMessage().contains( "UTF-8 BOM plus xml decl of ISO-8859-1 is incompatible" ) );
}
}
@@ -239,17 +239,16 @@ public void testhst_lhs_007()
public void testhst_lhs_008()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_16 ) )
+ try ( InputStream is = new FileInputStream( new File( testResourcesDir, "008.xml" ) ) )
{
- parser.setInput( reader );
+ parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
- fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-16 coding) incompatible" );
+ fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-16 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
- assertTrue( e.getMessage().contains( "UTF-16 BOM plus xml decl of utf-8 is incompatible" ) );
+ assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}
@@ -261,22 +260,24 @@ public void testhst_lhs_008()
* Version:
*
* @throws java.io.IOException if there is an I/O error
+ *
+ * NOTE: This test is SKIPPED as MXParser is unable to detect a UTF-16 BOM when chars are read as
+ * UTF-8.
*/
@Test
public void testhst_lhs_009()
throws IOException
{
- try ( FileInputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) );
- InputStreamReader reader = new InputStreamReader( is, StandardCharsets.UTF_8 ) )
- {
- parser.setInput( reader );
+ try ( InputStream is = new FileInputStream( new File( testResourcesDir, "009.xml" ) ) )
+ {
+ parser.setInput( is, null );
while ( parser.nextToken() != XmlPullParser.END_DOCUMENT )
;
- fail( "UTF-16 BOM plus xml decl of utf-8 (using UTF-8 coding) incompatible" );
+ fail( "UTF-16 BOM plus xml decl of UTF-8 (using UTF-8 coding) incompatible" );
}
catch ( XmlPullParserException e )
{
- assertTrue( e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
+ assertTrue( e.getMessage(), e.getMessage().contains( "UTF-16 BOM in a UTF-8 encoded file is incompatible" ) );
}
}
diff --git a/src/test/resources/xml/test-encoding-ISO-8859-1.xml b/src/test/resources/xml/test-encoding-ISO-8859-1.xml
index ae0aefe7..e37a912c 100644
--- a/src/test/resources/xml/test-encoding-ISO-8859-1.xml
+++ b/src/test/resources/xml/test-encoding-ISO-8859-1.xml
@@ -1,1503 +1,3 @@
-
-