I have had this issue lingering for a while. I have an XML document that contains special characters, and I run into a serious problem when I try to parse it. Experts, please advise.
Here is the XML:
<?xml version="1.0" encoding="UTF-8"?>
<user_data>
<time_taken>ÀÀÀÀ</time_taken> ///SPECIAL CHARACTERS.
</user_data>
Here is the servlet code that parses it:
protected ModelAndView handleRequestInternal(Http
ServletReq
uest request, HttpServletResponse response) throws Exception
{
request.setCharacterEncodi
ng("UTF-8"
);
int contentLength = request.getContentLength()
;
if ( contentLength == -1 ) {
// Content length must be known.
throw new ServletException( "Content-Length must be specified" );
}
String contentType = request.getContentType();
System.out.println("reques
t.getConte
ntType(): " +request.getContentType() );
System.out.println("reques
t.getConte
ntLength()
: " +request.getContentLength(
) );
boolean contentTypeIsOkay = false;
// Content-Type must be specified.
if ( contentType != null ) {
// The type must be plain text.
if ( contentType.startsWith( "text/xml" ) ) {
// And it must be UTF-8 encoded (or unspecified, in which case
// we assume
// that it's either UTF-8 or ASCII).
if ( contentType.indexOf( "charset=" ) == -1 ) {
contentTypeIsOkay = true;
} else if ( contentType.indexOf( "charset=utf-8" ) != -1 ) {
contentTypeIsOkay = true;
}
}
}
if ( !contentTypeIsOkay ) {
throw new ServletException(
"Content-Type must be 'text/xml' with 'charset=utf-8' (or unspecified charset)" );
}
InputStream in = request.getInputStream();
// InputStreamReader in = new InputStreamReader(request.
getInputSt
ream(), "UTF-8");
String decoded = null;
String pay = null;
try {
byte[] payload = new byte[contentLength];
int offset = 0;
int len = contentLength;
int byteCount;
while ( offset < contentLength ) {
byteCount = in.read( payload, offset, len );
if ( byteCount == -1 ) {
throw new ServletException( "Client did not send " + contentLength + " bytes as expected" );
}
offset += byteCount;
len -= byteCount;
}
pay = new String( payload, "UTF-8" );
System.out.println("xml is : " +pay );
decoded = URLDecoder.decode(pay, "utf-8");
System.out.println("decode
d : " +decoded );
} finally {
if ( in != null ) {
in.close();
}
}
sun.io.ByteToCharConverter
fromUnicode;
String convertedStr = decoded;
try {
fromUnicode = sun.io.ByteToCharConverter
.getConver
ter("UTF-8
");
fromUnicode.setSubstitutio
nMode(true
);
char[] convertedChars;
convertedChars = fromUnicode.convertAll(con
vertedStr.
getBytes()
);
convertedStr = new String(convertedChars);
System.out.println("conver
tedStr : " +convertedStr );
} catch (UnsupportedEncodingExcept
ion e) {
e.printStackTrace();
}
InputStream inputStream = request.getInputStream();
System.out.println("reques
t.getChara
cterEncodi
ng() : " + request.getCharacterEncodi
ng() );
SAXBuilder builder = null;
// Create an instance of the tester and test
builder = new SAXBuilder();
Document doc= builder.build(new java.io.ByteArrayInputStre
am(convert
edStr.getB
ytes()));
//////ERROR : Illegal XML character: .
Element user_data =doc.getRootElement();