一、前言
本文介绍 itextpdf-5.1.1.jar 源码包中的 XML 工具类 com.itextpdf.text.xml.XMLUtil。该类提供两个静态方法:escapeXML 用于对字符串中的 XML 特殊字符进行转义处理,getEncodingName 用于根据数据开头的几个字节推断字符编码名称。详情参见下面的源码说明部分。
二、源码说明
package com.itextpdf.text.xml;@b@@b@public class XMLUtil@b@{@b@ public static String escapeXML(String s, boolean onlyASCII)@b@ {@b@ char[] cc = s.toCharArray();@b@ int len = cc.length;@b@ StringBuffer sb = new StringBuffer();@b@ for (int k = 0; k < len; ++k) {@b@ int c = cc[k];@b@ switch (c)@b@ {@b@ case 60:@b@ sb.append("<");@b@ break;@b@ case 62:@b@ sb.append(">");@b@ break;@b@ case 38:@b@ sb.append("&");@b@ break;@b@ case 34:@b@ sb.append(""");@b@ break;@b@ case 39:@b@ sb.append("'");@b@ break;@b@ default:@b@ if ((c == 9) || (c == 10) || (c == 13) || ((c >= 32) && (c <= 55295)) || ((c >= 57344) && (c <= 65533)) || ((c >= 65536) && (c <= 1114111)))@b@ {@b@ if ((onlyASCII) && (c > 127))@b@ sb.append("&#").append(c).append(';');@b@ else@b@ sb.append((char)c);@b@ }@b@ }@b@ }@b@ return sb.toString();@b@ }@b@@b@ public static String getEncodingName(byte[] b4)@b@ {@b@ int b0 = b4[0] & 0xFF;@b@ int b1 = b4[1] & 0xFF;@b@ if ((b0 == 254) && (b1 == 255))@b@ {@b@ return "UTF-16BE";@b@ }@b@ if ((b0 == 255) && (b1 == 254))@b@ {@b@ return "UTF-16LE";@b@ }@b@@b@ int b2 = b4[2] & 0xFF;@b@ if ((b0 == 239) && (b1 == 187) && (b2 == 191)) {@b@ return "UTF-8";@b@ }@b@@b@ int b3 = b4[3] & 0xFF;@b@ if ((b0 == 0) && (b1 == 0) && (b2 == 0) && (b3 == 60))@b@ {@b@ return "ISO-10646-UCS-4";@b@ }@b@ if ((b0 == 60) && (b1 == 0) && (b2 == 0) && (b3 == 0))@b@ {@b@ return "ISO-10646-UCS-4";@b@ }@b@ if ((b0 == 0) && (b1 == 0) && (b2 == 60) && (b3 == 0))@b@ {@b@ return "ISO-10646-UCS-4";@b@ }@b@ if ((b0 == 0) && (b1 == 60) && (b2 == 0) && (b3 == 0))@b@ {@b@ return "ISO-10646-UCS-4";@b@ }@b@ if ((b0 == 0) && (b1 == 60) && (b2 == 0) && (b3 == 63))@b@ {@b@ return "UTF-16BE";@b@ }@b@ if ((b0 == 60) && (b1 == 0) && (b2 == 63) && (b3 == 0))@b@ {@b@ return "UTF-16LE";@b@ }@b@ if ((b0 == 76) && (b1 == 111) && (b2 == 167) && (b3 == 148))@b@ {@b@ return "CP037";@b@ }@b@@b@ return "UTF-8";@b@ }@b@}