一、前言
定义编码转换工具类TranscodingUtil，实现GBK转UTF-8、UTF-8转GBK、GBK转Unicode、Unicode转GBK、UTF-8转Unicode、Unicode转UTF-8等常见编码转换处理，详情参见下方代码示例。
二、代码示例
/**
 * Helpers that convert text to and from backslash-u escape notation
 * (a backslash, the letter {@code u}, then four hexadecimal digits).
 *
 * <p>All methods take and return {@link String} values; none of them touches
 * byte arrays or charsets. The "GBK"/"UTF-8" wording in the method names is
 * kept for compatibility with existing callers: what the methods actually do
 * is escape or unescape individual UTF-16 code units.
 *
 * <p>Escapes are always written as exactly four lowercase hexadecimal digits,
 * and both decoders read exactly four ASCII hexadecimal digits (either case).
 */
public class TranscodingUtil {

    /** First fullwidth ASCII variant (FULLWIDTH EXCLAMATION MARK). */
    private static final char FULLWIDTH_FIRST = 0xFF01;

    /** Last fullwidth ASCII variant (FULLWIDTH TILDE). */
    private static final char FULLWIDTH_LAST = 0xFF5E;

    /** Distance between a fullwidth ASCII variant and its ASCII counterpart (65248). */
    private static final int FULLWIDTH_OFFSET = 0xFEE0;

    /** Message used for every malformed or truncated escape sequence. */
    private static final String MALFORMED_MESSAGE = "Malformed \\uxxxx encoding.";

    /**
     * Escapes the input with {@link #GBK2Unicode(String)} and immediately
     * decodes it again with {@link #unicodeToUtf8(String)}.
     *
     * <p>Because the escape step leaves backslashes untouched, a backslash
     * already present in the input is interpreted by the decode step as the
     * start of an escape sequence.
     *
     * @param gbk the text to process; must not be {@code null}
     * @return the text after the escape/unescape round trip
     * @throws IllegalArgumentException if the input contains a backslash-u
     *         sequence that is not followed by four hexadecimal digits
     */
    public static String gbk2utf8(String gbk) {
        return unicodeToUtf8(GBK2Unicode(gbk));
    }

    /**
     * Escapes the input with {@link #utf8ToUnicode(String)} and immediately
     * decodes it again with {@link #Unicode2GBK(String)}.
     *
     * <p>Fullwidth ASCII variants are folded to plain ASCII by the escape
     * step, so this round trip is not an identity for such characters.
     *
     * @param utf the text to process; must not be {@code null}
     * @return the text after the escape/unescape round trip
     * @throws Exception declared for backward compatibility; in practice an
     *         {@link IllegalArgumentException} is thrown when the input
     *         contains a malformed backslash-u sequence
     */
    public static String utf82gbk(String utf) throws Exception {
        return Unicode2GBK(utf8ToUnicode(utf));
    }

    /**
     * Replaces every character above U+00FF with its four-digit backslash-u
     * escape and copies every other character unchanged.
     *
     * @param str the text to escape; must not be {@code null}
     * @return the escaped text
     */
    public static String GBK2Unicode(String str) {
        StringBuilder result = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (isNeedConvert(current)) {
                appendEscape(result, current);
            } else {
                result.append(current);
            }
        }
        return result.toString();
    }

    /**
     * Decodes every backslash-u escape in the input into the character it
     * denotes. All other characters, including backslashes that are not
     * followed by {@code u}, are copied unchanged.
     *
     * @param dataStr the escaped text; must not be {@code null}
     * @return the decoded text
     * @throws Exception declared for backward compatibility; in practice an
     *         {@link IllegalArgumentException} is thrown when a backslash-u
     *         is not followed by four hexadecimal digits
     */
    public static String Unicode2GBK(String dataStr) throws Exception {
        int length = dataStr.length();
        StringBuilder buffer = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            if (dataStr.startsWith("\\u", index)) {
                buffer.append((char) parseHex4(dataStr, index + 2));
                index += 6;
            } else {
                buffer.append(dataStr.charAt(index));
                index++;
            }
        }
        return buffer.toString();
    }

    /**
     * Tells whether a character lies above U+00FF, i.e. whether it has bits
     * set outside its low byte.
     *
     * @param para the character to test
     * @return {@code true} if the character is greater than U+00FF
     */
    public static boolean isNeedConvert(char para) {
        return (para & 0xFF) != para;
    }

    /**
     * Escapes the input as follows: Basic Latin characters (below U+0080) are
     * copied unchanged, fullwidth ASCII variants (U+FF01 to U+FF5E) are folded
     * to their ASCII counterparts, and every other character is written as a
     * four-digit lowercase backslash-u escape.
     *
     * @param inStr the text to escape; must not be {@code null}
     * @return the escaped text
     */
    public static String utf8ToUnicode(String inStr) {
        StringBuilder sb = new StringBuilder(inStr.length());
        for (int i = 0; i < inStr.length(); i++) {
            char current = inStr.charAt(i);
            if (current < 0x80) {
                sb.append(current);
            } else if (current >= FULLWIDTH_FIRST && current <= FULLWIDTH_LAST) {
                // Only this sub-range has ASCII counterparts; other halfwidth/fullwidth
                // forms (e.g. halfwidth katakana) would turn into unrelated characters.
                sb.append((char) (current - FULLWIDTH_OFFSET));
            } else {
                appendEscape(sb, current);
            }
        }
        return sb.toString();
    }

    /**
     * Decodes backslash escapes in the input. A backslash-u followed by four
     * hexadecimal digits becomes the denoted character; backslash followed by
     * {@code t}, {@code r}, {@code n} or {@code f} becomes tab, carriage
     * return, line feed or form feed; backslash followed by any other
     * character yields that character with the backslash dropped. A single
     * backslash at the very end of the input is kept as is.
     *
     * @param theString the escaped text; must not be {@code null}
     * @return the decoded text
     * @throws IllegalArgumentException if a backslash-u is not followed by
     *         four hexadecimal digits
     */
    public static String unicodeToUtf8(String theString) {
        int length = theString.length();
        StringBuilder outBuffer = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            char current = theString.charAt(pos++);
            if (current != '\\') {
                outBuffer.append(current);
                continue;
            }
            if (pos >= length) {
                // Nothing follows the backslash, so there is nothing to decode.
                outBuffer.append(current);
                break;
            }
            char escaped = theString.charAt(pos++);
            if (escaped == 'u') {
                outBuffer.append((char) parseHex4(theString, pos));
                pos += 4;
                continue;
            }
            switch (escaped) {
                case 't':
                    escaped = '\t';
                    break;
                case 'r':
                    escaped = '\r';
                    break;
                case 'n':
                    escaped = '\n';
                    break;
                case 'f':
                    escaped = '\f';
                    break;
                default:
                    break;
            }
            outBuffer.append(escaped);
        }
        return outBuffer.toString();
    }

    /** Appends the backslash-u escape of {@code c}, zero-padded to four lowercase hex digits. */
    private static void appendEscape(StringBuilder target, char c) {
        String hex = Integer.toHexString(c);
        target.append("\\u");
        for (int i = hex.length(); i < 4; i++) {
            target.append('0');
        }
        target.append(hex);
    }

    /** Parses the four ASCII hex digits of {@code text} starting at {@code start}; rejects anything else. */
    private static int parseHex4(String text, int start) {
        if (start + 4 > text.length()) {
            throw new IllegalArgumentException(MALFORMED_MESSAGE);
        }
        int value = 0;
        for (int i = start; i < start + 4; i++) {
            int digit = hexValue(text.charAt(i));
            if (digit < 0) {
                throw new IllegalArgumentException(MALFORMED_MESSAGE);
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if {@code c} is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}