一、前言
htmlcompressor源码包中的com.googlecode.htmlcompressor.compressor.Compressor接口及其实现类com.googlecode.htmlcompressor.compressor.XmlCompressor定义了XML文本压缩功能，可以对空格、回车、换行及注释等进行过滤压缩处理。
二、源码说明
1.Compressor接口
package com.googlecode.htmlcompressor.compressor;@b@@b@public abstract interface Compressor@b@{@b@ public abstract String compress(String paramString);@b@}
2.XmlCompressor实现类
package com.googlecode.htmlcompressor.compressor;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based {@link Compressor} for XML text.
 *
 * <p>Compression runs in three stages:
 * <ol>
 *   <li>every {@code <![CDATA[ ... ]]>} section is swapped for a numbered
 *       placeholder so that its content is not touched;</li>
 *   <li>comments are removed, whitespace between tags is removed and
 *       whitespace inside tags is normalized (the first two steps can be
 *       switched off through the setters);</li>
 *   <li>the placeholders are replaced with the original CDATA sections and
 *       the result is trimmed.</li>
 * </ol>
 *
 * <p>The configuration flags are plain mutable fields with no synchronization.
 */
public class XmlCompressor implements Compressor {

    /** When {@code false}, {@link #compress(String)} returns its input unchanged. */
    private boolean enabled = true;

    /** When {@code true}, {@code <!-- ... -->} comments are removed. */
    private boolean removeComments = true;

    /** When {@code true}, whitespace between {@code >} and {@code <} is removed. */
    private boolean removeIntertagSpaces = true;

    /** {@link MessageFormat} pattern of the placeholder that stands in for a CDATA section. */
    protected static final String tempCdataBlock = "%%%COMPRESS~CDATA~{0,number,#}%%%";

    /** Matches a complete CDATA section, including sections that span several lines. */
    protected static final Pattern cdataPattern =
            Pattern.compile("<!\\[CDATA\\[.*?\\]\\]>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches a complete comment, including comments that span several lines. */
    protected static final Pattern commentPattern =
            Pattern.compile("<!--.*?-->", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches whitespace located between the end of one tag and the start of the next. */
    protected static final Pattern intertagPattern =
            Pattern.compile(">\\s+<", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches whitespace directly in front of a closing {@code >} or {@code />}. */
    protected static final Pattern tagEndSpacePattern =
            Pattern.compile("(<(?:[^>]+?))(?:\\s+?)(/?>)", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches a whitespace run that is followed by a {@code >} with no {@code <} in between. */
    protected static final Pattern multispacePattern =
            Pattern.compile("\\s+(?=[^<]*?>)", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Matches {@code name =} (with optional whitespace around {@code =}) inside a tag. */
    protected static final Pattern tagPropertyPattern =
            Pattern.compile("(\\s\\w+)\\s*=\\s*(?=[^<]*?>)", Pattern.CASE_INSENSITIVE);

    /** Matches a placeholder produced from {@link #tempCdataBlock}; group 1 is the block index. */
    protected static final Pattern tempCdataPattern =
            Pattern.compile("%%%COMPRESS~CDATA~(\\d+?)%%%", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

    /** Creates a compressor that is enabled and removes both comments and inter-tag whitespace. */
    public XmlCompressor() {
        // Defaults are set by the field initializers.
    }

    /**
     * Compresses the given XML text.
     *
     * @param xml the XML text to compress
     * @return the compressed, trimmed XML; the input itself when the compressor
     *         is disabled or the input is {@code null} or empty
     */
    @Override
    public String compress(String xml) {
        if (!enabled || xml == null || xml.length() == 0) {
            return xml;
        }

        List<String> cdataBlocks = new ArrayList<String>();

        String result = preserveBlocks(xml, cdataBlocks);
        result = processXml(result);
        result = returnBlocks(result, cdataBlocks);

        return result.trim();
    }

    /**
     * Replaces every CDATA section with a numbered placeholder.
     *
     * @param xml         the text to scan
     * @param cdataBlocks receives the removed CDATA sections in order of appearance
     * @return the text with CDATA sections replaced by placeholders
     */
    protected String preserveBlocks(String xml, List<String> cdataBlocks) {
        Matcher matcher = cdataPattern.matcher(xml);
        // Format with Locale.ROOT so the index is always written with ASCII
        // digits; tempCdataPattern's \d only matches [0-9], so digits taken
        // from a JVM default locale that uses another digit set would leave
        // the placeholder unrecognized in returnBlocks.
        MessageFormat placeholder = new MessageFormat(tempCdataBlock, Locale.ROOT);
        int index = 0;
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            cdataBlocks.add(matcher.group(0));
            matcher.appendReplacement(sb, placeholder.format(new Object[] { Integer.valueOf(index++) }));
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /**
     * Replaces placeholders with the CDATA sections they stand for.
     *
     * <p>Placeholder-like text whose index does not refer to an entry of
     * {@code cdataBlocks} is left in the output unchanged.
     *
     * @param xml         the text containing placeholders
     * @param cdataBlocks the CDATA sections collected by {@link #preserveBlocks(String, List)}
     * @return the text with the CDATA sections restored
     */
    protected String returnBlocks(String xml, List<String> cdataBlocks) {
        Matcher matcher = tempCdataPattern.matcher(xml);
        StringBuffer sb = new StringBuffer();
        while (matcher.find()) {
            int index = parseBlockIndex(matcher.group(1));
            if (index >= 0 && index < cdataBlocks.size()) {
                matcher.appendReplacement(sb, Matcher.quoteReplacement(cdataBlocks.get(index)));
            }
            // Otherwise nothing is appended here: the matcher's append position
            // stays put, so the skipped text is copied verbatim by the next
            // appendReplacement or by appendTail.
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /** Parses a run of decimal digits; returns -1 when the value does not fit in an {@code int}. */
    private static int parseBlockIndex(String digits) {
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException tooLarge) {
            return -1;
        }
    }

    /**
     * Applies comment removal, inter-tag whitespace removal and in-tag
     * whitespace normalization, in that order.
     *
     * @param xml the text to process (CDATA sections already replaced by placeholders)
     * @return the processed text
     */
    protected String processXml(String xml) {
        String result = removeComments(xml);
        result = removeIntertagSpaces(result);
        result = removeSpacesInsideTags(result);
        return result;
    }

    /**
     * Normalizes whitespace inside tags: collapses whitespace runs to a single
     * space, removes whitespace around {@code =} and removes whitespace before
     * the closing {@code >} or {@code />}.
     *
     * @param xml the text to process
     * @return the processed text
     */
    protected String removeSpacesInsideTags(String xml) {
        String result = multispacePattern.matcher(xml).replaceAll(" ");
        result = tagPropertyPattern.matcher(result).replaceAll("$1=");
        result = tagEndSpacePattern.matcher(result).replaceAll("$1$2");
        return result;
    }

    /**
     * Removes whitespace between tags when that option is enabled.
     *
     * @param xml the text to process
     * @return the processed text, or the input when the option is disabled
     */
    protected String removeIntertagSpaces(String xml) {
        if (!removeIntertagSpaces) {
            return xml;
        }
        return intertagPattern.matcher(xml).replaceAll("><");
    }

    /**
     * Removes comments when that option is enabled.
     *
     * @param xml the text to process
     * @return the processed text, or the input when the option is disabled
     */
    protected String removeComments(String xml) {
        if (!removeComments) {
            return xml;
        }
        return commentPattern.matcher(xml).replaceAll("");
    }

    /**
     * @return {@code true} if compression is enabled
     */
    public boolean isEnabled() {
        return enabled;
    }

    /**
     * @param enabled {@code false} to make {@link #compress(String)} return its input unchanged
     */
    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * @return {@code true} if comments are removed
     */
    public boolean isRemoveComments() {
        return removeComments;
    }

    /**
     * @param removeComments {@code true} to remove comments
     */
    public void setRemoveComments(boolean removeComments) {
        this.removeComments = removeComments;
    }

    /**
     * @return {@code true} if whitespace between tags is removed
     */
    public boolean isRemoveIntertagSpaces() {
        return removeIntertagSpaces;
    }

    /**
     * @param removeIntertagSpaces {@code true} to remove whitespace between tags
     */
    public void setRemoveIntertagSpaces(boolean removeIntertagSpaces) {
        this.removeIntertagSpaces = removeIntertagSpaces;
    }
}