一、前言
本文介绍 html-2.0.0 源码包中的 fr.natoine.html.HTMLPage 类,该类用于对 HTML 页面对象进行解析与渲染处理,具体参见下面的源码说明部分。
二、源码说明
package fr.natoine.html;@b@@b@import fr.natoine.stringOp.StringOp;@b@import java.io.IOException;@b@import java.io.PrintStream;@b@import java.nio.ByteBuffer;@b@import java.nio.CharBuffer;@b@import java.nio.charset.CharacterCodingException;@b@import java.nio.charset.Charset;@b@import java.nio.charset.CharsetDecoder;@b@import java.nio.charset.CharsetEncoder;@b@import java.util.regex.Matcher;@b@import java.util.regex.Pattern;@b@import org.apache.http.client.ClientProtocolException;@b@import org.apache.http.client.HttpClient;@b@import org.apache.http.client.ResponseHandler;@b@import org.apache.http.client.methods.HttpGet;@b@import org.apache.http.conn.ClientConnectionManager;@b@import org.apache.http.impl.client.BasicResponseHandler;@b@import org.apache.http.impl.client.DefaultHttpClient;@b@import org.apache.http.params.HttpParams;@b@import org.htmlparser.Node;@b@import org.htmlparser.Parser;@b@import org.htmlparser.Tag;@b@import org.htmlparser.filters.CssSelectorNodeFilter;@b@import org.htmlparser.filters.NodeClassFilter;@b@import org.htmlparser.nodes.TagNode;@b@import org.htmlparser.nodes.TextNode;@b@import org.htmlparser.tags.BodyTag;@b@import org.htmlparser.tags.HeadTag;@b@import org.htmlparser.tags.Html;@b@import org.htmlparser.tags.LinkTag;@b@import org.htmlparser.tags.MetaTag;@b@import org.htmlparser.tags.Span;@b@import org.htmlparser.tags.StyleTag;@b@import org.htmlparser.tags.TitleTag;@b@import org.htmlparser.util.NodeList;@b@import org.htmlparser.util.ParserException;@b@@b@public class HTMLPage@b@{@b@ private String url;@b@ private String domain;@b@ private String title;@b@ private String css;@b@ private String body;@b@ private String scripts;@b@ private String wrapperDiv;@b@ private String encoding;@b@ private boolean valid;@b@ private static String DEFAULT_WRAPPER = "PortletBrowserContent";@b@ private static int DEFAULT_TIME_TO_CREATE = 3000;@b@@b@ public HTMLPage()@b@ {@b@ this.valid = false;@b@ this.url = "not a valid url";@b@ this.domain = "not a valid 
domain";@b@ this.title = "no title";@b@ this.css = "";@b@ this.body = "";@b@ this.wrapperDiv = DEFAULT_WRAPPER;@b@ this.encoding = null;@b@ }@b@@b@ public HTMLPage(String _url)@b@ {@b@ this(_url, DEFAULT_TIME_TO_CREATE, DEFAULT_WRAPPER);@b@ }@b@@b@ public HTMLPage(String _url, int _time_to_create, String _wrapperDiv)@b@ {@b@ this.wrapperDiv = _wrapperDiv;@b@@b@ if (_url.endsWith("/")) this.url = _url.substring(0, _url.length() - 1);@b@ else this.url = _url;@b@@b@ if (_url.startsWith("http://"))@b@ {@b@ this.domain = "http://" + extractDomain(_url);@b@ String response_content = extractFullContentPage(_url, _time_to_create);@b@@b@ if (this.valid)@b@ {@b@ extractBodyTitleCss(response_content, _time_to_create);@b@ correctHREF("javascript:browserHREF");@b@@b@ encodeBody();@b@ }@b@ else@b@ {@b@ this.title = this.url;@b@ }@b@ }@b@ finalizeBody();@b@ }@b@@b@ private void finalizeBody()@b@ {@b@ if ((this.body != null) && (this.body.length() > 0))@b@ {@b@ Pattern p = Pattern.compile("(<body>)|(<BODY>)");@b@ Matcher m = p.matcher("");@b@ m.reset(this.body);@b@ this.body = m.replaceAll("<div id='" + this.wrapperDiv + "'>");@b@ p = Pattern.compile("(</body>)|(</BODY>)");@b@ m = p.matcher("");@b@ m.reset(this.body);@b@ this.body = m.replaceAll("</div>");@b@ } else {@b@ this.body = "";@b@ }@b@ }@b@@b@ private void encodeBody()@b@ {@b@ String[] olds;@b@ String[] news;@b@ int i;@b@ if (this.encoding != null)@b@ {@b@ if (this.encoding.equalsIgnoreCase("UTF-8"))@b@ {@b@ CharsetEncoder encoder = Charset.forName("ISO-8859-1").newEncoder();@b@ try {@b@ String decoded = new String(encoder.encode(CharBuffer.wrap(this.body.toCharArray())).array());@b@@b@ CharsetDecoder decoder = Charset.forName(this.encoding).newDecoder();@b@ decoded = decoder.decode(ByteBuffer.wrap(decoded.getBytes())).toString();@b@@b@ this.body = decoded;@b@ }@b@ catch (CharacterCodingException e) {@b@ e.printStackTrace();@b@ }@b@ }@b@ }@b@ else@b@ {@b@ olds = new String[16];@b@ news = new String[16];@b@ olds[0] = "â"; 
news[0] = "â";@b@ olds[1] = "à"; news[1] = "à";@b@ olds[2] = "é"; news[2] = "é";@b@ olds[3] = "ê"; news[3] = "ê";@b@ olds[4] = "è"; news[4] = "è";@b@ olds[5] = "ë"; news[5] = "ë";@b@ olds[6] = "î"; news[6] = "î";@b@ olds[7] = "ï"; news[7] = "ï";@b@ olds[8] = "ô"; news[8] = "ô";@b@ olds[9] = "œ"; news[9] = "œ";@b@ olds[10] = "û"; news[10] = "û";@b@ olds[11] = "ù"; news[11] = "ù";@b@ olds[12] = "ü"; news[12] = "ü";@b@ olds[13] = "ç"; news[13] = "ç";@b@ olds[14] = "<"; news[14] = "<";@b@ olds[15] = ">"; news[15] = ">";@b@ for (i = 0; i < olds.length; ++i)@b@ {@b@ this.body = this.body.replaceAll(olds[i], news[i]);@b@ }@b@ }@b@ }@b@@b@ private void correctHREF(String _javascript_href_wrapper)@b@ {@b@ Parser parser = Parser.createParser(this.body, null);@b@ try@b@ {@b@ NodeList nl = parser.parse(null);@b@ NodeList a_hrefs = nl.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class), true);@b@@b@ int nb_a = a_hrefs.size();@b@@b@ char quote = '"';@b@ for (int cpt_a = 0; cpt_a < nb_a; ++cpt_a)@b@ {@b@ Node a = a_hrefs.elementAt(cpt_a);@b@ String original_href = ((TagNode)a).getAttribute("href");@b@ if (original_href != null)@b@ {@b@ String true_href;@b@ if (original_href.startsWith("http")) { true_href = original_href;@b@ } else if (original_href.startsWith("./"))@b@ {@b@ true_href = this.domain + "/" + original_href.substring(2);@b@ }@b@ else if (original_href.startsWith("../"))@b@ {@b@ int firstindexOfslash_href = original_href.indexOf("/");@b@ int lastindexOfslash = this.url.lastIndexOf("/");@b@ if (lastindexOfslash == this.url.length())@b@ {@b@ lastindexOfslash = this.url.substring(0, this.url.length() - 1).lastIndexOf("/");@b@ }@b@ true_href = this.url.subSequence(0, lastindexOfslash) + "/" + original_href.substring(firstindexOfslash_href);@b@ }@b@ else if (original_href.startsWith("/")) { true_href = this.domain + original_href; } else {@b@ true_href = this.url + "/" + original_href; }@b@ ((TagNode)a).setAttribute("href", _javascript_href_wrapper + "('" + 
true_href + "')", quote);@b@ }@b@ }@b@@b@ this.body = nl.toHtml();@b@ }@b@ catch (ParserException e) {@b@ System.out.println("[HTMLPage.correctHREF] problems while Parsing");@b@ e.printStackTrace();@b@ }@b@ }@b@@b@ public String extractTitle() throws ParserException@b@ {@b@ String response_content = extractFullContentPage(this.url, DEFAULT_TIME_TO_CREATE);@b@ if (this.valid)@b@ {@b@ this.title = this.url;@b@ Parser parser = Parser.createParser(response_content, null);@b@ NodeList nl = parser.parse(null);@b@ NodeList titles = nl.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class), true);@b@ if ((titles.size() > 0) && @b@ (titles.elementAt(0) instanceof Tag)) this.title = ((Tag)titles.elementAt(0)).getFirstChild().getText();@b@ }@b@@b@ return this.title;@b@ }@b@@b@ public String extractDomain(String _url)@b@ {@b@ String domain = "not a valid domain";@b@ String[] _url_split = _url.split("/");@b@ if (_url_split.length > 1) return _url_split[2];@b@ return domain;@b@ }@b@@b@ public String extractFullContentResource(String _url, int _time_to_respond)@b@ {@b@ String response_content = null;@b@ HttpClient httpclient = new DefaultHttpClient();@b@@b@ httpclient.getParams().setBooleanParameter("http.protocol.handle-redirects", true);@b@ httpclient.getParams().setBooleanParameter("http.protocol.handle-authentication", true);@b@ httpclient.getParams().setIntParameter("http.socket.timeout", 3000);@b@@b@ HttpGet httpget = new HttpGet(_url);@b@ try@b@ {@b@ ResponseHandler responseHandler = new BasicResponseHandler();@b@ String responseBody = (String)httpclient.execute(httpget, responseHandler);@b@ if (responseBody != null)@b@ {@b@ response_content = responseBody;@b@ }@b@@b@ return response_content;@b@ }@b@ catch (ClientProtocolException e)@b@ {@b@ System.out.println("[HTMLPage.extractFullContentCssLink] url : " + _url + " doesn't support GET requests !!! 
");@b@ e.printStackTrace();@b@@b@ return response_content;@b@ }@b@ catch (IOException e)@b@ {@b@ System.out.println("[HTMLPage.extractFullContentCssLink] url : " + _url + " send no data !!! Not responding ... ");@b@ e.printStackTrace();@b@@b@ return response_content;@b@ }@b@ finally@b@ {@b@ httpclient.getConnectionManager().shutdown(); }@b@ return response_content;@b@ }@b@@b@ private String extractFullContentPage(String _url, int _time_to_respond)@b@ {@b@ String content = extractFullContentResource(_url, _time_to_respond);@b@ if (content != null) this.valid = true;@b@ else { this.valid = false;@b@ }@b@@b@ return content;@b@ }@b@@b@ private void extractBodyTitleCss(String _html, int _time_to_extract_css)@b@ {@b@ Parser parser;@b@ try {@b@ parser = Parser.createParser(_html, null);@b@ NodeList nl = parser.parse(null);@b@@b@ NodeList htmls = nl.extractAllNodesThatMatch(new NodeClassFilter(Html.class));@b@ if (htmls.size() > 0)@b@ {@b@ NodeList heads = htmls.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(HeadTag.class));@b@ NodeList bodys = htmls.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(BodyTag.class));@b@ int nb_heads_node = heads.size();@b@ if (nb_heads_node > 0)@b@ {@b@ int cpt_metas;@b@ NodeList titles = heads.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class));@b@ if (titles.size() > 0)@b@ {@b@ if (titles.elementAt(0) instanceof Tag)@b@ {@b@ Tag tag_title = (Tag)titles.elementAt(0);@b@ this.title = tag_title.getFirstChild().getText();@b@ } else {@b@ this.title = this.url;@b@ }@b@ }@b@ else {@b@ System.out.println("[HTMLPage.extractBodyTitleCss] no title tag, url for default title value");@b@ this.title = this.url;@b@ }@b@@b@ NodeList metas = heads.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(MetaTag.class));@b@ if (metas.size() > 0)@b@ {@b@ int metas_size = metas.size();@b@ for (cpt_metas = 0; cpt_metas < metas_size; ++cpt_metas)@b@ {@b@ MetaTag 
meta = (MetaTag)metas.elementAt(cpt_metas);@b@ String httpEquiv = meta.getHttpEquiv();@b@ if (httpEquiv != null)@b@ {@b@ if (httpEquiv.equalsIgnoreCase("Content-Type"))@b@ {@b@ String content = meta.getMetaContent();@b@ if (content.contains("charset"))@b@ {@b@ int charset_index = content.indexOf("charset");@b@ int egal_index = content.indexOf("=", charset_index);@b@ String charset = content.substring(egal_index + 1, content.length());@b@ this.encoding = StringOp.deleteBlanks(charset);@b@ break;@b@ }@b@ }@b@ }@b@ else@b@ {@b@ String charset = meta.getAttribute("charset");@b@ if (charset != null)@b@ {@b@ this.encoding = charset;@b@ break;@b@ }@b@ }@b@ }@b@ }@b@@b@ String wip_css = "";@b@@b@ for (int cpt_heads_node = 0; cpt_heads_node < nb_heads_node; ++cpt_heads_node)@b@ {@b@ NodeList headChildren = heads.elementAt(cpt_heads_node).getChildren();@b@@b@ int nb_insideHead_nodes = headChildren.size();@b@@b@ for (int cpt_insideHead_nodes = 0; cpt_insideHead_nodes < nb_insideHead_nodes; ++cpt_insideHead_nodes)@b@ {@b@ Node currentNode = headChildren.elementAt(cpt_insideHead_nodes);@b@@b@ if (currentNode instanceof StyleTag)@b@ {@b@ wip_css = wip_css.concat(((StyleTag)currentNode).getStyleCode());@b@ }@b@@b@ if ((currentNode instanceof TagNode) && @b@ (((TagNode)currentNode).getRawTagName().equalsIgnoreCase("link")) && (((TagNode)currentNode).getAttribute("rel").equalsIgnoreCase("stylesheet")))@b@ {@b@ String href_css = ((TagNode)currentNode).getAttribute("href");@b@@b@ String true_url_href_css = null;@b@ if (href_css.startsWith("http://")) { true_url_href_css = href_css;@b@ }@b@ else if (href_css.startsWith("./"))@b@ {@b@ true_url_href_css = this.domain + "/" + href_css.substring(2);@b@ }@b@ else if (href_css.startsWith("../"))@b@ {@b@ int firstindexOfslash_href = href_css.indexOf("/");@b@ int lastindexOfslash = this.url.lastIndexOf("/");@b@ if (lastindexOfslash == this.url.length())@b@ {@b@ lastindexOfslash = this.url.substring(0, this.url.length() - 
1).lastIndexOf("/");@b@ }@b@ true_url_href_css = this.url.subSequence(0, lastindexOfslash) + "/" + href_css.substring(firstindexOfslash_href);@b@ }@b@ else if (href_css.startsWith("/")) { true_url_href_css = this.domain + href_css; } else {@b@ true_url_href_css = this.url + "/" + href_css;@b@ }@b@@b@ String css_content = extractFullContentResource(true_url_href_css, _time_to_extract_css);@b@ if (css_content != null) wip_css = wip_css.concat(css_content);@b@@b@ }@b@@b@ }@b@@b@ }@b@@b@ this.css = deleteCommentsNewLine(wip_css, this.wrapperDiv);@b@ }@b@ else@b@ {@b@ System.out.println("[HTMLPage.extractBodyTitleCss] no head tag, default title value = url");@b@ this.title = this.url;@b@ }@b@ if (bodys.size() > 0)@b@ {@b@ this.body = bodys.elementAt(0).toHtml();@b@ }@b@ }@b@ else@b@ {@b@ System.out.println("[HTMLPage.extractBodyTitleCss] not a valid HTML content");@b@ this.title = this.url;@b@ }@b@ }@b@ catch (ParserException e)@b@ {@b@ this.title = this.url;@b@ System.out.println("[HTMLPage.extractBodyTitleCss] error parsing HTML content");@b@ e.printStackTrace();@b@ }@b@ }@b@@b@ public String deleteCommentsNewLine(String _wip_css, String _new_englobing_div)@b@ {@b@ if ((_wip_css == null) || (_wip_css.length() == 0)) { return "";@b@ }@b@@b@ Pattern p = Pattern.compile("(?:/\\*(?:[^*]|(?:\\*+[^*/]))*\\*+/)", 8);@b@ Matcher m = p.matcher("");@b@ m.reset(_wip_css);@b@ String result = m.replaceAll("");@b@@b@ p = Pattern.compile("(\n)|(\t)");@b@ m = p.matcher("");@b@ m.reset(result);@b@ result = m.replaceAll("");@b@@b@ p = Pattern.compile(" (?= )|(?<= ) ");@b@ m = p.matcher("");@b@ m.reset(result);@b@ result = m.replaceAll(" ");@b@@b@ p = Pattern.compile("}");@b@ m = p.matcher("");@b@ m.reset(result);@b@ result = m.replaceAll("} #" + _new_englobing_div + " ");@b@ int cpt_last_spaces_index = result.length();@b@ while ((cpt_last_spaces_index > 0) && (result.charAt(cpt_last_spaces_index - 1) == ' '))@b@ {@b@ --cpt_last_spaces_index;@b@ }@b@ result = result.substring(0, 
cpt_last_spaces_index);@b@ if (result.endsWith("#" + _new_englobing_div)) result = result.substring(0, result.lastIndexOf("#" + _new_englobing_div));@b@ result = "#" + _new_englobing_div + " ".concat(result);@b@ return result;@b@ }@b@@b@ public String toString()@b@ {@b@ String _to_print = "Classe " + super.getClass();@b@ _to_print = _to_print.concat(" url : " + this.url);@b@ _to_print = _to_print.concat(" domain : " + this.domain);@b@ _to_print = _to_print.concat(" title : " + this.title);@b@ _to_print = _to_print.concat(" css : " + this.css);@b@ _to_print = _to_print.concat(" body : " + this.body);@b@ if (this.valid) _to_print = _to_print.concat(" valid !!!");@b@ else _to_print = _to_print.concat(" not valid !!!");@b@ return _to_print;@b@ }@b@@b@ public String getEncoding()@b@ {@b@ return this.encoding;@b@ }@b@@b@ public void setEncoding(String encoding)@b@ {@b@ this.encoding = encoding;@b@ }@b@@b@ public String getURL() {@b@ return this.url; }@b@@b@ public void setURL(String uRL) {@b@ this.url = uRL; }@b@@b@ public String getDomain() {@b@ return this.domain; }@b@@b@ public void setDomain(String domain) {@b@ this.domain = domain; }@b@@b@ public String getTitle() {@b@ return this.title; }@b@@b@ public void setTitle(String title) {@b@ this.title = title; }@b@@b@ public String getCss() {@b@ return this.css; }@b@@b@ public void setCss(String css) {@b@ this.css = css; }@b@@b@ public String getBody() {@b@ return this.body; }@b@@b@ public void setBody(String body) {@b@ this.body = body;@b@ }@b@@b@ public void setScripts(String scripts)@b@ {@b@ this.scripts = scripts;@b@ }@b@@b@ public String getScripts() {@b@ return this.scripts;@b@ }@b@@b@ public String[] xpointerSplit(String _xpointer)@b@ {@b@ String xpointer_tag = "#xpointer(";@b@ int begin_sub = _xpointer.indexOf(xpointer_tag) + xpointer_tag.length();@b@ String clean_xpointer = _xpointer.substring(begin_sub, _xpointer.length());@b@ clean_xpointer = clean_xpointer.substring(0, clean_xpointer.indexOf(44));@b@ return 
clean_xpointer.split("/");@b@ }@b@@b@ private int getTextPositionXpointer(String _xpointer)@b@ {@b@ int coma_index = _xpointer.indexOf(44);@b@ if ((coma_index > 0) && (coma_index < _xpointer.length()))@b@ {@b@ String position = _xpointer.substring(coma_index + 1, _xpointer.length() - 1);@b@ return Integer.parseInt(position);@b@ }@b@ return -1;@b@ }@b@@b@ public boolean isChildXPointer(String _xpointer_father, String _xpointer_child)@b@ throws ParserException@b@ {@b@ if (_xpointer_father.contains(","))@b@ {@b@ String clean_xpointer_father = _xpointer_father.split(",")[0];@b@ if (_xpointer_child.startsWith(clean_xpointer_father)) return true;@b@ }@b@ Parser parser = Parser.createParser(this.body, null);@b@ NodeList nl = parser.parse(null);@b@ Node father = getNodeXpointer(_xpointer_father, nl);@b@ Node child = getNodeXpointer(_xpointer_child, nl);@b@@b@ return isChildNode(father, child);@b@ }@b@@b@ public boolean isChildNode(Node _father, Node _child)@b@ {@b@ boolean to_return = false;@b@ NodeList children = _father.getChildren();@b@ int children_length = 0;@b@ if (children != null) children_length = children.size();@b@ int cpt_children = 0;@b@ while ((!(to_return)) && (cpt_children < children_length))@b@ {@b@ Node to_test = children.elementAt(cpt_children);@b@ if (to_test.equals(_child)) return true;@b@ to_return = isChildNode(to_test, _child);@b@ ++cpt_children;@b@ }@b@ return to_return;@b@ }@b@@b@ public Node getNodeXpointer(String _xpointer, NodeList _nl)@b@ throws ParserException@b@ {@b@ Node current = null;@b@ String[] splited_xpointer = xpointerSplit(_xpointer);@b@@b@ int nb_selectors = splited_xpointer.length;@b@@b@ if (splited_xpointer[0].contains("body"))@b@ {@b@ current = _nl.elementAt(0);@b@ }@b@ else if (splited_xpointer[0].contains("id"))@b@ {@b@ String id = splited_xpointer[0].substring(splited_xpointer[0].indexOf("id") + 4, splited_xpointer[0].length() - 2);@b@@b@ NodeList nlId = _nl.extractAllNodesThatMatch(new CssSelectorNodeFilter("#" + id), 
true);@b@ if (nlId.size() > 0) current = nlId.elementAt(0);@b@ }@b@@b@ if (nb_selectors == 1) { return current;@b@ }@b@@b@ int cpt_node_selector = 1;@b@ while ((cpt_node_selector < nb_selectors) && (current != null))@b@ {@b@ int indice_child_node = Integer.parseInt(splited_xpointer[cpt_node_selector]);@b@@b@ NodeList children = current.getChildren();@b@@b@ int nb_children = 0;@b@ int true_nb_children = 0;@b@ int children_size = 0;@b@ if (children != null) children_size = children.size();@b@ while ((nb_children < children_size) && (true_nb_children < indice_child_node))@b@ {@b@ Node current_child = children.elementAt(nb_children);@b@@b@ if ((current_child instanceof Span) && (((Span)current_child).getAttribute("class") != null) && (((Span)current_child).getAttribute("class").equals("annotation")))@b@ {@b@ ++nb_children;@b@ }@b@ else if (current_child instanceof TextNode)@b@ {@b@ ++nb_children;@b@ }@b@ else@b@ {@b@ ++nb_children;@b@ ++true_nb_children;@b@ }@b@ }@b@@b@ if ((true_nb_children == indice_child_node) && (children != null) && (children.size() > 0))@b@ {@b@ if (nb_children > 0) current = children.elementAt(nb_children - 1);@b@ else current = children.elementAt(0);@b@ }@b@@b@ ++cpt_node_selector;@b@ }@b@@b@ if ((current instanceof Span) && (((Span)current).getAttribute("class") != null) && (((Span)current).getAttribute("class").equals("annotation")))@b@ {@b@ Node next_sibling = current.getNextSibling();@b@ while ((next_sibling != null) && (next_sibling instanceof Span) && (((Span)next_sibling).getAttribute("class") != null) && (((Span)next_sibling).getAttribute("class").equals("annotation")))@b@ {@b@ next_sibling = next_sibling.getNextSibling();@b@ }@b@ if (next_sibling != null) current = next_sibling;@b@ }@b@ return current;@b@ }@b@@b@ private boolean testSameNodeXpointer(String _xpointer1, String _xpointer2)@b@ throws ParserException@b@ {@b@ if (!(_xpointer1.substring(0, _xpointer1.indexOf("#")).equalsIgnoreCase(_xpointer2.substring(0, 
_xpointer2.indexOf("#"))))) return false;@b@ if (_xpointer1.substring(0, _xpointer1.indexOf(44)).equalsIgnoreCase(_xpointer2.substring(0, _xpointer2.indexOf(44)))) { return true;@b@ }@b@@b@ Parser parser = Parser.createParser(this.body, null);@b@ NodeList nl = parser.parse(null);@b@ Node node1 = getNodeXpointer(_xpointer1, nl);@b@ Node node2 = getNodeXpointer(_xpointer2, nl);@b@ return (node1 == node2);@b@ }@b@@b@ private void createSpanAndBefore(String _toModify, int _indice_start, int _indice_end, NodeList _newChildrenList, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ String beforeSpan = _toModify.substring(0, _indice_start);@b@ String insideSpan = _toModify.substring(_indice_start, _indice_end);@b@ TextNode before_node = new TextNode(beforeSpan);@b@ _newChildrenList.add(before_node);@b@ Span span = createAnnotation(_span_style, _annotation_content, _annotation_id, insideSpan, _endSpan);@b@ _newChildrenList.add(span);@b@ }@b@@b@ private void createSpanAndSurrounding(String _toModify, int _indice_start, int _indice_end, int _to_modify_content_length, NodeList _newChildrenList, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ createSpanAndBefore(_toModify, _indice_start, _indice_end, _newChildrenList, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ String afterSpan = _toModify.substring(_indice_end, _to_modify_content_length);@b@ TextNode after_node = new TextNode(afterSpan);@b@ _newChildrenList.add(after_node);@b@ }@b@@b@ private Span createAnnotation(String _span_style, String _annotation_content, String _annotation_id, String _text_inside_span, TagNode _endSpan)@b@ {@b@ Span span = new Span();@b@ span.setAttribute("class", "annotation", '"');@b@ span.setAttribute("style", _span_style, '"');@b@ span.setAttribute("title", _annotation_content, '"');@b@ span.setAttribute("id", "annotation_" + _annotation_id, '\'');@b@ NodeList newSpanChildrenList = new 
NodeList();@b@ TextNode inside_span_node = new TextNode(_text_inside_span);@b@ newSpanChildrenList.add(inside_span_node);@b@ span.setChildren(newSpanChildrenList);@b@ span.setEndPosition(_text_inside_span.length());@b@ span.setEndTag(_endSpan);@b@ return span;@b@ }@b@@b@ public void addAnnotationSpan(String _xpointer_start, String _xpointer_end, String _span_style, String _annotation_content, String _annotation_id)@b@ throws ParserException@b@ {@b@ TagNode endSpan = new TagNode();@b@ endSpan.setTagName("/SPAN");@b@ Parser parser = Parser.createParser(this.body, null);@b@ NodeList nl = parser.parse(null);@b@ int indice_start = getTextPositionXpointer(_xpointer_start);@b@ int indice_end = getTextPositionXpointer(_xpointer_end);@b@ int nb_span_annotation = 0;@b@@b@ if (testSameNodeXpointer(_xpointer_start, _xpointer_end))@b@ {@b@ Node nodeToModify = getNodeXpointer(_xpointer_start, nl);@b@@b@ if (nodeToModify != null)@b@ {@b@ if (indice_start < indice_end) addSpans(nodeToModify, indice_start, indice_end, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ else if (indice_start > indice_end) { addSpans(nodeToModify, indice_end, indice_start, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ }@b@@b@ }@b@@b@ }@b@ else@b@ {@b@ boolean isChild = isChildXPointer(_xpointer_start, _xpointer_end);@b@ Node startNode = getNodeXpointer(_xpointer_start, nl);@b@ Node endNode = getNodeXpointer(_xpointer_end, nl);@b@ if ((startNode != null) && (endNode != null))@b@ {@b@ int[] actual_state;@b@ if (isChild)@b@ {@b@ actual_state = addSpansAllNodeUntilSpecificNode(startNode, endNode, indice_start, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ indice_start = 0;@b@ nb_span_annotation = actual_state[1];@b@@b@ addSpans(endNode, 0, indice_end, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ }@b@ else@b@ {@b@ actual_state = 
addSpansAllChildren(startNode, indice_start, 0, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ nb_span_annotation = actual_state[1];@b@@b@ Node next_sibling = startNode.getNextSibling();@b@ NodeList toHighlight = new NodeList();@b@ while ((next_sibling != null) && (!(isChildNode(next_sibling, endNode))))@b@ {@b@ toHighlight.add(next_sibling);@b@ next_sibling = next_sibling.getNextSibling();@b@ }@b@ if (next_sibling != null)@b@ {@b@ for (int i = 0; i < toHighlight.size(); ++i)@b@ {@b@ actual_state = addSpansProcessChildrenNoEndLimit(toHighlight.elementAt(i), 0, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ nb_span_annotation = actual_state[1];@b@ }@b@@b@ actual_state = addSpansAllNodeUntilSpecificNode(next_sibling, endNode, 0, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ nb_span_annotation = actual_state[1];@b@ }@b@@b@ addSpans(endNode, 0, indice_end, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ }@b@ }@b@ }@b@ if (nl != null)@b@ {@b@ String new_html = nl.toHtml();@b@ if ((new_html != null) && (new_html.length() > 0)) this.body = new_html;@b@ }@b@ }@b@@b@ private int[] addSpansAllNodeUntilSpecificNode(Node startNode, Node endNode, int indice_start, int nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode endSpan)@b@ {@b@ NodeList startNodeChildren = startNode.getChildren();@b@ if (startNodeChildren != null)@b@ {@b@ NodeList newStartNodeList = new NodeList();@b@ int cpt_children = 0;@b@ Node current_child = startNodeChildren.elementAt(cpt_children);@b@ while ((!(current_child.equals(endNode))) && (!(isChildNode(current_child, endNode))))@b@ {@b@ if (current_child instanceof TextNode)@b@ {@b@ String toModifyContent = ((TextNode)current_child).getText();@b@ int to_modify_content_length = toModifyContent.length();@b@@b@ if (indice_start > to_modify_content_length) indice_start = 
0;@b@ if (nb_span_annotation > 0) createSpanAndBefore(toModifyContent, indice_start, to_modify_content_length, newStartNodeList, _span_style, _annotation_content, "" + _annotation_id + "-" + nb_span_annotation, endSpan);@b@ else createSpanAndBefore(toModifyContent, indice_start, to_modify_content_length, newStartNodeList, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ ++nb_span_annotation;@b@ }@b@ else@b@ {@b@ int[] actual_state = addSpansNoEndLimit(current_child, indice_start, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@ newStartNodeList.add(current_child);@b@ indice_start = 0;@b@ nb_span_annotation = actual_state[1];@b@ }@b@ ++cpt_children;@b@ current_child = startNodeChildren.elementAt(cpt_children);@b@ }@b@@b@ while (cpt_children < startNodeChildren.size())@b@ {@b@ newStartNodeList.add(startNodeChildren.elementAt(cpt_children));@b@ ++cpt_children;@b@ }@b@@b@ startNode.setChildren(newStartNodeList);@b@ }@b@ int[] to_return = { indice_start, nb_span_annotation };@b@ return to_return;@b@ }@b@@b@ private int[] addSpans(Node _nodeToModify, int _indice_start, int _indice_end, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@ if (childrenOfNodeToModify != null)@b@ {@b@ int nb_children = childrenOfNodeToModify.size();@b@ if (nb_children > 0)@b@ {@b@ if ((childrenOfNodeToModify.size() == 1) && (childrenOfNodeToModify.elementAt(0) instanceof TextNode))@b@ {@b@ TextNode content_textnode = (TextNode)childrenOfNodeToModify.elementAt(0);@b@ String toModifyContent = content_textnode.getText();@b@ int to_modify_content_length = toModifyContent.length();@b@@b@ if (_indice_start > to_modify_content_length) _indice_start = 0;@b@ if (_indice_end > to_modify_content_length) _indice_end = to_modify_content_length;@b@ NodeList newChildrenList = new NodeList();@b@ if 
(_nb_span_annotation > 0) createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@ else createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@ _nodeToModify.setChildren(newChildrenList);@b@ ++_nb_span_annotation;@b@ }@b@ else@b@ {@b@ int[] actual_state = addSpansProcessChildren(_nodeToModify, _indice_start, _indice_end, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ _indice_start = actual_state[0];@b@ _indice_end = actual_state[1];@b@ _nb_span_annotation = actual_state[2];@b@ }@b@ }@b@ }@b@ else if (_nodeToModify instanceof TextNode)@b@ {@b@ String toModifyContent = ((TextNode)_nodeToModify).getText();@b@ int to_modify_content_length = toModifyContent.length();@b@@b@ if (_indice_start > to_modify_content_length) _indice_start = 0;@b@ if (_indice_end > to_modify_content_length) _indice_end = to_modify_content_length;@b@ NodeList newChildrenList = new NodeList();@b@ if (_nb_span_annotation > 0) createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@ else createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@ _nodeToModify.setChildren(newChildrenList);@b@ ++_nb_span_annotation;@b@ }@b@ int[] to_return = { _indice_start, _indice_end, _nb_span_annotation };@b@ return to_return;@b@ }@b@@b@ private int[] addSpansNoEndLimit(Node _nodeToModify, int _indice_start, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ 
NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@ if (childrenOfNodeToModify != null)@b@ {@b@ int nb_children = childrenOfNodeToModify.size();@b@ if (nb_children > 0)@b@ {@b@ if ((childrenOfNodeToModify.size() == 1) && (childrenOfNodeToModify.elementAt(0) instanceof TextNode))@b@ {@b@ TextNode content_textnode = (TextNode)childrenOfNodeToModify.elementAt(0);@b@ String toModifyContent = content_textnode.getText();@b@ int to_modify_content_length = toModifyContent.length();@b@@b@ if (_indice_start > to_modify_content_length) _indice_start = 0;@b@ NodeList newChildrenList = new NodeList();@b@ if (_nb_span_annotation > 0) createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@ else createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@ _nodeToModify.setChildren(newChildrenList);@b@@b@ ++_nb_span_annotation;@b@ }@b@ else@b@ {@b@ int[] actual_state = addSpansProcessChildrenNoEndLimit(_nodeToModify, _indice_start, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ _indice_start = actual_state[0];@b@ _nb_span_annotation = actual_state[1];@b@ }@b@ }@b@ }@b@ else if (_nodeToModify instanceof TextNode)@b@ {@b@ String toModifyContent = ((TextNode)_nodeToModify).getText();@b@ int to_modify_content_length = toModifyContent.length();@b@@b@ if (_indice_start > to_modify_content_length) _indice_start = 0;@b@ NodeList newChildrenList = new NodeList();@b@ if (_nb_span_annotation > 0) createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@ else createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, 
_annotation_content, "" + _annotation_id, _endSpan);@b@ _nodeToModify.setChildren(newChildrenList);@b@ ++_nb_span_annotation;@b@ }@b@ int[] to_return = { _indice_start, _nb_span_annotation };@b@ return to_return;@b@ }@b@/**@b@ * Rebuilds the child list of _nodeToModify, handing every direct TextNode child to createSpanInTextNode@b@ * with the end index set to the length of that text node.@b@ * Children that are not TextNode instances are copied unchanged and are not descended into.@b@ * Nothing is modified when getChildren() returns null.@b@ *@b@ * Returns the array { _start_indice, _nb_span_annotation }; _start_indice is 0 as soon as one TextNode child was processed.@b@ */@b@ private int[] addSpansAllChildren(Node _nodeToModify, int _start_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@ if (childrenOfNodeToModify != null)@b@ {@b@ NodeList toModifyNewChildren = new NodeList();@b@ for (int cptchildren = 0; cptchildren < childrenOfNodeToModify.size(); ++cptchildren)@b@ {@b@ Node current_child = childrenOfNodeToModify.elementAt(cptchildren);@b@@b@ if (current_child instanceof TextNode)@b@ {@b@ int[] actual_state = createSpanInTextNode((TextNode)current_child, cptchildren, childrenOfNodeToModify, toModifyNewChildren, _start_indice, ((TextNode)current_child).getText().length(), _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ _start_indice = 0; /* reset unconditionally: the start value returned in actual_state[0] is ignored here, unlike in addSpansProcessChildrenNoEndLimit */@b@ _nb_span_annotation = actual_state[2];@b@ }@b@ else@b@ {@b@ toModifyNewChildren.add(current_child); }@b@ }@b@ _nodeToModify.setChildren(toModifyNewChildren);@b@ }@b@@b@ int[] to_return = { _start_indice, _nb_span_annotation };@b@ return to_return;@b@ }@b@/**@b@ * Rebuilds the child list of _nodeToModify while threading the start index, end index and span counter through each direct child.@b@ * TextNode children are passed to createSpanInTextNode.@b@ * Span children whose class attribute equals annotation (compared ignoring case) are processed recursively with this same method.@b@ * Every other child is copied unchanged.@b@ * The loop stops as soon as a call reports -1 in slot 3 of its result.@b@ * Nothing is modified when getChildren() returns null.@b@ *@b@ * Returns the array { _start_indice, _end_indice, _nb_span_annotation, already_ended }, where already_ended is 0 or the slot 3 value of the last delegated call.@b@ */@b@ private int[] addSpansProcessChildren(Node _nodeToModify, int _start_indice, int _end_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ int already_ended = 0;@b@ NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@ if (childrenOfNodeToModify != null)@b@ {@b@ NodeList toModifyNewChildren = new NodeList();@b@ for (int cptchildren = 0; cptchildren < childrenOfNodeToModify.size(); ++cptchildren)@b@ {@b@ int[] actual_state;@b@ Node current_child = childrenOfNodeToModify.elementAt(cptchildren);@b@@b@ if (current_child instanceof TextNode)@b@ {@b@ actual_state 
= createSpanInTextNode((TextNode)current_child, cptchildren, childrenOfNodeToModify, toModifyNewChildren, _start_indice, _end_indice, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ _start_indice = actual_state[0];@b@ _end_indice = actual_state[1];@b@ _nb_span_annotation = actual_state[2];@b@ already_ended = actual_state[3];@b@ if (already_ended == -1) cptchildren = childrenOfNodeToModify.size(); /* forces the loop to end; in this case createSpanInTextNode has already appended the remaining siblings to toModifyNewChildren */@b@@b@ }@b@ else if ((current_child instanceof Span) && (((Span)current_child).getAttribute("class") != null) && (((Span)current_child).getAttribute("class").equalsIgnoreCase("annotation")))@b@ {@b@ actual_state = addSpansProcessChildren(current_child, _start_indice, _end_indice, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ _start_indice = actual_state[0];@b@ _end_indice = actual_state[1];@b@ _nb_span_annotation = actual_state[2];@b@ already_ended = actual_state[3];@b@ toModifyNewChildren.add(current_child);@b@ if (already_ended == -1)@b@ { /* the recursive call only rebuilt the children of current_child, so the siblings that follow it are copied here before the loop is ended */@b@ for (int i = cptchildren + 1; i < childrenOfNodeToModify.size(); ++i)@b@ {@b@ toModifyNewChildren.add(childrenOfNodeToModify.elementAt(i));@b@ }@b@@b@ cptchildren = childrenOfNodeToModify.size();@b@ }@b@ }@b@ else@b@ {@b@ toModifyNewChildren.add(current_child); }@b@ }@b@ _nodeToModify.setChildren(toModifyNewChildren);@b@ }@b@ int[] to_return = { _start_indice, _end_indice, _nb_span_annotation, already_ended };@b@ return to_return;@b@ }@b@/**@b@ * Same traversal as addSpansAllChildren: every direct TextNode child goes to createSpanInTextNode with the end index@b@ * set to the length of that text node, and every other child is copied unchanged.@b@ * The difference is that _start_indice is carried over from slot 0 of the result instead of being reset to 0.@b@ *@b@ * Returns the array { _start_indice, _nb_span_annotation }.@b@ */@b@ private int[] addSpansProcessChildrenNoEndLimit(Node _nodeToModify, int _start_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@ if (childrenOfNodeToModify != null)@b@ {@b@ NodeList toModifyNewChildren = new NodeList();@b@ for (int cptchildren = 0; cptchildren < childrenOfNodeToModify.size(); ++cptchildren)@b@ {@b@ Node current_child = 
childrenOfNodeToModify.elementAt(cptchildren);@b@@b@ if (current_child instanceof TextNode)@b@ {@b@ int[] actual_state = createSpanInTextNode((TextNode)current_child, cptchildren, childrenOfNodeToModify, toModifyNewChildren, _start_indice, ((TextNode)current_child).getText().length(), _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@ _start_indice = actual_state[0];@b@@b@ _nb_span_annotation = actual_state[2];@b@ }@b@ else@b@ {@b@ toModifyNewChildren.add(current_child); }@b@ }@b@ _nodeToModify.setChildren(toModifyNewChildren);@b@ }@b@@b@ int[] to_return = { _start_indice, _nb_span_annotation };@b@ return to_return;@b@ }@b@/**@b@ * Processes one text node against the indices _start_indice and _end_indice and appends the outcome to _newChildrenOfNodeToModify.@b@ * Three cases, chosen by comparing the text length with the two indices:@b@ * 1. length lower than _start_indice: the node is appended unchanged and both indices are reduced by the length.@b@ * 2. length at least _start_indice and strictly greater than _end_indice: createSpanAndSurrounding is called, all siblings located after@b@ *    position _cptchildrenOfNodeToModifyAlreadyProcessed in _childrenOfNodeToModify are appended, and slot 3 of the result is set to -1.@b@ * 3. otherwise (length at least _start_indice, _end_indice not below the length): createSpanAndBefore is called, the span counter is incremented,@b@ *    _start_indice becomes 0 and _end_indice is reduced by the length.@b@ * NOTE(review): createSpanAndSurrounding and createSpanAndBefore are not visible in this part of the file; what they append is to be confirmed against their definitions.@b@ *@b@ * Returns the array { _start_indice, _end_indice, _nb_span_annotation, end } where end is -1 in case 2 and 0 otherwise.@b@ */@b@ private int[] createSpanInTextNode(TextNode _textNodeToProcess, int _cptchildrenOfNodeToModifyAlreadyProcessed, NodeList _childrenOfNodeToModify, NodeList _newChildrenOfNodeToModify, int _start_indice, int _end_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@ {@b@ int end = 0;@b@ String text_content = _textNodeToProcess.getText();@b@ int length_text_content = text_content.length();@b@ if (length_text_content < _start_indice)@b@ {@b@ _start_indice -= length_text_content;@b@ _end_indice -= length_text_content;@b@ _newChildrenOfNodeToModify.add(_textNodeToProcess);@b@ }@b@ else if ((length_text_content >= _start_indice) && (length_text_content > _end_indice))@b@ {@b@ if (_nb_span_annotation > 0)@b@ {@b@ createSpanAndSurrounding(text_content, _start_indice, _end_indice, text_content.length(), _newChildrenOfNodeToModify, _span_style, _annotation_content, _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@ }@b@ else createSpanAndSurrounding(text_content, _start_indice, _end_indice, text_content.length(), _newChildrenOfNodeToModify, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@@b@ for (int children_unchecked = _cptchildrenOfNodeToModifyAlreadyProcessed + 1; children_unchecked < _childrenOfNodeToModify.size(); 
++children_unchecked)@b@ {@b@ _newChildrenOfNodeToModify.add(_childrenOfNodeToModify.elementAt(children_unchecked));@b@ }@b@ _cptchildrenOfNodeToModifyAlreadyProcessed = _childrenOfNodeToModify.size(); /* int parameters are passed by value, so this only changes the local copy; addSpansProcessChildren detects the stop through the -1 returned in slot 3 */@b@ end = -1;@b@ }@b@ else if (length_text_content >= _start_indice)@b@ { /* the id passed below always carries the dash and counter suffix, even when the counter is 0; the branch above omits the suffix in that case */@b@ createSpanAndBefore(text_content, _start_indice, text_content.length(), _newChildrenOfNodeToModify, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@ ++_nb_span_annotation;@b@ _start_indice = 0;@b@ _end_indice -= length_text_content;@b@ }@b@ int[] to_return = { _start_indice, _end_indice, _nb_span_annotation, end };@b@ return to_return;@b@ }@b@}