首页

关于html-2.0.0源码包中HTMLPage类对HTML页面进行完整解析处理的源码说明

标签:html-2.0.0,HTMLPage,html页面解析类,html对象,htmlparser,natoine     发布时间:2018-06-07   

一、前言

html-2.0.0源码包中的fr.natoine.html.HTMLPage类用于抓取HTML页面并对其进行解析处理:提取页面的标题、CSS样式和正文,改写正文中的链接地址,将body标签替换为包裹用的div,并可根据XPointer在正文中添加批注(annotation)span。具体实现参见下面的源码说明部分。

二、源码说明

package fr.natoine.html;@b@@b@import fr.natoine.stringOp.StringOp;@b@import java.io.IOException;@b@import java.io.PrintStream;@b@import java.nio.ByteBuffer;@b@import java.nio.CharBuffer;@b@import java.nio.charset.CharacterCodingException;@b@import java.nio.charset.Charset;@b@import java.nio.charset.CharsetDecoder;@b@import java.nio.charset.CharsetEncoder;@b@import java.util.regex.Matcher;@b@import java.util.regex.Pattern;@b@import org.apache.http.client.ClientProtocolException;@b@import org.apache.http.client.HttpClient;@b@import org.apache.http.client.ResponseHandler;@b@import org.apache.http.client.methods.HttpGet;@b@import org.apache.http.conn.ClientConnectionManager;@b@import org.apache.http.impl.client.BasicResponseHandler;@b@import org.apache.http.impl.client.DefaultHttpClient;@b@import org.apache.http.params.HttpParams;@b@import org.htmlparser.Node;@b@import org.htmlparser.Parser;@b@import org.htmlparser.Tag;@b@import org.htmlparser.filters.CssSelectorNodeFilter;@b@import org.htmlparser.filters.NodeClassFilter;@b@import org.htmlparser.nodes.TagNode;@b@import org.htmlparser.nodes.TextNode;@b@import org.htmlparser.tags.BodyTag;@b@import org.htmlparser.tags.HeadTag;@b@import org.htmlparser.tags.Html;@b@import org.htmlparser.tags.LinkTag;@b@import org.htmlparser.tags.MetaTag;@b@import org.htmlparser.tags.Span;@b@import org.htmlparser.tags.StyleTag;@b@import org.htmlparser.tags.TitleTag;@b@import org.htmlparser.util.NodeList;@b@import org.htmlparser.util.ParserException;@b@@b@public class HTMLPage@b@{@b@  private String url;@b@  private String domain;@b@  private String title;@b@  private String css;@b@  private String body;@b@  private String scripts;@b@  private String wrapperDiv;@b@  private String encoding;@b@  private boolean valid;@b@  private static String DEFAULT_WRAPPER = "PortletBrowserContent";@b@  private static int DEFAULT_TIME_TO_CREATE = 3000;@b@@b@  public HTMLPage()@b@  {@b@    this.valid = false;@b@    this.url = "not a valid url";@b@    
this.domain = "not a valid domain";@b@    this.title = "no title";@b@    this.css = "";@b@    this.body = "";@b@    this.wrapperDiv = DEFAULT_WRAPPER;@b@    this.encoding = null;@b@  }@b@@b@  public HTMLPage(String _url)@b@  {@b@    this(_url, DEFAULT_TIME_TO_CREATE, DEFAULT_WRAPPER);@b@  }@b@@b@  public HTMLPage(String _url, int _time_to_create, String _wrapperDiv)@b@  {@b@    this.wrapperDiv = _wrapperDiv;@b@@b@    if (_url.endsWith("/")) this.url = _url.substring(0, _url.length() - 1);@b@    else this.url = _url;@b@@b@    if (_url.startsWith("http://"))@b@    {@b@      this.domain = "http://" + extractDomain(_url);@b@      String response_content = extractFullContentPage(_url, _time_to_create);@b@@b@      if (this.valid)@b@      {@b@        extractBodyTitleCss(response_content, _time_to_create);@b@        correctHREF("javascript:browserHREF");@b@@b@        encodeBody();@b@      }@b@      else@b@      {@b@        this.title = this.url;@b@      }@b@    }@b@    finalizeBody();@b@  }@b@@b@  private void finalizeBody()@b@  {@b@    if ((this.body != null) && (this.body.length() > 0))@b@    {@b@      Pattern p = Pattern.compile("(<body>)|(<BODY>)");@b@      Matcher m = p.matcher("");@b@      m.reset(this.body);@b@      this.body = m.replaceAll("<div id='" + this.wrapperDiv + "'>");@b@      p = Pattern.compile("(</body>)|(</BODY>)");@b@      m = p.matcher("");@b@      m.reset(this.body);@b@      this.body = m.replaceAll("</div>");@b@    } else {@b@      this.body = "";@b@    }@b@  }@b@@b@  private void encodeBody()@b@  {@b@    String[] olds;@b@    String[] news;@b@    int i;@b@    if (this.encoding != null)@b@    {@b@      if (this.encoding.equalsIgnoreCase("UTF-8"))@b@      {@b@        CharsetEncoder encoder = Charset.forName("ISO-8859-1").newEncoder();@b@        try {@b@          String decoded = new String(encoder.encode(CharBuffer.wrap(this.body.toCharArray())).array());@b@@b@          CharsetDecoder decoder = Charset.forName(this.encoding).newDecoder();@b@          
decoded = decoder.decode(ByteBuffer.wrap(decoded.getBytes())).toString();@b@@b@          this.body = decoded;@b@        }@b@        catch (CharacterCodingException e) {@b@          e.printStackTrace();@b@        }@b@      }@b@    }@b@    else@b@    {@b@      olds = new String[16];@b@      news = new String[16];@b@      olds[0] = "&acirc;"; news[0] = "â";@b@      olds[1] = "&agrave;"; news[1] = "à";@b@      olds[2] = "&eacute;"; news[2] = "é";@b@      olds[3] = "&ecirc;"; news[3] = "ê";@b@      olds[4] = "&egrave;"; news[4] = "è";@b@      olds[5] = "&euml;"; news[5] = "ë";@b@      olds[6] = "&icirc;"; news[6] = "î";@b@      olds[7] = "&iuml;"; news[7] = "ï";@b@      olds[8] = "&ocirc;"; news[8] = "ô";@b@      olds[9] = "&oelig;"; news[9] = "œ";@b@      olds[10] = "&ucirc;"; news[10] = "û";@b@      olds[11] = "&ugrave;"; news[11] = "ù";@b@      olds[12] = "&uuml;"; news[12] = "ü";@b@      olds[13] = "&ccedil;"; news[13] = "ç";@b@      olds[14] = "&lt;"; news[14] = "<";@b@      olds[15] = "&gt;"; news[15] = ">";@b@      for (i = 0; i < olds.length; ++i)@b@      {@b@        this.body = this.body.replaceAll(olds[i], news[i]);@b@      }@b@    }@b@  }@b@@b@  private void correctHREF(String _javascript_href_wrapper)@b@  {@b@    Parser parser = Parser.createParser(this.body, null);@b@    try@b@    {@b@      NodeList nl = parser.parse(null);@b@      NodeList a_hrefs = nl.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class), true);@b@@b@      int nb_a = a_hrefs.size();@b@@b@      char quote = '"';@b@      for (int cpt_a = 0; cpt_a < nb_a; ++cpt_a)@b@      {@b@        Node a = a_hrefs.elementAt(cpt_a);@b@        String original_href = ((TagNode)a).getAttribute("href");@b@        if (original_href != null)@b@        {@b@          String true_href;@b@          if (original_href.startsWith("http")) { true_href = original_href;@b@          } else if (original_href.startsWith("./"))@b@          {@b@            true_href = this.domain + "/" + original_href.substring(2);@b@    
      }@b@          else if (original_href.startsWith("../"))@b@          {@b@            int firstindexOfslash_href = original_href.indexOf("/");@b@            int lastindexOfslash = this.url.lastIndexOf("/");@b@            if (lastindexOfslash == this.url.length())@b@            {@b@              lastindexOfslash = this.url.substring(0, this.url.length() - 1).lastIndexOf("/");@b@            }@b@            true_href = this.url.subSequence(0, lastindexOfslash) + "/" + original_href.substring(firstindexOfslash_href);@b@          }@b@          else if (original_href.startsWith("/")) { true_href = this.domain + original_href; } else {@b@            true_href = this.url + "/" + original_href; }@b@          ((TagNode)a).setAttribute("href", _javascript_href_wrapper + "('" + true_href + "')", quote);@b@        }@b@      }@b@@b@      this.body = nl.toHtml();@b@    }@b@    catch (ParserException e) {@b@      System.out.println("[HTMLPage.correctHREF] problems while Parsing");@b@      e.printStackTrace();@b@    }@b@  }@b@@b@  public String extractTitle() throws ParserException@b@  {@b@    String response_content = extractFullContentPage(this.url, DEFAULT_TIME_TO_CREATE);@b@    if (this.valid)@b@    {@b@      this.title = this.url;@b@      Parser parser = Parser.createParser(response_content, null);@b@      NodeList nl = parser.parse(null);@b@      NodeList titles = nl.extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class), true);@b@      if ((titles.size() > 0) && @b@        (titles.elementAt(0) instanceof Tag)) this.title = ((Tag)titles.elementAt(0)).getFirstChild().getText();@b@    }@b@@b@    return this.title;@b@  }@b@@b@  public String extractDomain(String _url)@b@  {@b@    String domain = "not a valid domain";@b@    String[] _url_split = _url.split("/");@b@    if (_url_split.length > 1) return _url_split[2];@b@    return domain;@b@  }@b@@b@  public String extractFullContentResource(String _url, int _time_to_respond)@b@  {@b@    String response_content = null;@b@ 
   HttpClient httpclient = new DefaultHttpClient();@b@@b@    httpclient.getParams().setBooleanParameter("http.protocol.handle-redirects", true);@b@    httpclient.getParams().setBooleanParameter("http.protocol.handle-authentication", true);@b@    httpclient.getParams().setIntParameter("http.socket.timeout", 3000);@b@@b@    HttpGet httpget = new HttpGet(_url);@b@    try@b@    {@b@      ResponseHandler responseHandler = new BasicResponseHandler();@b@      String responseBody = (String)httpclient.execute(httpget, responseHandler);@b@      if (responseBody != null)@b@      {@b@        response_content = responseBody;@b@      }@b@@b@      return response_content;@b@    }@b@    catch (ClientProtocolException e)@b@    {@b@      System.out.println("[HTMLPage.extractFullContentCssLink] url : " + _url + " doesn't support GET requests !!! ");@b@      e.printStackTrace();@b@@b@      return response_content;@b@    }@b@    catch (IOException e)@b@    {@b@      System.out.println("[HTMLPage.extractFullContentCssLink] url : " + _url + " send no data !!! Not responding ... 
");@b@      e.printStackTrace();@b@@b@      return response_content;@b@    }@b@    finally@b@    {@b@      httpclient.getConnectionManager().shutdown(); }@b@    return response_content;@b@  }@b@@b@  private String extractFullContentPage(String _url, int _time_to_respond)@b@  {@b@    String content = extractFullContentResource(_url, _time_to_respond);@b@    if (content != null) this.valid = true;@b@    else { this.valid = false;@b@    }@b@@b@    return content;@b@  }@b@@b@  private void extractBodyTitleCss(String _html, int _time_to_extract_css)@b@  {@b@    Parser parser;@b@    try {@b@      parser = Parser.createParser(_html, null);@b@      NodeList nl = parser.parse(null);@b@@b@      NodeList htmls = nl.extractAllNodesThatMatch(new NodeClassFilter(Html.class));@b@      if (htmls.size() > 0)@b@      {@b@        NodeList heads = htmls.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(HeadTag.class));@b@        NodeList bodys = htmls.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(BodyTag.class));@b@        int nb_heads_node = heads.size();@b@        if (nb_heads_node > 0)@b@        {@b@          int cpt_metas;@b@          NodeList titles = heads.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(TitleTag.class));@b@          if (titles.size() > 0)@b@          {@b@            if (titles.elementAt(0) instanceof Tag)@b@            {@b@              Tag tag_title = (Tag)titles.elementAt(0);@b@              this.title = tag_title.getFirstChild().getText();@b@            } else {@b@              this.title = this.url;@b@            }@b@          }@b@          else {@b@            System.out.println("[HTMLPage.extractBodyTitleCss] no title tag, url for default title value");@b@            this.title = this.url;@b@          }@b@@b@          NodeList metas = heads.elementAt(0).getChildren().extractAllNodesThatMatch(new NodeClassFilter(MetaTag.class));@b@          if (metas.size() > 0)@b@          {@b@         
   int metas_size = metas.size();@b@            for (cpt_metas = 0; cpt_metas < metas_size; ++cpt_metas)@b@            {@b@              MetaTag meta = (MetaTag)metas.elementAt(cpt_metas);@b@              String httpEquiv = meta.getHttpEquiv();@b@              if (httpEquiv != null)@b@              {@b@                if (httpEquiv.equalsIgnoreCase("Content-Type"))@b@                {@b@                  String content = meta.getMetaContent();@b@                  if (content.contains("charset"))@b@                  {@b@                    int charset_index = content.indexOf("charset");@b@                    int egal_index = content.indexOf("=", charset_index);@b@                    String charset = content.substring(egal_index + 1, content.length());@b@                    this.encoding = StringOp.deleteBlanks(charset);@b@                    break;@b@                  }@b@                }@b@              }@b@              else@b@              {@b@                String charset = meta.getAttribute("charset");@b@                if (charset != null)@b@                {@b@                  this.encoding = charset;@b@                  break;@b@                }@b@              }@b@            }@b@          }@b@@b@          String wip_css = "";@b@@b@          for (int cpt_heads_node = 0; cpt_heads_node < nb_heads_node; ++cpt_heads_node)@b@          {@b@            NodeList headChildren = heads.elementAt(cpt_heads_node).getChildren();@b@@b@            int nb_insideHead_nodes = headChildren.size();@b@@b@            for (int cpt_insideHead_nodes = 0; cpt_insideHead_nodes < nb_insideHead_nodes; ++cpt_insideHead_nodes)@b@            {@b@              Node currentNode = headChildren.elementAt(cpt_insideHead_nodes);@b@@b@              if (currentNode instanceof StyleTag)@b@              {@b@                wip_css = wip_css.concat(((StyleTag)currentNode).getStyleCode());@b@              }@b@@b@              if ((currentNode instanceof TagNode) && @b@                
(((TagNode)currentNode).getRawTagName().equalsIgnoreCase("link")) && (((TagNode)currentNode).getAttribute("rel").equalsIgnoreCase("stylesheet")))@b@              {@b@                String href_css = ((TagNode)currentNode).getAttribute("href");@b@@b@                String true_url_href_css = null;@b@                if (href_css.startsWith("http://")) { true_url_href_css = href_css;@b@                }@b@                else if (href_css.startsWith("./"))@b@                {@b@                  true_url_href_css = this.domain + "/" + href_css.substring(2);@b@                }@b@                else if (href_css.startsWith("../"))@b@                {@b@                  int firstindexOfslash_href = href_css.indexOf("/");@b@                  int lastindexOfslash = this.url.lastIndexOf("/");@b@                  if (lastindexOfslash == this.url.length())@b@                  {@b@                    lastindexOfslash = this.url.substring(0, this.url.length() - 1).lastIndexOf("/");@b@                  }@b@                  true_url_href_css = this.url.subSequence(0, lastindexOfslash) + "/" + href_css.substring(firstindexOfslash_href);@b@                }@b@                else if (href_css.startsWith("/")) { true_url_href_css = this.domain + href_css; } else {@b@                  true_url_href_css = this.url + "/" + href_css;@b@                }@b@@b@                String css_content = extractFullContentResource(true_url_href_css, _time_to_extract_css);@b@                if (css_content != null) wip_css = wip_css.concat(css_content);@b@@b@              }@b@@b@            }@b@@b@          }@b@@b@          this.css = deleteCommentsNewLine(wip_css, this.wrapperDiv);@b@        }@b@        else@b@        {@b@          System.out.println("[HTMLPage.extractBodyTitleCss] no head tag, default title value = url");@b@          this.title = this.url;@b@        }@b@        if (bodys.size() > 0)@b@        {@b@          this.body = bodys.elementAt(0).toHtml();@b@        }@b@      }@b@    
  else@b@      {@b@        System.out.println("[HTMLPage.extractBodyTitleCss] not a valid HTML content");@b@        this.title = this.url;@b@      }@b@    }@b@    catch (ParserException e)@b@    {@b@      this.title = this.url;@b@      System.out.println("[HTMLPage.extractBodyTitleCss] error parsing HTML content");@b@      e.printStackTrace();@b@    }@b@  }@b@@b@  public String deleteCommentsNewLine(String _wip_css, String _new_englobing_div)@b@  {@b@    if ((_wip_css == null) || (_wip_css.length() == 0)) { return "";@b@    }@b@@b@    Pattern p = Pattern.compile("(?:/\\*(?:[^*]|(?:\\*+[^*/]))*\\*+/)", 8);@b@    Matcher m = p.matcher("");@b@    m.reset(_wip_css);@b@    String result = m.replaceAll("");@b@@b@    p = Pattern.compile("(\n)|(\t)");@b@    m = p.matcher("");@b@    m.reset(result);@b@    result = m.replaceAll("");@b@@b@    p = Pattern.compile(" (?= )|(?<= ) ");@b@    m = p.matcher("");@b@    m.reset(result);@b@    result = m.replaceAll(" ");@b@@b@    p = Pattern.compile("}");@b@    m = p.matcher("");@b@    m.reset(result);@b@    result = m.replaceAll("} #" + _new_englobing_div + " ");@b@    int cpt_last_spaces_index = result.length();@b@    while ((cpt_last_spaces_index > 0) && (result.charAt(cpt_last_spaces_index - 1) == ' '))@b@    {@b@      --cpt_last_spaces_index;@b@    }@b@    result = result.substring(0, cpt_last_spaces_index);@b@    if (result.endsWith("#" + _new_englobing_div)) result = result.substring(0, result.lastIndexOf("#" + _new_englobing_div));@b@    result = "#" + _new_englobing_div + " ".concat(result);@b@    return result;@b@  }@b@@b@  public String toString()@b@  {@b@    String _to_print = "Classe " + super.getClass();@b@    _to_print = _to_print.concat(" url : " + this.url);@b@    _to_print = _to_print.concat(" domain : " + this.domain);@b@    _to_print = _to_print.concat(" title : " + this.title);@b@    _to_print = _to_print.concat(" css : " + this.css);@b@    _to_print = _to_print.concat(" body : " + this.body);@b@    if (this.valid) 
_to_print = _to_print.concat(" valid !!!");@b@    else _to_print = _to_print.concat(" not valid !!!");@b@    return _to_print;@b@  }@b@@b@  public String getEncoding()@b@  {@b@    return this.encoding;@b@  }@b@@b@  public void setEncoding(String encoding)@b@  {@b@    this.encoding = encoding;@b@  }@b@@b@  public String getURL() {@b@    return this.url; }@b@@b@  public void setURL(String uRL) {@b@    this.url = uRL; }@b@@b@  public String getDomain() {@b@    return this.domain; }@b@@b@  public void setDomain(String domain) {@b@    this.domain = domain; }@b@@b@  public String getTitle() {@b@    return this.title; }@b@@b@  public void setTitle(String title) {@b@    this.title = title; }@b@@b@  public String getCss() {@b@    return this.css; }@b@@b@  public void setCss(String css) {@b@    this.css = css; }@b@@b@  public String getBody() {@b@    return this.body; }@b@@b@  public void setBody(String body) {@b@    this.body = body;@b@  }@b@@b@  public void setScripts(String scripts)@b@  {@b@    this.scripts = scripts;@b@  }@b@@b@  public String getScripts() {@b@    return this.scripts;@b@  }@b@@b@  public String[] xpointerSplit(String _xpointer)@b@  {@b@    String xpointer_tag = "#xpointer(";@b@    int begin_sub = _xpointer.indexOf(xpointer_tag) + xpointer_tag.length();@b@    String clean_xpointer = _xpointer.substring(begin_sub, _xpointer.length());@b@    clean_xpointer = clean_xpointer.substring(0, clean_xpointer.indexOf(44));@b@    return clean_xpointer.split("/");@b@  }@b@@b@  private int getTextPositionXpointer(String _xpointer)@b@  {@b@    int coma_index = _xpointer.indexOf(44);@b@    if ((coma_index > 0) && (coma_index < _xpointer.length()))@b@    {@b@      String position = _xpointer.substring(coma_index + 1, _xpointer.length() - 1);@b@      return Integer.parseInt(position);@b@    }@b@    return -1;@b@  }@b@@b@  public boolean isChildXPointer(String _xpointer_father, String _xpointer_child)@b@    throws ParserException@b@  {@b@    if 
(_xpointer_father.contains(","))@b@    {@b@      String clean_xpointer_father = _xpointer_father.split(",")[0];@b@      if (_xpointer_child.startsWith(clean_xpointer_father)) return true;@b@    }@b@    Parser parser = Parser.createParser(this.body, null);@b@    NodeList nl = parser.parse(null);@b@    Node father = getNodeXpointer(_xpointer_father, nl);@b@    Node child = getNodeXpointer(_xpointer_child, nl);@b@@b@    return isChildNode(father, child);@b@  }@b@@b@  public boolean isChildNode(Node _father, Node _child)@b@  {@b@    boolean to_return = false;@b@    NodeList children = _father.getChildren();@b@    int children_length = 0;@b@    if (children != null) children_length = children.size();@b@    int cpt_children = 0;@b@    while ((!(to_return)) && (cpt_children < children_length))@b@    {@b@      Node to_test = children.elementAt(cpt_children);@b@      if (to_test.equals(_child)) return true;@b@      to_return = isChildNode(to_test, _child);@b@      ++cpt_children;@b@    }@b@    return to_return;@b@  }@b@@b@  public Node getNodeXpointer(String _xpointer, NodeList _nl)@b@    throws ParserException@b@  {@b@    Node current = null;@b@    String[] splited_xpointer = xpointerSplit(_xpointer);@b@@b@    int nb_selectors = splited_xpointer.length;@b@@b@    if (splited_xpointer[0].contains("body"))@b@    {@b@      current = _nl.elementAt(0);@b@    }@b@    else if (splited_xpointer[0].contains("id"))@b@    {@b@      String id = splited_xpointer[0].substring(splited_xpointer[0].indexOf("id") + 4, splited_xpointer[0].length() - 2);@b@@b@      NodeList nlId = _nl.extractAllNodesThatMatch(new CssSelectorNodeFilter("#" + id), true);@b@      if (nlId.size() > 0) current = nlId.elementAt(0);@b@    }@b@@b@    if (nb_selectors == 1) { return current;@b@    }@b@@b@    int cpt_node_selector = 1;@b@    while ((cpt_node_selector < nb_selectors) && (current != null))@b@    {@b@      int indice_child_node = Integer.parseInt(splited_xpointer[cpt_node_selector]);@b@@b@      NodeList 
children = current.getChildren();@b@@b@      int nb_children = 0;@b@      int true_nb_children = 0;@b@      int children_size = 0;@b@      if (children != null) children_size = children.size();@b@      while ((nb_children < children_size) && (true_nb_children < indice_child_node))@b@      {@b@        Node current_child = children.elementAt(nb_children);@b@@b@        if ((current_child instanceof Span) && (((Span)current_child).getAttribute("class") != null) && (((Span)current_child).getAttribute("class").equals("annotation")))@b@        {@b@          ++nb_children;@b@        }@b@        else if (current_child instanceof TextNode)@b@        {@b@          ++nb_children;@b@        }@b@        else@b@        {@b@          ++nb_children;@b@          ++true_nb_children;@b@        }@b@      }@b@@b@      if ((true_nb_children == indice_child_node) && (children != null) && (children.size() > 0))@b@      {@b@        if (nb_children > 0) current = children.elementAt(nb_children - 1);@b@        else current = children.elementAt(0);@b@      }@b@@b@      ++cpt_node_selector;@b@    }@b@@b@    if ((current instanceof Span) && (((Span)current).getAttribute("class") != null) && (((Span)current).getAttribute("class").equals("annotation")))@b@    {@b@      Node next_sibling = current.getNextSibling();@b@      while ((next_sibling != null) && (next_sibling instanceof Span) && (((Span)next_sibling).getAttribute("class") != null) && (((Span)next_sibling).getAttribute("class").equals("annotation")))@b@      {@b@        next_sibling = next_sibling.getNextSibling();@b@      }@b@      if (next_sibling != null) current = next_sibling;@b@    }@b@    return current;@b@  }@b@@b@  private boolean testSameNodeXpointer(String _xpointer1, String _xpointer2)@b@    throws ParserException@b@  {@b@    if (!(_xpointer1.substring(0, _xpointer1.indexOf("#")).equalsIgnoreCase(_xpointer2.substring(0, _xpointer2.indexOf("#"))))) return false;@b@    if (_xpointer1.substring(0, 
_xpointer1.indexOf(44)).equalsIgnoreCase(_xpointer2.substring(0, _xpointer2.indexOf(44)))) { return true;@b@    }@b@@b@    Parser parser = Parser.createParser(this.body, null);@b@    NodeList nl = parser.parse(null);@b@    Node node1 = getNodeXpointer(_xpointer1, nl);@b@    Node node2 = getNodeXpointer(_xpointer2, nl);@b@    return (node1 == node2);@b@  }@b@@b@  private void createSpanAndBefore(String _toModify, int _indice_start, int _indice_end, NodeList _newChildrenList, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    String beforeSpan = _toModify.substring(0, _indice_start);@b@    String insideSpan = _toModify.substring(_indice_start, _indice_end);@b@    TextNode before_node = new TextNode(beforeSpan);@b@    _newChildrenList.add(before_node);@b@    Span span = createAnnotation(_span_style, _annotation_content, _annotation_id, insideSpan, _endSpan);@b@    _newChildrenList.add(span);@b@  }@b@@b@  private void createSpanAndSurrounding(String _toModify, int _indice_start, int _indice_end, int _to_modify_content_length, NodeList _newChildrenList, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    createSpanAndBefore(_toModify, _indice_start, _indice_end, _newChildrenList, _span_style, _annotation_content, _annotation_id, _endSpan);@b@    String afterSpan = _toModify.substring(_indice_end, _to_modify_content_length);@b@    TextNode after_node = new TextNode(afterSpan);@b@    _newChildrenList.add(after_node);@b@  }@b@@b@  private Span createAnnotation(String _span_style, String _annotation_content, String _annotation_id, String _text_inside_span, TagNode _endSpan)@b@  {@b@    Span span = new Span();@b@    span.setAttribute("class", "annotation", '"');@b@    span.setAttribute("style", _span_style, '"');@b@    span.setAttribute("title", _annotation_content, '"');@b@    span.setAttribute("id", "annotation_" + _annotation_id, '\'');@b@    NodeList newSpanChildrenList = 
new NodeList();@b@    TextNode inside_span_node = new TextNode(_text_inside_span);@b@    newSpanChildrenList.add(inside_span_node);@b@    span.setChildren(newSpanChildrenList);@b@    span.setEndPosition(_text_inside_span.length());@b@    span.setEndTag(_endSpan);@b@    return span;@b@  }@b@@b@  public void addAnnotationSpan(String _xpointer_start, String _xpointer_end, String _span_style, String _annotation_content, String _annotation_id)@b@    throws ParserException@b@  {@b@    TagNode endSpan = new TagNode();@b@    endSpan.setTagName("/SPAN");@b@    Parser parser = Parser.createParser(this.body, null);@b@    NodeList nl = parser.parse(null);@b@    int indice_start = getTextPositionXpointer(_xpointer_start);@b@    int indice_end = getTextPositionXpointer(_xpointer_end);@b@    int nb_span_annotation = 0;@b@@b@    if (testSameNodeXpointer(_xpointer_start, _xpointer_end))@b@    {@b@      Node nodeToModify = getNodeXpointer(_xpointer_start, nl);@b@@b@      if (nodeToModify != null)@b@      {@b@        if (indice_start < indice_end) addSpans(nodeToModify, indice_start, indice_end, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@        else if (indice_start > indice_end) { addSpans(nodeToModify, indice_end, indice_start, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@        }@b@@b@      }@b@@b@    }@b@    else@b@    {@b@      boolean isChild = isChildXPointer(_xpointer_start, _xpointer_end);@b@      Node startNode = getNodeXpointer(_xpointer_start, nl);@b@      Node endNode = getNodeXpointer(_xpointer_end, nl);@b@      if ((startNode != null) && (endNode != null))@b@      {@b@        int[] actual_state;@b@        if (isChild)@b@        {@b@          actual_state = addSpansAllNodeUntilSpecificNode(startNode, endNode, indice_start, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@          indice_start = 0;@b@          nb_span_annotation = 
actual_state[1];@b@@b@          addSpans(endNode, 0, indice_end, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@        }@b@        else@b@        {@b@          actual_state = addSpansAllChildren(startNode, indice_start, 0, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@          nb_span_annotation = actual_state[1];@b@@b@          Node next_sibling = startNode.getNextSibling();@b@          NodeList toHighlight = new NodeList();@b@          while ((next_sibling != null) && (!(isChildNode(next_sibling, endNode))))@b@          {@b@            toHighlight.add(next_sibling);@b@            next_sibling = next_sibling.getNextSibling();@b@          }@b@          if (next_sibling != null)@b@          {@b@            for (int i = 0; i < toHighlight.size(); ++i)@b@            {@b@              actual_state = addSpansProcessChildrenNoEndLimit(toHighlight.elementAt(i), 0, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@              nb_span_annotation = actual_state[1];@b@            }@b@@b@            actual_state = addSpansAllNodeUntilSpecificNode(next_sibling, endNode, 0, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@            nb_span_annotation = actual_state[1];@b@          }@b@@b@          addSpans(endNode, 0, indice_end, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@        }@b@      }@b@    }@b@    if (nl != null)@b@    {@b@      String new_html = nl.toHtml();@b@      if ((new_html != null) && (new_html.length() > 0)) this.body = new_html;@b@    }@b@  }@b@@b@  private int[] addSpansAllNodeUntilSpecificNode(Node startNode, Node endNode, int indice_start, int nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode endSpan)@b@  {@b@    NodeList startNodeChildren = startNode.getChildren();@b@    if (startNodeChildren != null)@b@    {@b@      NodeList 
newStartNodeList = new NodeList();@b@      int cpt_children = 0;@b@      Node current_child = startNodeChildren.elementAt(cpt_children);@b@      while ((!(current_child.equals(endNode))) && (!(isChildNode(current_child, endNode))))@b@      {@b@        if (current_child instanceof TextNode)@b@        {@b@          String toModifyContent = ((TextNode)current_child).getText();@b@          int to_modify_content_length = toModifyContent.length();@b@@b@          if (indice_start > to_modify_content_length) indice_start = 0;@b@          if (nb_span_annotation > 0) createSpanAndBefore(toModifyContent, indice_start, to_modify_content_length, newStartNodeList, _span_style, _annotation_content, "" + _annotation_id + "-" + nb_span_annotation, endSpan);@b@          else createSpanAndBefore(toModifyContent, indice_start, to_modify_content_length, newStartNodeList, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@          ++nb_span_annotation;@b@        }@b@        else@b@        {@b@          int[] actual_state = addSpansNoEndLimit(current_child, indice_start, nb_span_annotation, _span_style, _annotation_content, "" + _annotation_id, endSpan);@b@          newStartNodeList.add(current_child);@b@          indice_start = 0;@b@          nb_span_annotation = actual_state[1];@b@        }@b@        ++cpt_children;@b@        current_child = startNodeChildren.elementAt(cpt_children);@b@      }@b@@b@      while (cpt_children < startNodeChildren.size())@b@      {@b@        newStartNodeList.add(startNodeChildren.elementAt(cpt_children));@b@        ++cpt_children;@b@      }@b@@b@      startNode.setChildren(newStartNodeList);@b@    }@b@    int[] to_return = { indice_start, nb_span_annotation };@b@    return to_return;@b@  }@b@@b@  private int[] addSpans(Node _nodeToModify, int _indice_start, int _indice_end, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    NodeList childrenOfNodeToModify = 
_nodeToModify.getChildren();@b@    if (childrenOfNodeToModify != null)@b@    {@b@      int nb_children = childrenOfNodeToModify.size();@b@      if (nb_children > 0)@b@      {@b@        if ((childrenOfNodeToModify.size() == 1) && (childrenOfNodeToModify.elementAt(0) instanceof TextNode))@b@        {@b@          TextNode content_textnode = (TextNode)childrenOfNodeToModify.elementAt(0);@b@          String toModifyContent = content_textnode.getText();@b@          int to_modify_content_length = toModifyContent.length();@b@@b@          if (_indice_start > to_modify_content_length) _indice_start = 0;@b@          if (_indice_end > to_modify_content_length) _indice_end = to_modify_content_length;@b@          NodeList newChildrenList = new NodeList();@b@          if (_nb_span_annotation > 0) createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@          else createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@          _nodeToModify.setChildren(newChildrenList);@b@          ++_nb_span_annotation;@b@        }@b@        else@b@        {@b@          int[] actual_state = addSpansProcessChildren(_nodeToModify, _indice_start, _indice_end, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@          _indice_start = actual_state[0];@b@          _indice_end = actual_state[1];@b@          _nb_span_annotation = actual_state[2];@b@        }@b@      }@b@    }@b@    else if (_nodeToModify instanceof TextNode)@b@    {@b@      String toModifyContent = ((TextNode)_nodeToModify).getText();@b@      int to_modify_content_length = toModifyContent.length();@b@@b@      if (_indice_start > to_modify_content_length) _indice_start = 0;@b@      if (_indice_end > to_modify_content_length) 
_indice_end = to_modify_content_length;@b@      NodeList newChildrenList = new NodeList();@b@      if (_nb_span_annotation > 0) createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@      else createSpanAndSurrounding(toModifyContent, _indice_start, _indice_end, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@     /* NOTE(review): _nodeToModify is a TextNode in this branch; confirm that htmlparser actually renders children attached to a text node */ _nodeToModify.setChildren(newChildrenList);@b@      ++_nb_span_annotation;@b@    }@b@    int[] to_return = { _indice_start, _indice_end, _nb_span_annotation };@b@    return to_return;@b@  }@b@@b@ /**@b@  * Open-ended counterpart of addSpans: annotates from _indice_start up to the end of the text, passing the text length as the end position to createSpanAndBefore.@b@  * Case 1, exactly one child and it is a TextNode: a start offset past the text length resets to 0, createSpanAndBefore fills a fresh NodeList, that list replaces the children of the node, and the span counter is incremented.@b@  * Case 2, any other non-empty child list: delegated to addSpansProcessChildrenNoEndLimit, whose two result slots overwrite the start offset and the counter.@b@  * Case 3, null child list and the node is itself a TextNode: same handling as case 1, applied to the text of the node itself.@b@  * A non-null but empty child list leaves the node untouched.@b@  * The span id handed to the helper is _annotation_id while the counter is 0, otherwise _annotation_id followed by a dash and the counter.@b@  * createSpanAndBefore is defined elsewhere in this class; presumably it emits the text before the start offset followed by the span (TODO confirm).@b@  * @param _nodeToModify node whose children are replaced in place@b@  * @param _indice_start start offset of the range inside the text@b@  * @param _nb_span_annotation running span counter; incremented after each span built here and used as id suffix when greater than 0@b@  * @param _span_style forwarded unchanged to the span-building helper@b@  * @param _annotation_content forwarded unchanged to the span-building helper@b@  * @param _annotation_id base id for the generated spans@b@  * @param _endSpan forwarded unchanged to the span-building helper@b@  * @return two ints: _indice_start and _nb_span_annotation as left after processing@b@  */ private int[] addSpansNoEndLimit(Node _nodeToModify, int _indice_start, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@    if (childrenOfNodeToModify != null)@b@    {@b@      int nb_children = childrenOfNodeToModify.size();@b@      if (nb_children > 0)@b@      {@b@        if ((childrenOfNodeToModify.size() == 1) && (childrenOfNodeToModify.elementAt(0) instanceof TextNode))@b@        {@b@          TextNode content_textnode = (TextNode)childrenOfNodeToModify.elementAt(0);@b@          String toModifyContent = content_textnode.getText();@b@          int to_modify_content_length = toModifyContent.length();@b@@b@          if (_indice_start > to_modify_content_length) _indice_start = 0;@b@          NodeList newChildrenList = new NodeList();@b@          if (_nb_span_annotation > 0) createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@          else createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" 
+ _annotation_id, _endSpan);@b@          _nodeToModify.setChildren(newChildrenList);@b@@b@          ++_nb_span_annotation;@b@        }@b@        else@b@        {@b@         /* several children or a single non-text child: walk them one by one */ int[] actual_state = addSpansProcessChildrenNoEndLimit(_nodeToModify, _indice_start, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@          _indice_start = actual_state[0];@b@          _nb_span_annotation = actual_state[1];@b@        }@b@      }@b@    }@b@    else if (_nodeToModify instanceof TextNode)@b@    {@b@      String toModifyContent = ((TextNode)_nodeToModify).getText();@b@      int to_modify_content_length = toModifyContent.length();@b@@b@      if (_indice_start > to_modify_content_length) _indice_start = 0;@b@      NodeList newChildrenList = new NodeList();@b@      if (_nb_span_annotation > 0) createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@      else createSpanAndBefore(toModifyContent, _indice_start, to_modify_content_length, newChildrenList, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@      _nodeToModify.setChildren(newChildrenList);@b@      ++_nb_span_annotation;@b@    }@b@    int[] to_return = { _indice_start, _nb_span_annotation };@b@    return to_return;@b@  }@b@@b@ /**@b@  * Rebuilds the direct child list of _nodeToModify, sending every TextNode child through createSpanInTextNode with the full length of that text as end offset.@b@  * Children that are not TextNode instances are appended unchanged and are not descended into.@b@  * After each text child the start offset is forced to 0 and the counter is taken from slot 2 of the helper result.@b@  * When the child list is null nothing is modified.@b@  * @param _nodeToModify node whose children are replaced in place@b@  * @param _start_indice start offset applied to the first text child encountered@b@  * @param _nb_span_annotation running span counter, updated from the helper result@b@  * @param _span_style forwarded unchanged to createSpanInTextNode@b@  * @param _annotation_content forwarded unchanged to createSpanInTextNode@b@  * @param _annotation_id base id for the generated spans@b@  * @param _endSpan forwarded unchanged to createSpanInTextNode@b@  * @return two ints: _start_indice (0 once any text child was processed, otherwise the value received) and _nb_span_annotation@b@  */ private int[] addSpansAllChildren(Node _nodeToModify, int _start_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@    if (childrenOfNodeToModify != null)@b@    {@b@      NodeList toModifyNewChildren = new NodeList();@b@      for (int cptchildren = 0; cptchildren < childrenOfNodeToModify.size(); ++cptchildren)@b@      {@b@        Node current_child = childrenOfNodeToModify.elementAt(cptchildren);@b@@b@        if (current_child instanceof TextNode)@b@        {@b@      
    int[] actual_state = createSpanInTextNode((TextNode)current_child, cptchildren, childrenOfNodeToModify, toModifyNewChildren, _start_indice, ((TextNode)current_child).getText().length(), _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@         /* NOTE(review): slot 0 of actual_state is discarded here, so a start offset still pending after a short text child is dropped; addSpansProcessChildrenNoEndLimit keeps it instead - confirm the reset is intended */ _start_indice = 0;@b@          _nb_span_annotation = actual_state[2];@b@        }@b@        else@b@        {@b@          toModifyNewChildren.add(current_child); }@b@      }@b@      _nodeToModify.setChildren(toModifyNewChildren);@b@    }@b@@b@    int[] to_return = { _start_indice, _nb_span_annotation };@b@    return to_return;@b@  }@b@@b@ /**@b@  * Rebuilds the direct child list of _nodeToModify while applying a range bounded by _start_indice and _end_indice.@b@  * TextNode children go through createSpanInTextNode; both offsets, the counter and the end flag are refreshed from its four result slots.@b@  * Span children whose class attribute equals annotation (ignoring case) are processed recursively by this same method and then appended.@b@  * Every other child is appended unchanged and is not descended into.@b@  * As soon as the end flag becomes -1 the loop is cut short by pushing the loop index to the list size.@b@  * When the child list is null nothing is modified and the flag stays 0.@b@  * @param _nodeToModify node whose children are replaced in place@b@  * @param _start_indice start offset of the range, relative to the next text child@b@  * @param _end_indice end offset of the range, relative to the next text child@b@  * @param _nb_span_annotation running span counter@b@  * @param _span_style forwarded unchanged@b@  * @param _annotation_content forwarded unchanged@b@  * @param _annotation_id base id for the generated spans@b@  * @param _endSpan forwarded unchanged@b@  * @return four ints: _start_indice, _end_indice, _nb_span_annotation and already_ended (-1 once the range has been closed, 0 otherwise)@b@  */ private int[] addSpansProcessChildren(Node _nodeToModify, int _start_indice, int _end_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    int already_ended = 0;@b@    NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@    if (childrenOfNodeToModify != null)@b@    {@b@      NodeList toModifyNewChildren = new NodeList();@b@      for (int cptchildren = 0; cptchildren < childrenOfNodeToModify.size(); ++cptchildren)@b@      {@b@        int[] actual_state;@b@        Node current_child = childrenOfNodeToModify.elementAt(cptchildren);@b@@b@        if (current_child instanceof TextNode)@b@        {@b@          actual_state = createSpanInTextNode((TextNode)current_child, cptchildren, childrenOfNodeToModify, toModifyNewChildren, _start_indice, _end_indice, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@          _start_indice = actual_state[0];@b@          _end_indice = actual_state[1];@b@          _nb_span_annotation = actual_state[2];@b@          already_ended = actual_state[3];@b@         /* -1: the range closed inside this text child and createSpanInTextNode already copied the remaining siblings, so stop iterating */ if (already_ended == -1) cptchildren = childrenOfNodeToModify.size();@b@@b@        }@b@        else if ((current_child instanceof Span) && (((Span)current_child).getAttribute("class") != null) && 
(((Span)current_child).getAttribute("class").equalsIgnoreCase("annotation")))@b@        {@b@         /* span already marked as an annotation (presumably produced by an earlier pass - confirm): recurse into it so its text takes part in the range */ actual_state = addSpansProcessChildren(current_child, _start_indice, _end_indice, _nb_span_annotation, _span_style, _annotation_content, _annotation_id, _endSpan);@b@          _start_indice = actual_state[0];@b@          _end_indice = actual_state[1];@b@          _nb_span_annotation = actual_state[2];@b@          already_ended = actual_state[3];@b@          toModifyNewChildren.add(current_child);@b@         /* range closed inside the nested span: copy the untouched remaining siblings here, then stop the outer loop */ if (already_ended == -1)@b@          {@b@            for (int i = cptchildren + 1; i < childrenOfNodeToModify.size(); ++i)@b@            {@b@              toModifyNewChildren.add(childrenOfNodeToModify.elementAt(i));@b@            }@b@@b@            cptchildren = childrenOfNodeToModify.size();@b@          }@b@        }@b@        else@b@        {@b@          toModifyNewChildren.add(current_child); }@b@      }@b@      _nodeToModify.setChildren(toModifyNewChildren);@b@    }@b@    int[] to_return = { _start_indice, _end_indice, _nb_span_annotation, already_ended };@b@    return to_return;@b@  }@b@@b@ /**@b@  * Open-ended child walk: rebuilds the direct child list of _nodeToModify, handing each TextNode child to createSpanInTextNode with the full length of that text as end offset.@b@  * The start offset and the counter returned by the helper (slots 0 and 2) are carried over to the next child.@b@  * Children that are not TextNode instances are appended unchanged and are not descended into.@b@  * When the child list is null nothing is modified.@b@  * @param _nodeToModify node whose children are replaced in place@b@  * @param _start_indice start offset of the range, relative to the next text child@b@  * @param _nb_span_annotation running span counter@b@  * @param _span_style forwarded unchanged to createSpanInTextNode@b@  * @param _annotation_content forwarded unchanged to createSpanInTextNode@b@  * @param _annotation_id base id for the generated spans@b@  * @param _endSpan forwarded unchanged to createSpanInTextNode@b@  * @return two ints: _start_indice and _nb_span_annotation as left after the last child@b@  */ private int[] addSpansProcessChildrenNoEndLimit(Node _nodeToModify, int _start_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    NodeList childrenOfNodeToModify = _nodeToModify.getChildren();@b@    if (childrenOfNodeToModify != null)@b@    {@b@      NodeList toModifyNewChildren = new NodeList();@b@      for (int cptchildren = 0; cptchildren < childrenOfNodeToModify.size(); ++cptchildren)@b@      {@b@        Node current_child = childrenOfNodeToModify.elementAt(cptchildren);@b@@b@        if (current_child instanceof TextNode)@b@        {@b@          int[] actual_state = createSpanInTextNode((TextNode)current_child, cptchildren, childrenOfNodeToModify, toModifyNewChildren, _start_indice, ((TextNode)current_child).getText().length(), _nb_span_annotation, _span_style, _annotation_content, 
_annotation_id, _endSpan);@b@          _start_indice = actual_state[0];@b@@b@          _nb_span_annotation = actual_state[2];@b@        }@b@        else@b@        {@b@          toModifyNewChildren.add(current_child); }@b@      }@b@      _nodeToModify.setChildren(toModifyNewChildren);@b@    }@b@@b@    int[] to_return = { _start_indice, _nb_span_annotation };@b@    return to_return;@b@  }@b@@b@ /**@b@  * Handles one text child against the current range and appends the outcome to _newChildrenOfNodeToModify.@b@  * The text length is compared with the two offsets, giving three mutually exclusive cases:@b@  * (1) length lower than the start offset: the text node is appended unchanged and both offsets are reduced by the length;@b@  * (2) length at least the start offset and greater than the end offset: createSpanAndSurrounding is called, every sibling located after position _cptchildrenOfNodeToModifyAlreadyProcessed is appended unchanged, and the end flag becomes -1; the counter is not incremented;@b@  * (3) length at least the start offset and not greater than the end offset: createSpanAndBefore is called with the text length as end position, the counter is incremented, the start offset becomes 0 and the end offset is reduced by the length.@b@  * createSpanAndSurrounding and createSpanAndBefore are defined elsewhere in this class.@b@  * @param _textNodeToProcess text child being examined@b@  * @param _cptchildrenOfNodeToModifyAlreadyProcessed position of that child inside _childrenOfNodeToModify@b@  * @param _childrenOfNodeToModify original sibling list, only read@b@  * @param _newChildrenOfNodeToModify replacement sibling list being built, appended to@b@  * @param _start_indice start offset of the range, relative to this text@b@  * @param _end_indice end offset of the range, relative to this text@b@  * @param _nb_span_annotation running span counter@b@  * @param _span_style forwarded unchanged to the span-building helpers@b@  * @param _annotation_content forwarded unchanged to the span-building helpers@b@  * @param _annotation_id base id for the generated spans@b@  * @param _endSpan forwarded unchanged to the span-building helpers@b@  * @return four ints: _start_indice, _end_indice, _nb_span_annotation and the end flag (-1 when the range closed in this text, 0 otherwise)@b@  */ private int[] createSpanInTextNode(TextNode _textNodeToProcess, int _cptchildrenOfNodeToModifyAlreadyProcessed, NodeList _childrenOfNodeToModify, NodeList _newChildrenOfNodeToModify, int _start_indice, int _end_indice, int _nb_span_annotation, String _span_style, String _annotation_content, String _annotation_id, TagNode _endSpan)@b@  {@b@    int end = 0;@b@    String text_content = _textNodeToProcess.getText();@b@    int length_text_content = text_content.length();@b@    if (length_text_content < _start_indice)@b@    {@b@     /* range starts beyond this text: keep the node as-is and rebase both offsets for the next text */ _start_indice -= length_text_content;@b@      _end_indice -= length_text_content;@b@      _newChildrenOfNodeToModify.add(_textNodeToProcess);@b@    }@b@    else if ((length_text_content >= _start_indice) && (length_text_content > _end_indice))@b@    {@b@      if (_nb_span_annotation > 0)@b@      {@b@        createSpanAndSurrounding(text_content, _start_indice, _end_indice, text_content.length(), _newChildrenOfNodeToModify, _span_style, _annotation_content, _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@      }@b@      else createSpanAndSurrounding(text_content, _start_indice, _end_indice, text_content.length(), _newChildrenOfNodeToModify, _span_style, _annotation_content, "" + _annotation_id, _endSpan);@b@@b@     /* the range is finished: the siblings that follow are copied over untouched */ for (int children_unchecked = _cptchildrenOfNodeToModifyAlreadyProcessed + 1; children_unchecked < _childrenOfNodeToModify.size(); ++children_unchecked)@b@      {@b@        _newChildrenOfNodeToModify.add(_childrenOfNodeToModify.elementAt(children_unchecked));@b@      }@b@     /* NOTE(review): assignment to a by-value int parameter, invisible to the caller; the -1 flag below is what the visible caller checks */ _cptchildrenOfNodeToModifyAlreadyProcessed = 
_childrenOfNodeToModify.size();@b@      end = -1;@b@    }@b@    else if (length_text_content >= _start_indice)@b@    {@b@     /* range starts in this text and continues past it. NOTE(review): the id always gets the dash-counter suffix here, even when the counter is 0, unlike the other call sites - confirm */ createSpanAndBefore(text_content, _start_indice, text_content.length(), _newChildrenOfNodeToModify, _span_style, _annotation_content, "" + _annotation_id + "-" + _nb_span_annotation, _endSpan);@b@      ++_nb_span_annotation;@b@      _start_indice = 0;@b@      _end_indice -= length_text_content;@b@    }@b@    int[] to_return = { _start_indice, _end_indice, _nb_span_annotation, end };@b@    return to_return;@b@  }@b@}
  • <<相关内容>>
<<热门下载>>