一、前言
jena-text源码包定义了全文搜索索引接口TextIndex,并提供了两个实现类:基于Lucene的TextIndexLucene和基于Solr的TextIndexSolr,详情见下面的源码说明。
二、源码示例
1.TextIndex文本索引接口
package org.apache.jena.query.text;

import com.hp.hpl.jena.graph.Node;
import java.util.List;
import java.util.Map;
import org.apache.jena.atlas.lib.Closeable;

/**
 * A full-text index over RDF entities.
 * <p>
 * Indexing is bracketed by {@link #startIndexing()} and either
 * {@link #finishIndexing()} (commit) or {@link #abortIndexing()} (rollback);
 * entities are added in between via {@link #addEntity(Entity)}.
 * Implementations exist for Lucene and Solr.
 */
public interface TextIndex extends Closeable {
    /** Begin an indexing batch. */
    void startIndexing();

    /** Add one entity (document) to the current indexing batch. */
    void addEntity(Entity entity);

    /** Commit and end the current indexing batch. */
    void finishIndexing();

    /** Roll back the current indexing batch. */
    void abortIndexing();

    /** Look up a single entity by its URI; field name to node value, or null if absent. */
    Map<String, Node> get(String uri);

    /** Run a text query, returning at most {@code limit} matching entity nodes. */
    List<Node> query(String queryString, int limit);

    /** Run a text query with the implementation's default result limit. */
    List<Node> query(String queryString);

    /** The entity/field definition this index was built with. */
    EntityDefinition getDocDef();
}
2.TextIndexLucene实现类
package org.apache.jena.query.text;

import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.sparql.util.NodeFactoryExtra;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link TextIndex} implementation backed by Lucene 4.6.
 * <p>
 * One document is written per {@link Entity}: the entity field (its URI) is
 * stored un-tokenized, the optional graph field is stored as a string, and all
 * other configured fields are indexed as analyzed text (not stored).
 * Query results are entity URIs recovered from the stored entity field.
 */
public class TextIndexLucene implements TextIndex {
    private static Logger log = LoggerFactory.getLogger(TextIndexLucene.class);

    /** Default maximum number of hits returned by {@link #query(String)}. */
    private static int MAX_N = 10000;

    public static final Version VER = Version.LUCENE_46;

    /** Field type for entity URIs: stored, indexed, not tokenized. */
    public static final FieldType ftIRI = new FieldType();
    public static final FieldType ftString;
    public static final FieldType ftText;

    private final EntityDefinition docDef;
    private final Directory directory;
    private IndexWriter indexWriter;
    private Analyzer analyzer;

    static {
        ftIRI.setTokenized(false);
        ftIRI.setStored(true);
        ftIRI.setIndexed(true);
        ftIRI.freeze();

        ftString = StringField.TYPE_NOT_STORED;
        ftText = TextField.TYPE_NOT_STORED;
    }

    /**
     * Create the index over {@code directory} using the field layout in {@code def}.
     * The entity and graph fields always use a KeywordAnalyzer (exact match);
     * per-field analyzers from the definition override the StandardAnalyzer default.
     */
    public TextIndexLucene(Directory directory, EntityDefinition def) {
        this.directory = directory;
        this.docDef = def;

        Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
        analyzerPerField.put(def.getEntityField(), new KeywordAnalyzer());
        if (def.getGraphField() != null)
            analyzerPerField.put(def.getGraphField(), new KeywordAnalyzer());

        for (String field : def.fields()) {
            Analyzer fieldAnalyzer = def.getAnalyzer(field);
            if (fieldAnalyzer != null)
                analyzerPerField.put(field, fieldAnalyzer);
        }

        this.analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(VER), analyzerPerField);

        // Open and immediately commit/close a writer so the index structure
        // exists on disk before the first reader is opened.
        startIndexing();
        finishIndexing();
    }

    public Directory getDirectory() {
        return this.directory;
    }

    public Analyzer getAnalyzer() {
        return this.analyzer;
    }

    /** Open an IndexWriter; subsequent {@link #addEntity(Entity)} calls batch into it. */
    public void startIndexing() {
        try {
            IndexWriterConfig wConfig = new IndexWriterConfig(VER, this.analyzer);
            this.indexWriter = new IndexWriter(this.directory, wConfig);
        }
        catch (IOException e) {
            exception(e);
        }
    }

    /** Commit and close the current writer. */
    public void finishIndexing() {
        try {
            this.indexWriter.commit();
            this.indexWriter.close();
            this.indexWriter = null;
        }
        catch (IOException e) {
            exception(e);
        }
    }

    /** Roll back uncommitted additions on the current writer. */
    public void abortIndexing() {
        try {
            this.indexWriter.rollback();
        }
        catch (IOException ex) {
            exception(ex);
        }
    }

    public void close() {
        if (this.indexWriter != null) {
            try {
                this.indexWriter.close();
            }
            catch (IOException ex) {
                exception(ex);
            }
        }
    }

    /**
     * Index one entity. If called outside a startIndexing/finishIndexing
     * bracket (no open writer), a one-shot batch is opened and committed.
     */
    public void addEntity(Entity entity) {
        if (log.isDebugEnabled())
            log.debug("Add entity: " + entity);
        try {
            boolean autoBatch = (this.indexWriter == null);

            Document doc = doc(entity);
            if (autoBatch)
                startIndexing();
            this.indexWriter.addDocument(doc);
            if (autoBatch)
                finishIndexing();
        }
        catch (IOException e) {
            exception(e);
        }
    }

    /** Build the Lucene document for an entity: URI, optional graph, text fields. */
    private Document doc(Entity entity) {
        Document doc = new Document();
        doc.add(new Field(this.docDef.getEntityField(), entity.getId(), ftIRI));

        String graphField = this.docDef.getGraphField();
        if (graphField != null)
            doc.add(new Field(graphField, entity.getGraph(), ftString));

        for (Map.Entry e : entity.getMap().entrySet())
            doc.add(new Field((String) e.getKey(), (String) e.getValue(), ftText));
        return doc;
    }

    /**
     * Fetch the indexed record for one entity URI, or null if not indexed.
     * FIX: the reader is now closed via try-with-resources (the original
     * opened a DirectoryReader and leaked it on every call).
     */
    public Map<String, Node> get(String uri) {
        try (IndexReader indexReader = DirectoryReader.open(this.directory)) {
            List<Map<String, Node>> x = get$(indexReader, uri);
            if (x.size() == 0)
                return null;
            return x.get(0);
        }
        catch (Exception ex) {
            exception(ex);
        }
        return null;
    }

    private static Query parseQuery(String queryString, String primaryField, Analyzer analyzer) throws ParseException {
        QueryParser queryParser = new QueryParser(VER, primaryField, analyzer);
        queryParser.setAllowLeadingWildcard(true);
        return queryParser.parse(queryString);
    }

    private List<Map<String, Node>> get$(IndexReader indexReader, String uri) throws ParseException, IOException {
        String escaped = QueryParserBase.escape(uri);
        String qs = this.docDef.getEntityField() + ":" + escaped;
        Query query = parseQuery(qs, this.docDef.getPrimaryField(), this.analyzer);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        ScoreDoc[] sDocs = indexSearcher.search(query, 1).scoreDocs;
        List<Map<String, Node>> records = new ArrayList<Map<String, Node>>();

        for (ScoreDoc sd : sDocs) {
            Document doc = indexSearcher.doc(sd.doc);
            String[] x = doc.getValues(this.docDef.getEntityField());
            // FIX: the original had the empty statement `if (x.length != 1);`
            // which checked nothing; at least report the anomaly.
            if (x.length != 1)
                log.warn("Multiple values for entity field: " + uri);
            String uriStr = x[0];
            Map<String, Node> record = new HashMap<String, Node>();
            Node entity = NodeFactory.createURI(uriStr);
            record.put(this.docDef.getEntityField(), entity);

            for (String f : this.docDef.fields()) {
                for (String v : doc.getValues(f))
                    record.put(f, entryToNode(v));
            }
            // FIX: add once per matching document. The original called
            // records.add(record) inside the per-field loop, so the same
            // record appeared once per configured field.
            records.add(record);
        }
        return records;
    }

    public List<Node> query(String qs) {
        return query(qs, MAX_N);
    }

    /**
     * Run a text query, returning up to {@code limit} entity nodes.
     * FIX: the decompiled original swallowed any Throwable thrown by
     * query$ and silently returned null; errors now surface through
     * exception() as TextIndexException.
     */
    public List<Node> query(String qs, int limit) {
        try (IndexReader indexReader = DirectoryReader.open(this.directory)) {
            return query$(indexReader, qs, limit);
        }
        catch (Exception ex) {
            exception(ex);
        }
        return null;
    }

    private List<Node> query$(IndexReader indexReader, String qs, int limit) throws ParseException, IOException {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Query query = parseQuery(qs, this.docDef.getPrimaryField(), this.analyzer);
        if (limit <= 0)
            limit = MAX_N;
        ScoreDoc[] sDocs = indexSearcher.search(query, limit).scoreDocs;

        List<Node> results = new ArrayList<Node>();
        for (ScoreDoc sd : sDocs) {
            Document doc = indexSearcher.doc(sd.doc);
            for (String v : doc.getValues(this.docDef.getEntityField()))
                results.add(TextQueryFuncs.stringToNode(v));
        }
        return results;
    }

    public EntityDefinition getDocDef() {
        return this.docDef;
    }

    /** Plain (untyped, no-lang) literal node for a stored field value. */
    private Node entryToNode(String v) {
        return NodeFactoryExtra.createLiteralNode(v, null, null);
    }

    private static void exception(Exception ex) {
        throw new TextIndexException(ex);
    }
}
3.TextIndexSolr实现类
package org.apache.jena.query.text;

import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.sparql.util.NodeFactoryExtra;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link TextIndex} implementation backed by a Solr server (SolrJ client).
 * <p>
 * Batching maps onto Solr semantics: {@link #startIndexing()} is a no-op,
 * {@link #finishIndexing()} commits, {@link #abortIndexing()} rolls back.
 */
public class TextIndexSolr implements TextIndex {
    private static Logger log = LoggerFactory.getLogger(TextIndexSolr.class);

    private final SolrServer solrServer;
    private EntityDefinition docDef;

    /** Default maximum rows requested when the caller gives no limit. */
    private static int MAX_N = 10000;

    public TextIndexSolr(SolrServer server, EntityDefinition def) {
        this.solrServer = server;
        this.docDef = def;
    }

    /** No-op: Solr batches implicitly until commit. */
    public void startIndexing() {
    }

    /** Commit pending additions on the Solr server. */
    public void finishIndexing() {
        try {
            this.solrServer.commit();
        } catch (Exception ex) {
            exception(ex);
        }
    }

    /** Roll back pending additions on the Solr server. */
    public void abortIndexing() {
        try {
            this.solrServer.rollback();
        } catch (Exception ex) {
            exception(ex);
        }
    }

    public void close() {
        if (this.solrServer != null)
            this.solrServer.shutdown();
    }

    /** Send one entity to Solr as a document (no commit). */
    public void addEntity(Entity entity) {
        try {
            SolrInputDocument doc = solrDoc(entity);
            this.solrServer.add(doc);
        } catch (Exception e) {
            exception(e);
        }
    }

    /** Build the SolrInputDocument: entity URI, optional graph, then text fields. */
    private SolrInputDocument solrDoc(Entity entity) {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField(this.docDef.getEntityField(), entity.getId());

        String graphField = this.docDef.getGraphField();
        if (graphField != null)
            doc.addField(graphField, entity.getGraph());

        // {"add": value} is Solr's atomic/partial-update syntax.
        // NOTE(review): the single map instance is reused for every field, so all
        // stored references see the last value put into it — preserved from the
        // original; confirm against SolrJ's copy semantics before relying on it.
        Map<String, Object> map = new HashMap<String, Object>();
        for (Map.Entry e : (java.util.Set<Map.Entry>) entity.getMap().entrySet()) {
            map.put("add", e.getValue());
            doc.addField((String) e.getKey(), map);
        }
        return doc;
    }

    /** Fetch the indexed record for one entity URI, or null if not indexed. */
    public Map<String, Node> get(String uri) {
        String escaped = ClientUtils.escapeQueryChars(uri);
        String qs = this.docDef.getEntityField() + ":" + escaped;
        SolrDocumentList solrResults = solrQuery(qs, 1);

        List<Map<String, Node>> records = process(solrResults);
        if (records.size() == 0)
            return null;
        if (records.size() > 1)
            log.warn("Multiple docs for one URI: " + uri);
        return records.get(0);
    }

    /**
     * Convert Solr documents into field-name-to-node records.
     * Rewritten from decompiler label/while spaghetti into the equivalent
     * straightforward loop: null field values are skipped, and multi-valued
     * (List) field values are also skipped — preserved from the original,
     * which read and discarded them.
     */
    private List<Map<String, Node>> process(SolrDocumentList solrResults) {
        List<Map<String, Node>> records = new ArrayList<Map<String, Node>>();

        for (SolrDocument sd : solrResults) {
            Map<String, Node> record = new HashMap<String, Node>();
            String uriStr = (String) sd.getFieldValue(this.docDef.getEntityField());
            Node entity = NodeFactory.createURI(uriStr);
            record.put(this.docDef.getEntityField(), entity);

            for (String f : this.docDef.fields()) {
                Object obj = sd.getFieldValue(f);
                if (obj == null)
                    continue;
                if (obj instanceof List)
                    continue; // multi-valued field: not mapped to a node (original behavior)
                String v = (String) obj;
                record.put(f, entryToNode(v));
            }
            records.add(record);
        }
        return records;
    }

    public List<Node> query(String qs) {
        return query(qs, 0);
    }

    /** Run a text query; {@code limit <= 0} means the MAX_N default. */
    public List<Node> query(String qs, int limit) {
        SolrDocumentList solrResults = solrQuery(qs, limit);
        List<Node> results = new ArrayList<Node>();

        for (SolrDocument sd : solrResults) {
            String str = (String) sd.getFieldValue(this.docDef.getEntityField());
            results.add(TextQueryFuncs.stringToNode(str));
        }

        // Solr may return more rows than asked in edge cases; trim defensively.
        if (limit > 0 && results.size() > limit)
            results = results.subList(0, limit);

        return results;
    }

    /** Execute the query against Solr with the primary field as default field. */
    private SolrDocumentList solrQuery(String qs, int limit) {
        SolrQuery sq = new SolrQuery(qs);
        sq.setRows(limit > 0 ? limit : MAX_N);
        try {
            sq.add("df", new String[] { this.docDef.getPrimaryField() });
            QueryResponse rsp = this.solrServer.query(sq);
            return rsp.getResults();
        } catch (SolrServerException e) {
            exception(e);
        }
        return null;
    }

    public EntityDefinition getDocDef() {
        return this.docDef;
    }

    /** Plain (untyped, no-lang) literal node for a stored field value. */
    private Node entryToNode(String v) {
        return NodeFactoryExtra.createLiteralNode(v, null, null);
    }

    public SolrServer getServer() {
        return this.solrServer;
    }

    private static void exception(Exception ex) {
        throw new TextIndexException(ex);
    }
}