001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.Browsable;
020 import com.browseengine.bobo.api.BrowseHit;
021 import com.browseengine.bobo.api.BrowseRequest;
022 import com.browseengine.bobo.api.BrowseResult;
023 import com.browseengine.bobo.api.FacetAccessible;
024 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031
032 import com.liferay.portal.kernel.dao.orm.QueryUtil;
033 import com.liferay.portal.kernel.json.JSONArray;
034 import com.liferay.portal.kernel.json.JSONObject;
035 import com.liferay.portal.kernel.log.Log;
036 import com.liferay.portal.kernel.log.LogFactoryUtil;
037 import com.liferay.portal.kernel.search.Document;
038 import com.liferay.portal.kernel.search.DocumentImpl;
039 import com.liferay.portal.kernel.search.Field;
040 import com.liferay.portal.kernel.search.Hits;
041 import com.liferay.portal.kernel.search.HitsImpl;
042 import com.liferay.portal.kernel.search.IndexSearcher;
043 import com.liferay.portal.kernel.search.ParseException;
044 import com.liferay.portal.kernel.search.Query;
045 import com.liferay.portal.kernel.search.QueryConfig;
046 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047 import com.liferay.portal.kernel.search.SearchContext;
048 import com.liferay.portal.kernel.search.SearchException;
049 import com.liferay.portal.kernel.search.Sort;
050 import com.liferay.portal.kernel.search.facet.Facet;
051 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052 import com.liferay.portal.kernel.search.facet.RangeFacet;
053 import com.liferay.portal.kernel.search.facet.SimpleFacet;
054 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056 import com.liferay.portal.kernel.util.ArrayUtil;
057 import com.liferay.portal.kernel.util.StringPool;
058 import com.liferay.portal.kernel.util.StringUtil;
059 import com.liferay.portal.kernel.util.Time;
060 import com.liferay.portal.kernel.util.Validator;
061 import com.liferay.portal.search.BoboFacetCollector;
062 import com.liferay.portal.util.PropsValues;
063
064 import java.io.IOException;
065
066 import java.util.ArrayList;
067 import java.util.List;
068 import java.util.Locale;
069 import java.util.Map;
070
071 import org.apache.lucene.document.NumericField;
072 import org.apache.lucene.index.IndexReader;
073 import org.apache.lucene.search.BooleanQuery;
074 import org.apache.lucene.search.Explanation;
075 import org.apache.lucene.search.ScoreDoc;
076 import org.apache.lucene.search.SortField;
077 import org.apache.lucene.search.TopFieldDocs;
078
079
082 public class LuceneIndexSearcherImpl implements IndexSearcher {
083
084 public Hits search(SearchContext searchContext, Query query)
085 throws SearchException {
086
087 if (_log.isDebugEnabled()) {
088 _log.debug("Query " + query);
089 }
090
091 Hits hits = null;
092
093 org.apache.lucene.search.IndexSearcher indexSearcher = null;
094 Map<String, Facet> facets = null;
095 BrowseRequest browseRequest = null;
096 Browsable browsable = null;
097
098 try {
099 indexSearcher = LuceneHelperUtil.getSearcher(
100 searchContext.getCompanyId(), true);
101
102 List<FacetHandler<?>> facetHandlers =
103 new ArrayList<FacetHandler<?>>();
104
105 facets = searchContext.getFacets();
106
107 for (Facet facet : facets.values()) {
108 if (facet.isStatic()) {
109 continue;
110 }
111
112 FacetConfiguration facetConfiguration =
113 facet.getFacetConfiguration();
114
115 if (facet instanceof MultiValueFacet) {
116 MultiValueFacetHandler multiValueFacetHandler =
117 new MultiValueFacetHandler(
118 facetConfiguration.getFieldName(),
119 facetConfiguration.getFieldName());
120
121 JSONObject dataJSONObject = facetConfiguration.getData();
122
123 if (dataJSONObject.has("maxTerms")) {
124 multiValueFacetHandler.setMaxItems(
125 dataJSONObject.getInt("maxTerms"));
126 }
127
128 facetHandlers.add(multiValueFacetHandler);
129 }
130 else if (facet instanceof RangeFacet) {
131 List<String> ranges = new ArrayList<String>();
132
133 JSONObject dataJSONObject = facetConfiguration.getData();
134
135 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
136 "ranges");
137
138 if (rangesJSONArray != null) {
139 for (int i = 0; i < rangesJSONArray.length(); i++) {
140 JSONObject rangeJSONObject =
141 rangesJSONArray.getJSONObject(i);
142
143 ranges.add(rangeJSONObject.getString("range"));
144 }
145 }
146
147 RangeFacetHandler rangeFacetHandler =
148 new RangeFacetHandler(
149 facetConfiguration.getFieldName(),
150 facetConfiguration.getFieldName(), ranges);
151
152 rangeFacetHandler.setTermCountSize(TermCountSize.large);
153
154 facetHandlers.add(rangeFacetHandler);
155 }
156 else if (facet instanceof SimpleFacet) {
157 SimpleFacetHandler simpleFacetHandler =
158 new SimpleFacetHandler(
159 facetConfiguration.getFieldName(),
160 facetConfiguration.getFieldName());
161
162 facetHandlers.add(simpleFacetHandler);
163 }
164 }
165
166 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
167 indexSearcher.getIndexReader(), facetHandlers);
168
169 SortField[] sortFields = new SortField[0];
170
171 Sort[] sorts = searchContext.getSorts();
172
173 if (sorts != null) {
174 sortFields = new SortField[sorts.length];
175
176 for (int i = 0; i < sorts.length; i++) {
177 Sort sort = sorts[i];
178
179 sortFields[i] = new SortField(
180 sort.getFieldName(), sort.getType(), sort.isReverse());
181 }
182 }
183
184 browseRequest = new BrowseRequest();
185
186 for (Facet facet : facets.values()) {
187 if (facet.isStatic()) {
188 continue;
189 }
190
191 FacetConfiguration facetConfiguration =
192 facet.getFacetConfiguration();
193
194 FacetSpec facetSpec = new FacetSpec();
195
196 facetSpec.setOrderBy(
197 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
198
199 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
200 }
201
202 browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
203 browseRequest.setOffset(0);
204 browseRequest.setQuery(
205 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
206 query));
207 browseRequest.setSort(sortFields);
208
209 browsable = new BoboBrowser(boboIndexReader);
210
211 long startTime = System.currentTimeMillis();
212
213 BrowseResult browseResult = browsable.browse(browseRequest);
214
215 BrowseHit[] browseHits = browseResult.getHits();
216
217 long endTime = System.currentTimeMillis();
218
219 float searchTime = (float)(endTime - startTime) / Time.SECOND;
220
221 hits = toHits(
222 indexSearcher, new HitDocs(browseHits), query, startTime,
223 searchTime, searchContext.getStart(), searchContext.getEnd());
224
225 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
226
227 for (Map.Entry<String, FacetAccessible> entry :
228 facetMap.entrySet()) {
229
230 Facet facet = facets.get(entry.getKey());
231
232 FacetAccessible facetAccessible = entry.getValue();
233
234 FacetCollector facetCollector = new BoboFacetCollector(
235 entry.getKey(), facetAccessible);
236
237 facet.setFacetCollector(facetCollector);
238 }
239 }
240 catch (BooleanQuery.TooManyClauses tmc) {
241 int maxClauseCount = BooleanQuery.getMaxClauseCount();
242
243 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
244
245 try {
246 long startTime = System.currentTimeMillis();
247
248 BrowseResult result = browsable.browse(browseRequest);
249
250 BrowseHit[] browseHits = result.getHits();
251
252 long endTime = System.currentTimeMillis();
253
254 float searchTime = (float)(endTime - startTime) / Time.SECOND;
255
256 hits = toHits(
257 indexSearcher, new HitDocs(browseHits), query, startTime,
258 searchTime, searchContext.getStart(),
259 searchContext.getEnd());
260
261 Map<String, FacetAccessible> facetMap = result.getFacetMap();
262
263 for (Map.Entry<String, FacetAccessible> entry :
264 facetMap.entrySet()) {
265
266 Facet facet = facets.get(entry.getKey());
267
268 FacetAccessible facetAccessible = entry.getValue();
269
270 FacetCollector facetCollector = new BoboFacetCollector(
271 entry.getKey(), facetAccessible);
272
273 facet.setFacetCollector(facetCollector);
274 }
275 }
276 catch (Exception e) {
277 throw new SearchException(e);
278 }
279 finally {
280 BooleanQuery.setMaxClauseCount(maxClauseCount);
281 }
282 }
283 catch (ParseException pe) {
284 _log.error("Query " + query, pe);
285
286 return new HitsImpl();
287 }
288 catch (Exception e) {
289 throw new SearchException(e);
290 }
291 finally {
292 if (browsable != null) {
293 try {
294 browsable.close();
295 }
296 catch (IOException ioe) {
297 _log.error(ioe, ioe);
298 }
299 }
300
301 if (indexSearcher != null) {
302 try {
303 indexSearcher.close();
304 }
305 catch (IOException ioe) {
306 _log.error(ioe, ioe);
307 }
308 }
309 }
310
311 if (_log.isDebugEnabled()) {
312 _log.debug(
313 "Search found " + hits.getLength() + " results in " +
314 hits.getSearchTime() + "ms");
315 }
316
317 return hits;
318 }
319
320 public Hits search(
321 String searchEngineId, long companyId, Query query, Sort[] sorts,
322 int start, int end)
323 throws SearchException {
324
325 if (_log.isDebugEnabled()) {
326 _log.debug("Query " + query);
327 }
328
329 Hits hits = null;
330
331 org.apache.lucene.search.IndexSearcher indexSearcher = null;
332 org.apache.lucene.search.Sort luceneSort = null;
333
334 try {
335 indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
336
337 if (sorts != null) {
338 SortField[] sortFields = new SortField[sorts.length];
339
340 for (int i = 0; i < sorts.length; i++) {
341 Sort sort = sorts[i];
342
343 sortFields[i] = new SortField(
344 sort.getFieldName(), sort.getType(), sort.isReverse());
345 }
346
347 luceneSort = new org.apache.lucene.search.Sort(sortFields);
348 }
349 else {
350 luceneSort = new org.apache.lucene.search.Sort();
351 }
352
353 long startTime = System.currentTimeMillis();
354
355 TopFieldDocs topFieldDocs = indexSearcher.search(
356 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
357 query),
358 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
359
360 long endTime = System.currentTimeMillis();
361
362 float searchTime = (float)(endTime - startTime) / Time.SECOND;
363
364 hits = toHits(
365 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
366 searchTime, start, end);
367 }
368 catch (BooleanQuery.TooManyClauses tmc) {
369 int maxClauseCount = BooleanQuery.getMaxClauseCount();
370
371 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
372
373 try {
374 long startTime = System.currentTimeMillis();
375
376 TopFieldDocs topFieldDocs = indexSearcher.search(
377 (org.apache.lucene.search.Query)
378 QueryTranslatorUtil.translate(query),
379 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
380
381 long endTime = System.currentTimeMillis();
382
383 float searchTime = (float)(endTime - startTime) / Time.SECOND;
384
385 hits = toHits(
386 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
387 searchTime, start, end);
388 }
389 catch (Exception e) {
390 throw new SearchException(e);
391 }
392 finally {
393 BooleanQuery.setMaxClauseCount(maxClauseCount);
394 }
395 }
396 catch (ParseException pe) {
397 _log.error("Query " + query, pe);
398
399 return new HitsImpl();
400 }
401 catch (Exception e) {
402 throw new SearchException(e);
403 }
404 finally {
405 if (indexSearcher != null) {
406 try {
407 indexSearcher.close();
408 }
409 catch (IOException ioe) {
410 _log.error(ioe, ioe);
411 }
412 }
413 }
414
415 if (_log.isDebugEnabled()) {
416 _log.debug(
417 "Search found " + hits.getLength() + " results in " +
418 hits.getSearchTime() + "ms");
419 }
420
421 return hits;
422 }
423
424 protected DocumentImpl getDocument(
425 org.apache.lucene.document.Document oldDocument) {
426
427 DocumentImpl newDocument = new DocumentImpl();
428
429 List<org.apache.lucene.document.Fieldable> oldFieldables =
430 oldDocument.getFields();
431
432 for (org.apache.lucene.document.Fieldable oldFieldable :
433 oldFieldables) {
434
435 Field newField = null;
436
437 String[] values = oldDocument.getValues(oldFieldable.name());
438
439 if ((values != null) && (values.length > 1)) {
440 newField = new Field(oldFieldable.name(), values);
441 }
442 else {
443 newField = new Field(
444 oldFieldable.name(), oldFieldable.stringValue());
445 }
446
447 newField.setNumeric(oldFieldable instanceof NumericField);
448 newField.setTokenized(oldFieldable.isTokenized());
449
450 newDocument.add(newField);
451 }
452
453 return newDocument;
454 }
455
456 protected String[] getQueryTerms(Query query) {
457 String[] queryTerms = new String[0];
458
459 try {
460 queryTerms = LuceneHelperUtil.getQueryTerms(
461 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
462 query));
463 }
464 catch (ParseException pe) {
465 _log.error("Query " + query, pe);
466 }
467
468 return queryTerms;
469 }
470
471 protected String getSnippet(
472 org.apache.lucene.document.Document doc, Query query, String field,
473 Locale locale)
474 throws IOException {
475
476 String localizedName = DocumentImpl.getLocalizedName(locale, field);
477
478 String[] values = doc.getValues(localizedName);
479
480 if ((values == null) || (values.length == 0)) {
481 values = doc.getValues(field);
482 }
483
484 String snippet = null;
485
486 if (Validator.isNull(values)) {
487 return snippet;
488 }
489
490 String s = StringUtil.merge(values);
491
492 try {
493 snippet = LuceneHelperUtil.getSnippet(
494 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
495 query),
496 field, s);
497 }
498 catch (ParseException pe) {
499 _log.error("Query " + query, pe);
500 }
501
502 return snippet;
503 }
504
505 protected Hits toHits(
506 org.apache.lucene.search.IndexSearcher indexSearcher,
507 HitDocs hitDocs, Query query, long startTime, float searchTime,
508 int start, int end)
509 throws IOException, ParseException {
510
511 int length = hitDocs.getTotalHits();
512
513 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
514 start = 0;
515 end = length;
516 }
517
518 String[] queryTerms = getQueryTerms(query);
519
520 IndexReader indexReader = indexSearcher.getIndexReader();
521
522 List<String> indexedFieldNames = new ArrayList<String> (
523 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
524
525 org.apache.lucene.search.Query luceneQuery =
526 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
527 query);
528
529 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
530 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
531
532 Hits hits = new HitsImpl();
533
534 if ((start > -1) && (start <= end)) {
535 if (end > length) {
536 end = length;
537 }
538
539 if (start > end) {
540 start = end;
541 }
542
543 int subsetTotal = end - start;
544
545 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
546 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
547 }
548
549 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
550 List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
551 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
552
553 QueryConfig queryConfig = query.getQueryConfig();
554
555 for (int i = start; i < end; i++) {
556 if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
557 break;
558 }
559
560 int docId = hitDocs.getDocId(i);
561
562 org.apache.lucene.document.Document document =
563 indexSearcher.doc(docId);
564
565 Document subsetDocument = getDocument(document);
566
567 String subsetSnippet = StringPool.BLANK;
568
569 if (queryConfig.isHighlightEnabled()) {
570 subsetSnippet = getSnippet(
571 document, query, Field.CONTENT,
572 queryConfig.getLocale());
573 }
574
575 subsetDocument.addText(Field.SNIPPET, subsetSnippet);
576
577 subsetSnippets.add(subsetSnippet);
578
579 subsetDocs.add(subsetDocument);
580
581 Float subsetScore = hitDocs.getScore(i);
582
583 if (scoredFieldNamesCount > 0) {
584 subsetScore = subsetScore / scoredFieldNamesCount;
585 }
586
587 subsetScores.add(subsetScore);
588
589 if (_log.isDebugEnabled()) {
590 try {
591 Explanation explanation = indexSearcher.explain(
592 luceneQuery, docId);
593
594 _log.debug(explanation.toString());
595 }
596 catch (Exception e) {
597 }
598 }
599 }
600
601 hits.setStart(startTime);
602 hits.setSearchTime(searchTime);
603 hits.setQuery(query);
604 hits.setQueryTerms(queryTerms);
605 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
606 hits.setLength(length);
607 hits.setSnippets(
608 subsetSnippets.toArray(new String[subsetSnippets.size()]));
609 hits.setScores(
610 subsetScores.toArray(new Float[subsetScores.size()]));
611 }
612
613 return hits;
614 }
615
616 private static Log _log = LogFactoryUtil.getLog(
617 LuceneIndexSearcherImpl.class);
618
619 private class HitDocs {
620
621 public HitDocs(BrowseHit[] browseHits) {
622 _browseHits = browseHits;
623 }
624
625 public HitDocs(TopFieldDocs topFieldDocs) {
626 _topFieldDocs = topFieldDocs;
627 }
628
629 public int getDocId(int i) {
630 if (_topFieldDocs != null) {
631 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
632
633 return scoreDoc.doc;
634 }
635 else if (_browseHits != null) {
636 return _browseHits[i].getDocid();
637 }
638
639 throw new IllegalStateException();
640 }
641
642 public float getScore(int i) {
643 if (_topFieldDocs != null) {
644 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
645
646 return scoreDoc.score;
647 }
648 else if (_browseHits != null) {
649 return _browseHits[i].getScore();
650 }
651
652 throw new IllegalStateException();
653 }
654
655 public int getTotalHits() {
656 if (_topFieldDocs != null) {
657 return _topFieldDocs.totalHits;
658 }
659 else if (_browseHits != null) {
660 return _browseHits.length;
661 }
662
663 throw new IllegalStateException();
664 }
665
666 private BrowseHit[] _browseHits;
667 private TopFieldDocs _topFieldDocs;
668
669 }
670
671 }