001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.browseengine.bobo.api.BoboBrowser;
018    import com.browseengine.bobo.api.BoboIndexReader;
019    import com.browseengine.bobo.api.Browsable;
020    import com.browseengine.bobo.api.BrowseHit;
021    import com.browseengine.bobo.api.BrowseRequest;
022    import com.browseengine.bobo.api.BrowseResult;
023    import com.browseengine.bobo.api.FacetAccessible;
024    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025    import com.browseengine.bobo.api.FacetSpec;
026    import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027    import com.browseengine.bobo.facets.FacetHandler;
028    import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029    import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031    
032    import com.liferay.portal.kernel.dao.orm.QueryUtil;
033    import com.liferay.portal.kernel.json.JSONArray;
034    import com.liferay.portal.kernel.json.JSONObject;
035    import com.liferay.portal.kernel.log.Log;
036    import com.liferay.portal.kernel.log.LogFactoryUtil;
037    import com.liferay.portal.kernel.search.Document;
038    import com.liferay.portal.kernel.search.DocumentImpl;
039    import com.liferay.portal.kernel.search.Field;
040    import com.liferay.portal.kernel.search.Hits;
041    import com.liferay.portal.kernel.search.HitsImpl;
042    import com.liferay.portal.kernel.search.IndexSearcher;
043    import com.liferay.portal.kernel.search.ParseException;
044    import com.liferay.portal.kernel.search.Query;
045    import com.liferay.portal.kernel.search.QueryConfig;
046    import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047    import com.liferay.portal.kernel.search.SearchContext;
048    import com.liferay.portal.kernel.search.SearchException;
049    import com.liferay.portal.kernel.search.Sort;
050    import com.liferay.portal.kernel.search.facet.Facet;
051    import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052    import com.liferay.portal.kernel.search.facet.RangeFacet;
053    import com.liferay.portal.kernel.search.facet.SimpleFacet;
054    import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055    import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056    import com.liferay.portal.kernel.util.ArrayUtil;
057    import com.liferay.portal.kernel.util.StringPool;
058    import com.liferay.portal.kernel.util.StringUtil;
059    import com.liferay.portal.kernel.util.Time;
060    import com.liferay.portal.kernel.util.Validator;
061    import com.liferay.portal.search.BoboFacetCollector;
062    import com.liferay.portal.util.PropsValues;
063    
064    import java.io.IOException;
065    
066    import java.util.ArrayList;
067    import java.util.List;
068    import java.util.Locale;
069    import java.util.Map;
070    
071    import org.apache.lucene.document.NumericField;
072    import org.apache.lucene.index.IndexReader;
073    import org.apache.lucene.search.BooleanQuery;
074    import org.apache.lucene.search.Explanation;
075    import org.apache.lucene.search.ScoreDoc;
076    import org.apache.lucene.search.SortField;
077    import org.apache.lucene.search.TopFieldDocs;
078    
079    /**
080     * @author Bruno Farache
081     */
082    public class LuceneIndexSearcherImpl implements IndexSearcher {
083    
084            public Hits search(SearchContext searchContext, Query query)
085                    throws SearchException {
086    
087                    if (_log.isDebugEnabled()) {
088                            _log.debug("Query " + query);
089                    }
090    
091                    Hits hits = null;
092    
093                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
094                    Map<String, Facet> facets = null;
095                    BrowseRequest browseRequest = null;
096                    Browsable browsable = null;
097    
098                    try {
099                            indexSearcher = LuceneHelperUtil.getSearcher(
100                                    searchContext.getCompanyId(), true);
101    
102                            List<FacetHandler<?>> facetHandlers =
103                                    new ArrayList<FacetHandler<?>>();
104    
105                            facets = searchContext.getFacets();
106    
107                            for (Facet facet : facets.values()) {
108                                    if (facet.isStatic()) {
109                                            continue;
110                                    }
111    
112                                    FacetConfiguration facetConfiguration =
113                                            facet.getFacetConfiguration();
114    
115                                    if (facet instanceof MultiValueFacet) {
116                                            MultiValueFacetHandler multiValueFacetHandler =
117                                                    new MultiValueFacetHandler(
118                                                            facetConfiguration.getFieldName(),
119                                                            facetConfiguration.getFieldName());
120    
121                                            JSONObject dataJSONObject = facetConfiguration.getData();
122    
123                                            if (dataJSONObject.has("maxTerms")) {
124                                                    multiValueFacetHandler.setMaxItems(
125                                                            dataJSONObject.getInt("maxTerms"));
126                                            }
127    
128                                            facetHandlers.add(multiValueFacetHandler);
129                                    }
130                                    else if (facet instanceof RangeFacet) {
131                                            List<String> ranges = new ArrayList<String>();
132    
133                                            JSONObject dataJSONObject = facetConfiguration.getData();
134    
135                                            JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
136                                                    "ranges");
137    
138                                            if (rangesJSONArray != null) {
139                                                    for (int i = 0; i < rangesJSONArray.length(); i++) {
140                                                            JSONObject rangeJSONObject =
141                                                                    rangesJSONArray.getJSONObject(i);
142    
143                                                            ranges.add(rangeJSONObject.getString("range"));
144                                                    }
145                                            }
146    
147                                            RangeFacetHandler rangeFacetHandler =
148                                                    new RangeFacetHandler(
149                                                            facetConfiguration.getFieldName(),
150                                                            facetConfiguration.getFieldName(), ranges);
151    
152                                            rangeFacetHandler.setTermCountSize(TermCountSize.large);
153    
154                                            facetHandlers.add(rangeFacetHandler);
155                                    }
156                                    else if (facet instanceof SimpleFacet) {
157                                            SimpleFacetHandler simpleFacetHandler =
158                                                    new SimpleFacetHandler(
159                                                            facetConfiguration.getFieldName(),
160                                                            facetConfiguration.getFieldName());
161    
162                                            facetHandlers.add(simpleFacetHandler);
163                                    }
164                            }
165    
166                            BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
167                                    indexSearcher.getIndexReader(), facetHandlers);
168    
169                            SortField[] sortFields = new SortField[0];
170    
171                            Sort[] sorts = searchContext.getSorts();
172    
173                            if (sorts != null) {
174                                    sortFields = new SortField[sorts.length];
175    
176                                    for (int i = 0; i < sorts.length; i++) {
177                                            Sort sort = sorts[i];
178    
179                                            sortFields[i] = new SortField(
180                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
181                                    }
182                            }
183    
184                            browseRequest = new BrowseRequest();
185    
186                            for (Facet facet : facets.values()) {
187                                    if (facet.isStatic()) {
188                                            continue;
189                                    }
190    
191                                    FacetConfiguration facetConfiguration =
192                                            facet.getFacetConfiguration();
193    
194                                    FacetSpec facetSpec = new FacetSpec();
195    
196                                    facetSpec.setOrderBy(
197                                            FacetSortSpec.valueOf(facetConfiguration.getOrder()));
198    
199                                    browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
200                            }
201    
202                            browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
203                            browseRequest.setOffset(0);
204                            browseRequest.setQuery(
205                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
206                                            query));
207                            browseRequest.setSort(sortFields);
208    
209                            browsable = new BoboBrowser(boboIndexReader);
210    
211                            long startTime = System.currentTimeMillis();
212    
213                            BrowseResult browseResult = browsable.browse(browseRequest);
214    
215                            BrowseHit[] browseHits = browseResult.getHits();
216    
217                            long endTime = System.currentTimeMillis();
218    
219                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
220    
221                            hits = toHits(
222                                    indexSearcher, new HitDocs(browseHits), query, startTime,
223                                    searchTime, searchContext.getStart(), searchContext.getEnd());
224    
225                            Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
226    
227                            for (Map.Entry<String, FacetAccessible> entry :
228                                            facetMap.entrySet()) {
229    
230                                    Facet facet = facets.get(entry.getKey());
231    
232                                    FacetAccessible facetAccessible = entry.getValue();
233    
234                                    FacetCollector facetCollector = new BoboFacetCollector(
235                                            entry.getKey(), facetAccessible);
236    
237                                    facet.setFacetCollector(facetCollector);
238                            }
239                    }
240                    catch (BooleanQuery.TooManyClauses tmc) {
241                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
242    
243                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
244    
245                            try {
246                                    long startTime = System.currentTimeMillis();
247    
248                                    BrowseResult result = browsable.browse(browseRequest);
249    
250                                    BrowseHit[] browseHits = result.getHits();
251    
252                                    long endTime = System.currentTimeMillis();
253    
254                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
255    
256                                    hits = toHits(
257                                            indexSearcher, new HitDocs(browseHits), query, startTime,
258                                            searchTime, searchContext.getStart(),
259                                            searchContext.getEnd());
260    
261                                    Map<String, FacetAccessible> facetMap = result.getFacetMap();
262    
263                                    for (Map.Entry<String, FacetAccessible> entry :
264                                                    facetMap.entrySet()) {
265    
266                                            Facet facet = facets.get(entry.getKey());
267    
268                                            FacetAccessible facetAccessible = entry.getValue();
269    
270                                            FacetCollector facetCollector = new BoboFacetCollector(
271                                                    entry.getKey(), facetAccessible);
272    
273                                            facet.setFacetCollector(facetCollector);
274                                    }
275                            }
276                            catch (Exception e) {
277                                    throw new SearchException(e);
278                            }
279                            finally {
280                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
281                            }
282                    }
283                    catch (ParseException pe) {
284                            _log.error("Query " + query, pe);
285    
286                            return new HitsImpl();
287                    }
288                    catch (Exception e) {
289                            throw new SearchException(e);
290                    }
291                    finally {
292                            if (browsable != null) {
293                                    try {
294                                            browsable.close();
295                                    }
296                                    catch (IOException ioe) {
297                                            _log.error(ioe, ioe);
298                                    }
299                            }
300    
301                            if (indexSearcher != null) {
302                                    try {
303                                            indexSearcher.close();
304                                    }
305                                    catch (IOException ioe) {
306                                            _log.error(ioe, ioe);
307                                    }
308                            }
309                    }
310    
311                    if (_log.isDebugEnabled()) {
312                            _log.debug(
313                                    "Search found " + hits.getLength() + " results in " +
314                                            hits.getSearchTime() + "ms");
315                    }
316    
317                    return hits;
318            }
319    
320            public Hits search(
321                            String searchEngineId, long companyId, Query query, Sort[] sorts,
322                            int start, int end)
323                    throws SearchException {
324    
325                    if (_log.isDebugEnabled()) {
326                            _log.debug("Query " + query);
327                    }
328    
329                    Hits hits = null;
330    
331                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
332                    org.apache.lucene.search.Sort luceneSort = null;
333    
334                    try {
335                            indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
336    
337                            if (sorts != null) {
338                                    SortField[] sortFields = new SortField[sorts.length];
339    
340                                    for (int i = 0; i < sorts.length; i++) {
341                                            Sort sort = sorts[i];
342    
343                                            sortFields[i] = new SortField(
344                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
345                                    }
346    
347                                    luceneSort = new org.apache.lucene.search.Sort(sortFields);
348                            }
349                            else {
350                                    luceneSort = new org.apache.lucene.search.Sort();
351                            }
352    
353                            long startTime = System.currentTimeMillis();
354    
355                            TopFieldDocs topFieldDocs = indexSearcher.search(
356                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
357                                            query),
358                                    null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
359    
360                            long endTime = System.currentTimeMillis();
361    
362                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
363    
364                            hits = toHits(
365                                    indexSearcher, new HitDocs(topFieldDocs), query, startTime,
366                                    searchTime, start, end);
367                    }
368                    catch (BooleanQuery.TooManyClauses tmc) {
369                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
370    
371                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
372    
373                            try {
374                                    long startTime = System.currentTimeMillis();
375    
376                                    TopFieldDocs topFieldDocs = indexSearcher.search(
377                                            (org.apache.lucene.search.Query)
378                                                    QueryTranslatorUtil.translate(query),
379                                            null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
380    
381                                    long endTime = System.currentTimeMillis();
382    
383                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
384    
385                                    hits = toHits(
386                                            indexSearcher, new HitDocs(topFieldDocs), query, startTime,
387                                            searchTime, start, end);
388                            }
389                            catch (Exception e) {
390                                    throw new SearchException(e);
391                            }
392                            finally {
393                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
394                            }
395                    }
396                    catch (ParseException pe) {
397                            _log.error("Query " + query, pe);
398    
399                            return new HitsImpl();
400                    }
401                    catch (Exception e) {
402                            throw new SearchException(e);
403                    }
404                    finally {
405                            if (indexSearcher != null) {
406                                    try {
407                                            indexSearcher.close();
408                                    }
409                                    catch (IOException ioe) {
410                                            _log.error(ioe, ioe);
411                                    }
412                            }
413                    }
414    
415                    if (_log.isDebugEnabled()) {
416                            _log.debug(
417                                    "Search found " + hits.getLength() + " results in " +
418                                            hits.getSearchTime() + "ms");
419                    }
420    
421                    return hits;
422            }
423    
424            protected DocumentImpl getDocument(
425                    org.apache.lucene.document.Document oldDocument) {
426    
427                    DocumentImpl newDocument = new DocumentImpl();
428    
429                    List<org.apache.lucene.document.Fieldable> oldFieldables =
430                            oldDocument.getFields();
431    
432                    for (org.apache.lucene.document.Fieldable oldFieldable :
433                                    oldFieldables) {
434    
435                            Field newField = null;
436    
437                            String[] values = oldDocument.getValues(oldFieldable.name());
438    
439                            if ((values != null) && (values.length > 1)) {
440                                    newField = new Field(oldFieldable.name(), values);
441                            }
442                            else {
443                                    newField = new Field(
444                                            oldFieldable.name(), oldFieldable.stringValue());
445                            }
446    
447                            newField.setNumeric(oldFieldable instanceof NumericField);
448                            newField.setTokenized(oldFieldable.isTokenized());
449    
450                            newDocument.add(newField);
451                    }
452    
453                    return newDocument;
454            }
455    
456            protected String[] getQueryTerms(Query query) {
457                    String[] queryTerms = new String[0];
458    
459                    try {
460                            queryTerms = LuceneHelperUtil.getQueryTerms(
461                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
462                                            query));
463                    }
464                    catch (ParseException pe) {
465                            _log.error("Query " + query, pe);
466                    }
467    
468                    return queryTerms;
469            }
470    
471            protected String getSnippet(
472                            org.apache.lucene.document.Document doc, Query query, String field,
473                            Locale locale)
474                    throws IOException {
475    
476                    String localizedName = DocumentImpl.getLocalizedName(locale, field);
477    
478                    String[] values = doc.getValues(localizedName);
479    
480                    if ((values == null) || (values.length == 0)) {
481                            values = doc.getValues(field);
482                    }
483    
484                    String snippet = null;
485    
486                    if (Validator.isNull(values)) {
487                            return snippet;
488                    }
489    
490                    String s = StringUtil.merge(values);
491    
492                    try {
493                            snippet = LuceneHelperUtil.getSnippet(
494                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
495                                            query),
496                                    field, s);
497                    }
498                    catch (ParseException pe) {
499                            _log.error("Query " + query, pe);
500                    }
501    
502                    return snippet;
503            }
504    
505            protected Hits toHits(
506                            org.apache.lucene.search.IndexSearcher indexSearcher,
507                            HitDocs hitDocs, Query query, long startTime, float searchTime,
508                            int start, int end)
509                    throws IOException, ParseException {
510    
511                    int length = hitDocs.getTotalHits();
512    
513                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
514                            start = 0;
515                            end = length;
516                    }
517    
518                    String[] queryTerms = getQueryTerms(query);
519    
520                    IndexReader indexReader = indexSearcher.getIndexReader();
521    
522                    List<String> indexedFieldNames = new ArrayList<String> (
523                            indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
524    
525                    org.apache.lucene.search.Query luceneQuery =
526                            (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
527                                    query);
528    
529                    int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
530                            luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
531    
532                    Hits hits = new HitsImpl();
533    
534                    if ((start > -1) && (start <= end)) {
535                            if (end > length) {
536                                    end = length;
537                            }
538    
539                            if (start > end) {
540                                    start = end;
541                            }
542    
543                            int subsetTotal = end - start;
544    
545                            if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
546                                    subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
547                            }
548    
549                            List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
550                            List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
551                            List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
552    
553                            QueryConfig queryConfig = query.getQueryConfig();
554    
555                            for (int i = start; i < end; i++) {
556                                    if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
557                                            break;
558                                    }
559    
560                                    int docId = hitDocs.getDocId(i);
561    
562                                    org.apache.lucene.document.Document document =
563                                            indexSearcher.doc(docId);
564    
565                                    Document subsetDocument = getDocument(document);
566    
567                                    String subsetSnippet = StringPool.BLANK;
568    
569                                    if (queryConfig.isHighlightEnabled()) {
570                                            subsetSnippet = getSnippet(
571                                                    document, query, Field.CONTENT,
572                                                    queryConfig.getLocale());
573                                    }
574    
575                                    subsetDocument.addText(Field.SNIPPET, subsetSnippet);
576    
577                                    subsetSnippets.add(subsetSnippet);
578    
579                                    subsetDocs.add(subsetDocument);
580    
581                                    Float subsetScore = hitDocs.getScore(i);
582    
583                                    if (scoredFieldNamesCount > 0) {
584                                            subsetScore = subsetScore / scoredFieldNamesCount;
585                                    }
586    
587                                    subsetScores.add(subsetScore);
588    
589                                    if (_log.isDebugEnabled()) {
590                                            try {
591                                                    Explanation explanation = indexSearcher.explain(
592                                                            luceneQuery, docId);
593    
594                                                    _log.debug(explanation.toString());
595                                            }
596                                            catch (Exception e) {
597                                            }
598                                    }
599                            }
600    
601                            hits.setStart(startTime);
602                            hits.setSearchTime(searchTime);
603                            hits.setQuery(query);
604                            hits.setQueryTerms(queryTerms);
605                            hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
606                            hits.setLength(length);
607                            hits.setSnippets(
608                                    subsetSnippets.toArray(new String[subsetSnippets.size()]));
609                            hits.setScores(
610                                    subsetScores.toArray(new Float[subsetScores.size()]));
611                    }
612    
613                    return hits;
614            }
615    
616            private static Log _log = LogFactoryUtil.getLog(
617                    LuceneIndexSearcherImpl.class);
618    
619            private class HitDocs {
620    
621                    public HitDocs(BrowseHit[] browseHits) {
622                            _browseHits = browseHits;
623                    }
624    
625                    public HitDocs(TopFieldDocs topFieldDocs) {
626                            _topFieldDocs = topFieldDocs;
627                    }
628    
629                    public int getDocId(int i) {
630                            if (_topFieldDocs != null) {
631                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
632    
633                                    return scoreDoc.doc;
634                            }
635                            else if (_browseHits != null) {
636                                    return _browseHits[i].getDocid();
637                            }
638    
639                            throw new IllegalStateException();
640                    }
641    
642                    public float getScore(int i) {
643                            if (_topFieldDocs != null) {
644                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
645    
646                                    return scoreDoc.score;
647                            }
648                            else if (_browseHits != null) {
649                                    return _browseHits[i].getScore();
650                            }
651    
652                            throw new IllegalStateException();
653                    }
654    
655                    public int getTotalHits() {
656                            if (_topFieldDocs != null) {
657                                    return _topFieldDocs.totalHits;
658                            }
659                            else if (_browseHits != null) {
660                                    return _browseHits.length;
661                            }
662    
663                            throw new IllegalStateException();
664                    }
665    
666                    private BrowseHit[] _browseHits;
667                    private TopFieldDocs _topFieldDocs;
668    
669            }
670    
671    }