001    /**
002     * Copyright (c) 2000-2011 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.search.Field;
021    import com.liferay.portal.kernel.util.PropsKeys;
022    import com.liferay.portal.kernel.util.StringPool;
023    import com.liferay.portal.kernel.util.StringUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    import com.liferay.portal.util.PropsUtil;
026    import com.liferay.util.lucene.KeywordsUtil;
027    
028    import java.io.IOException;
029    
030    import java.util.HashSet;
031    import java.util.Map;
032    import java.util.Set;
033    import java.util.concurrent.ConcurrentHashMap;
034    
035    import org.apache.lucene.analysis.Analyzer;
036    import org.apache.lucene.analysis.TokenStream;
037    import org.apache.lucene.analysis.WhitespaceAnalyzer;
038    import org.apache.lucene.document.Document;
039    import org.apache.lucene.index.Term;
040    import org.apache.lucene.queryParser.ParseException;
041    import org.apache.lucene.queryParser.QueryParser;
042    import org.apache.lucene.search.BooleanClause;
043    import org.apache.lucene.search.BooleanQuery;
044    import org.apache.lucene.search.IndexSearcher;
045    import org.apache.lucene.search.Query;
046    import org.apache.lucene.search.TermQuery;
047    import org.apache.lucene.search.WildcardQuery;
048    import org.apache.lucene.search.highlight.Highlighter;
049    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
050    import org.apache.lucene.search.highlight.QueryScorer;
051    import org.apache.lucene.search.highlight.QueryTermExtractor;
052    import org.apache.lucene.search.highlight.SimpleFragmenter;
053    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
054    import org.apache.lucene.search.highlight.WeightedTerm;
055    import org.apache.lucene.util.Version;
056    
057    /**
058     * @author Brian Wing Shun Chan
059     * @author Harry Mark
060     * @author Bruno Farache
061     */
062    public class LuceneHelperImpl implements LuceneHelper {
063    
064            public void addDocument(long companyId, Document document)
065                    throws IOException {
066    
067                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
068    
069                    indexAccessor.addDocument(document);
070            }
071    
072            public void addExactTerm(
073                    BooleanQuery booleanQuery, String field, String value) {
074    
075                    //text = KeywordsUtil.escape(value);
076    
077                    Query query = new TermQuery(new Term(field, value));
078    
079                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
080            }
081    
082            public void addRequiredTerm(
083                    BooleanQuery booleanQuery, String field, String value, boolean like) {
084    
085                    if (like) {
086                            value = StringUtil.replace(
087                                    value, StringPool.PERCENT, StringPool.STAR);
088    
089                            value = value.toLowerCase();
090    
091                            WildcardQuery wildcardQuery = new WildcardQuery(
092                                    new Term(field, value));
093    
094                            booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
095                    }
096                    else {
097                            //text = KeywordsUtil.escape(value);
098    
099                            Term term = new Term(field, value);
100                            TermQuery termQuery = new TermQuery(term);
101    
102                            booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
103                    }
104            }
105    
106            public void addTerm(
107                            BooleanQuery booleanQuery, String field, String value, boolean like)
108                    throws ParseException {
109    
110                    if (Validator.isNull(value)) {
111                            return;
112                    }
113    
114                    if (like) {
115                            value = StringUtil.replace(
116                                    value, StringPool.PERCENT, StringPool.BLANK);
117    
118                            value = value.toLowerCase();
119    
120                            Term term = new Term(
121                                    field, StringPool.STAR.concat(value).concat(StringPool.STAR));
122    
123                            WildcardQuery wildcardQuery = new WildcardQuery(term);
124    
125                            booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
126                    }
127                    else {
128                            QueryParser queryParser = new QueryParser(
129                                    _version, field, getAnalyzer());
130    
131                            try {
132                                    Query query = queryParser.parse(value);
133    
134                                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
135                            }
136                            catch (ParseException pe) {
137                                    if (_log.isDebugEnabled()) {
138                                            _log.debug(
139                                                    "ParseException thrown, reverting to literal search",
140                                                    pe);
141                                    }
142    
143                                    value = KeywordsUtil.escape(value);
144    
145                                    Query query = queryParser.parse(value);
146    
147                                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
148                            }
149                    }
150            }
151    
152            public void delete(long companyId) {
153                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
154    
155                    indexAccessor.delete();
156            }
157    
158            public void deleteDocuments(long companyId, Term term) throws IOException {
159                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
160    
161                    indexAccessor.deleteDocuments(term);
162            }
163    
164            public Analyzer getAnalyzer() {
165                    try {
166                            return (Analyzer)_analyzerClass.newInstance();
167                    }
168                    catch (Exception e) {
169                            throw new RuntimeException(e);
170                    }
171            }
172    
173            public String[] getQueryTerms(Query query) {
174                    String[] fieldNames = new String[] {
175                            Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
176                            Field.USER_NAME
177                    };
178    
179                    WeightedTerm[] weightedTerms = null;
180    
181                    for (String fieldName : fieldNames) {
182                            weightedTerms = QueryTermExtractor.getTerms(
183                                    query, false, fieldName);
184    
185                            if (weightedTerms.length > 0) {
186                                    break;
187                            }
188                    }
189    
190                    Set<String> queryTerms = new HashSet<String>();
191    
192                    for (WeightedTerm weightedTerm : weightedTerms) {
193                            queryTerms.add(weightedTerm.getTerm());
194                    }
195    
196                    return queryTerms.toArray(new String[queryTerms.size()]);
197            }
198    
199            public IndexSearcher getSearcher(long companyId, boolean readOnly)
200                    throws IOException {
201    
202                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
203    
204                    return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
205            }
206    
207            public String getSnippet(
208                            Query query, String field, String s, int maxNumFragments,
209                            int fragmentLength, String fragmentSuffix, String preTag,
210                            String postTag)
211                    throws IOException {
212    
213                    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
214                            preTag, postTag);
215    
216                    QueryScorer queryScorer = new QueryScorer(query, field);
217    
218                    Highlighter highlighter = new Highlighter(
219                            simpleHTMLFormatter, queryScorer);
220    
221                    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
222    
223                    TokenStream tokenStream = getAnalyzer().tokenStream(
224                            field, new UnsyncStringReader(s));
225    
226                    try {
227                            String snippet = highlighter.getBestFragments(
228                                    tokenStream, s, maxNumFragments, fragmentSuffix);
229    
230                            if (Validator.isNotNull(snippet) &&
231                                    !StringUtil.endsWith(snippet, fragmentSuffix)) {
232    
233                                    snippet = snippet + fragmentSuffix;
234                            }
235    
236                            return snippet;
237                    }
238                    catch (InvalidTokenOffsetsException itoe) {
239                            throw new IOException(itoe.getMessage());
240                    }
241            }
242    
243            public Version getVersion() {
244                    return _version;
245            }
246    
247            public void updateDocument(long companyId, Term term, Document document)
248                    throws IOException {
249    
250                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
251    
252                    indexAccessor.updateDocument(term, document);
253            }
254    
255            public void shutdown() {
256                    for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
257                            indexAccessor.close();
258                    }
259            }
260    
261            private LuceneHelperImpl() {
262                    String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
263    
264                    if (Validator.isNotNull(analyzerName)) {
265                            try {
266                                    _analyzerClass = Class.forName(analyzerName);
267                            }
268                            catch (Exception e) {
269                                    _log.error(e);
270                            }
271                    }
272            }
273    
274            private IndexAccessor _getIndexAccessor(long companyId) {
275                    IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
276    
277                    if (indexAccessor == null) {
278                            synchronized (this) {
279                                    indexAccessor = _indexAccessorMap.get(companyId);
280    
281                                    if (indexAccessor == null) {
282                                            indexAccessor = new IndexAccessorImpl(companyId);
283    
284                                            _indexAccessorMap.put(companyId, indexAccessor);
285                                    }
286                            }
287                    }
288    
289                    return indexAccessor;
290            }
291    
292            private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
293    
294            private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
295            private Map<Long, IndexAccessor> _indexAccessorMap =
296                    new ConcurrentHashMap<Long, IndexAccessor>();
297            private Version _version = Version.LUCENE_24;
298    
299    }