001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.StreamUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    
026    import java.io.File;
027    import java.io.FileNotFoundException;
028    import java.io.InputStream;
029    
030    import java.net.URL;
031    
032    import java.util.Collections;
033    import java.util.HashMap;
034    import java.util.HashSet;
035    import java.util.Map;
036    import java.util.Set;
037    
038    import javax.xml.parsers.DocumentBuilder;
039    import javax.xml.parsers.DocumentBuilderFactory;
040    
041    import org.apache.tika.detect.DefaultDetector;
042    import org.apache.tika.detect.Detector;
043    import org.apache.tika.io.TikaInputStream;
044    import org.apache.tika.metadata.Metadata;
045    import org.apache.tika.mime.MediaType;
046    import org.apache.tika.mime.MimeTypesReaderMetKeys;
047    
048    import org.w3c.dom.Document;
049    import org.w3c.dom.Element;
050    import org.w3c.dom.Node;
051    import org.w3c.dom.NodeList;
052    
053    import org.xml.sax.InputSource;
054    
055    /**
056     * @author Jorge Ferrer
057     * @author Brian Wing Shun Chan
058     * @author Alexander Chow
059     */
060    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
061    
062            public MimeTypesImpl() {
063                    _detector = new DefaultDetector(
064                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
065    
066                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
067                            "tika-mimetypes.xml");
068    
069                    try {
070                            read(url.openStream());
071                    }
072                    catch (Exception e) {
073                            _log.error("Unable to populate extensions map", e);
074                    }
075            }
076    
077            public String getContentType(File file) {
078                    return getContentType(file, file.getName());
079            }
080    
081            public String getContentType(File file, String title) {
082                    InputStream is = null;
083    
084                    try {
085                            is = TikaInputStream.get(file);
086    
087                            return getContentType(is, title);
088                    }
089                    catch (FileNotFoundException fnfe) {
090                            return getContentType(title);
091                    }
092                    finally {
093                            StreamUtil.cleanUp(is);
094                    }
095            }
096    
097            public String getContentType(InputStream inputStream, String fileName) {
098                    if ((inputStream == null) && Validator.isNull(fileName)) {
099                            return ContentTypes.APPLICATION_OCTET_STREAM;
100                    }
101    
102                    String contentType = null;
103    
104                    try {
105                            Metadata metadata = new Metadata();
106    
107                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
108    
109                            MediaType mediaType = _detector.detect(
110                                    TikaInputStream.get(inputStream), metadata);
111    
112                            contentType = mediaType.toString();
113    
114                            if (contentType.contains("tika")) {
115                                    if (_log.isDebugEnabled()) {
116                                            _log.debug("Retrieved invalid content type " + contentType);
117                                    }
118    
119                                    contentType = getContentType(fileName);
120                            }
121    
122                            if (contentType.contains("tika")) {
123                                    if (_log.isDebugEnabled()) {
124                                            _log.debug("Retrieved invalid content type " + contentType);
125                                    }
126    
127                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
128                            }
129                    }
130                    catch (Exception e) {
131                            _log.error(e, e);
132    
133                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
134                    }
135    
136                    return contentType;
137            }
138    
139            public String getContentType(String fileName) {
140                    if (Validator.isNull(fileName)) {
141                            return ContentTypes.APPLICATION_OCTET_STREAM;
142                    }
143    
144                    try {
145                            Metadata metadata = new Metadata();
146    
147                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
148    
149                            MediaType mediaType = _detector.detect(null, metadata);
150    
151                            String contentType = mediaType.toString();
152    
153                            if (!contentType.contains("tika")) {
154                                    return contentType;
155                            }
156                            else if (_log.isDebugEnabled()) {
157                                    _log.debug("Retrieved invalid content type " + contentType);
158                            }
159                    }
160                    catch (Exception e) {
161                            _log.error(e, e);
162                    }
163    
164                    return ContentTypes.APPLICATION_OCTET_STREAM;
165            }
166    
167            public Set<String> getExtensions(String contentType) {
168                    Set<String> extensions = _extensionsMap.get(contentType);
169    
170                    if (extensions == null) {
171                            extensions = Collections.emptySet();
172                    }
173    
174                    return extensions;
175            }
176    
177            protected void read(InputStream stream) throws Exception {
178                    DocumentBuilderFactory documentBuilderFactory =
179                            DocumentBuilderFactory.newInstance();
180    
181                    DocumentBuilder documentBuilder =
182                            documentBuilderFactory.newDocumentBuilder();
183    
184                    Document document = documentBuilder.parse(new InputSource(stream));
185    
186                    Element element = document.getDocumentElement();
187    
188                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
189                            throw new SystemException("Invalid configuration file");
190                    }
191    
192                    NodeList nodeList = element.getChildNodes();
193    
194                    for (int i = 0; i < nodeList.getLength(); i++) {
195                            Node node = nodeList.item(i);
196    
197                            if (node.getNodeType() != Node.ELEMENT_NODE) {
198                                    continue;
199                            }
200    
201                            Element childElement = (Element)node;
202    
203                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
204                                    readMimeType(childElement);
205                            }
206                    }
207            }
208    
209            protected void readMimeType(Element element) {
210                    Set<String> mimeTypes = new HashSet<String>();
211    
212                    Set<String> extensions = new HashSet<String>();
213    
214                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
215    
216                    mimeTypes.add(name);
217    
218                    NodeList nodeList = element.getChildNodes();
219    
220                    for (int i = 0; i < nodeList.getLength(); i++) {
221                            Node node = nodeList.item(i);
222    
223                            if (node.getNodeType() != Node.ELEMENT_NODE) {
224                                    continue;
225                            }
226    
227                            Element childElement = (Element)node;
228    
229                            if (ALIAS_TAG.equals(childElement.getTagName())) {
230                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
231    
232                                    mimeTypes.add(alias);
233                            }
234                            else if (GLOB_TAG.equals(childElement.getTagName())) {
235                                    boolean isRegex = GetterUtil.getBoolean(
236                                            childElement.getAttribute(ISREGEX_ATTR));
237    
238                                    if (isRegex) {
239                                            continue;
240                                    }
241    
242                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
243    
244                                    if (!pattern.startsWith("*")) {
245                                            continue;
246                                    }
247    
248                                    String extension = pattern.substring(1);
249    
250                                    if (!extension.contains("*") && !extension.contains("?") &&
251                                            !extension.contains("[")) {
252    
253                                            extensions.add(extension);
254                                    }
255                            }
256                    }
257    
258                    for (String mimeType : mimeTypes) {
259                            _extensionsMap.put(mimeType, extensions);
260                    }
261            }
262    
263            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
264    
265            private Detector _detector;
266            private Map<String, Set<String>> _extensionsMap =
267                    new HashMap<String, Set<String>>();
268    
269    }