001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.StreamUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.io.File;
027 import java.io.FileNotFoundException;
028 import java.io.InputStream;
029
030 import java.net.URL;
031
032 import java.util.Collections;
033 import java.util.HashMap;
034 import java.util.HashSet;
035 import java.util.Map;
036 import java.util.Set;
037
038 import javax.xml.parsers.DocumentBuilder;
039 import javax.xml.parsers.DocumentBuilderFactory;
040
041 import org.apache.tika.detect.DefaultDetector;
042 import org.apache.tika.detect.Detector;
043 import org.apache.tika.io.TikaInputStream;
044 import org.apache.tika.metadata.Metadata;
045 import org.apache.tika.mime.MediaType;
046 import org.apache.tika.mime.MimeTypesReaderMetKeys;
047
048 import org.w3c.dom.Document;
049 import org.w3c.dom.Element;
050 import org.w3c.dom.Node;
051 import org.w3c.dom.NodeList;
052
053 import org.xml.sax.InputSource;
054
055
060 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
061
062 public MimeTypesImpl() {
063 _detector = new DefaultDetector(
064 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
065
066 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
067 "tika-mimetypes.xml");
068
069 try {
070 read(url.openStream());
071 }
072 catch (Exception e) {
073 _log.error("Unable to populate extensions map", e);
074 }
075 }
076
077 public String getContentType(File file) {
078 return getContentType(file, file.getName());
079 }
080
081 public String getContentType(File file, String title) {
082 InputStream is = null;
083
084 try {
085 is = TikaInputStream.get(file);
086
087 return getContentType(is, title);
088 }
089 catch (FileNotFoundException fnfe) {
090 return getContentType(title);
091 }
092 finally {
093 StreamUtil.cleanUp(is);
094 }
095 }
096
097 public String getContentType(InputStream inputStream, String fileName) {
098 if ((inputStream == null) && Validator.isNull(fileName)) {
099 return ContentTypes.APPLICATION_OCTET_STREAM;
100 }
101
102 String contentType = null;
103
104 try {
105 Metadata metadata = new Metadata();
106
107 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
108
109 MediaType mediaType = _detector.detect(
110 TikaInputStream.get(inputStream), metadata);
111
112 contentType = mediaType.toString();
113
114 if (contentType.contains("tika")) {
115 if (_log.isDebugEnabled()) {
116 _log.debug("Retrieved invalid content type " + contentType);
117 }
118
119 contentType = getContentType(fileName);
120 }
121
122 if (contentType.contains("tika")) {
123 if (_log.isDebugEnabled()) {
124 _log.debug("Retrieved invalid content type " + contentType);
125 }
126
127 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
128 }
129 }
130 catch (Exception e) {
131 _log.error(e, e);
132
133 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
134 }
135
136 return contentType;
137 }
138
139 public String getContentType(String fileName) {
140 if (Validator.isNull(fileName)) {
141 return ContentTypes.APPLICATION_OCTET_STREAM;
142 }
143
144 try {
145 Metadata metadata = new Metadata();
146
147 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
148
149 MediaType mediaType = _detector.detect(null, metadata);
150
151 String contentType = mediaType.toString();
152
153 if (!contentType.contains("tika")) {
154 return contentType;
155 }
156 else if (_log.isDebugEnabled()) {
157 _log.debug("Retrieved invalid content type " + contentType);
158 }
159 }
160 catch (Exception e) {
161 _log.error(e, e);
162 }
163
164 return ContentTypes.APPLICATION_OCTET_STREAM;
165 }
166
167 public Set<String> getExtensions(String contentType) {
168 Set<String> extensions = _extensionsMap.get(contentType);
169
170 if (extensions == null) {
171 extensions = Collections.emptySet();
172 }
173
174 return extensions;
175 }
176
177 protected void read(InputStream stream) throws Exception {
178 DocumentBuilderFactory documentBuilderFactory =
179 DocumentBuilderFactory.newInstance();
180
181 DocumentBuilder documentBuilder =
182 documentBuilderFactory.newDocumentBuilder();
183
184 Document document = documentBuilder.parse(new InputSource(stream));
185
186 Element element = document.getDocumentElement();
187
188 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
189 throw new SystemException("Invalid configuration file");
190 }
191
192 NodeList nodeList = element.getChildNodes();
193
194 for (int i = 0; i < nodeList.getLength(); i++) {
195 Node node = nodeList.item(i);
196
197 if (node.getNodeType() != Node.ELEMENT_NODE) {
198 continue;
199 }
200
201 Element childElement = (Element)node;
202
203 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
204 readMimeType(childElement);
205 }
206 }
207 }
208
209 protected void readMimeType(Element element) {
210 Set<String> mimeTypes = new HashSet<String>();
211
212 Set<String> extensions = new HashSet<String>();
213
214 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
215
216 mimeTypes.add(name);
217
218 NodeList nodeList = element.getChildNodes();
219
220 for (int i = 0; i < nodeList.getLength(); i++) {
221 Node node = nodeList.item(i);
222
223 if (node.getNodeType() != Node.ELEMENT_NODE) {
224 continue;
225 }
226
227 Element childElement = (Element)node;
228
229 if (ALIAS_TAG.equals(childElement.getTagName())) {
230 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
231
232 mimeTypes.add(alias);
233 }
234 else if (GLOB_TAG.equals(childElement.getTagName())) {
235 boolean isRegex = GetterUtil.getBoolean(
236 childElement.getAttribute(ISREGEX_ATTR));
237
238 if (isRegex) {
239 continue;
240 }
241
242 String pattern = childElement.getAttribute(PATTERN_ATTR);
243
244 if (!pattern.startsWith("*")) {
245 continue;
246 }
247
248 String extension = pattern.substring(1);
249
250 if (!extension.contains("*") && !extension.contains("?") &&
251 !extension.contains("[")) {
252
253 extensions.add(extension);
254 }
255 }
256 }
257
258 for (String mimeType : mimeTypes) {
259 _extensionsMap.put(mimeType, extensions);
260 }
261 }
262
263 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
264
265 private Detector _detector;
266 private Map<String, Set<String>> _extensionsMap =
267 new HashMap<String, Set<String>>();
268
269 }