001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.metadata;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.io.DummyWriter;
019    import com.liferay.portal.kernel.log.Log;
020    import com.liferay.portal.kernel.log.LogFactoryUtil;
021    import com.liferay.portal.kernel.util.StreamUtil;
022    
023    import java.io.File;
024    import java.io.FileInputStream;
025    import java.io.IOException;
026    import java.io.InputStream;
027    
028    import org.apache.tika.metadata.Metadata;
029    import org.apache.tika.parser.ParseContext;
030    import org.apache.tika.parser.Parser;
031    import org.apache.tika.sax.WriteOutContentHandler;
032    
033    import org.xml.sax.ContentHandler;
034    
035    /**
036     * @author Miguel Pastor
037     * @author Alexander Chow
038     * @author Shuyang Zhou
039     */
040    public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
041    
042            @Override
043            public Metadata extractMetadata(
044                            String extension, String mimeType, File file)
045                    throws SystemException {
046    
047                    Metadata metadata = super.extractMetadata(extension, mimeType, file);
048    
049                    InputStream inputStream = null;
050    
051                    try {
052                            inputStream = new FileInputStream(file);
053    
054                            return extractMetadata(inputStream, metadata);
055                    }
056                    catch (IOException ioe) {
057                            throw new SystemException(ioe);
058                    }
059                    finally {
060                            StreamUtil.cleanUp(inputStream);
061                    }
062            }
063    
064            @Override
065            public Metadata extractMetadata(
066                            String extension, String mimeType, InputStream inputStream)
067                    throws SystemException {
068    
069                    Metadata metadata = super.extractMetadata(
070                            extension, mimeType, inputStream);
071    
072                    try {
073                            return extractMetadata(inputStream, metadata);
074                    }
075                    catch (IOException ioe) {
076                            throw new SystemException(ioe);
077                    }
078            }
079    
080            public void setParser(Parser parser) {
081                    _parser = parser;
082            }
083    
084            protected Metadata extractMetadata(
085                            InputStream inputStream, Metadata metadata)
086                    throws IOException {
087    
088                    if (metadata == null) {
089                            metadata = new Metadata();
090                    }
091    
092                    ParseContext parserContext = new ParseContext();
093    
094                    parserContext.set(Parser.class, _parser);
095    
096                    ContentHandler contentHandler = new WriteOutContentHandler(
097                            new DummyWriter());
098    
099                    try {
100                            _parser.parse(inputStream, contentHandler, metadata, parserContext);
101                    }
102                    catch (Exception e) {
103                            _log.error("Unable to parse", e);
104    
105                            throw new IOException(e.getMessage());
106                    }
107    
108                    // Remove potential security risks
109    
110                    metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
111                    metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
112    
113                    return metadata;
114            }
115    
116            private static Log _log = LogFactoryUtil.getLog(
117                    TikaRawMetadataProcessor.class);
118    
119            private Parser _parser;
120    
121    }