001
014
015 package com.liferay.portal.metadata;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.io.DummyWriter;
019 import com.liferay.portal.kernel.log.Log;
020 import com.liferay.portal.kernel.log.LogFactoryUtil;
021 import com.liferay.portal.kernel.util.StreamUtil;
022
023 import java.io.File;
024 import java.io.FileInputStream;
025 import java.io.IOException;
026 import java.io.InputStream;
027
028 import org.apache.tika.metadata.Metadata;
029 import org.apache.tika.parser.ParseContext;
030 import org.apache.tika.parser.Parser;
031 import org.apache.tika.sax.WriteOutContentHandler;
032
033 import org.xml.sax.ContentHandler;
034
035
040 public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
041
042 @Override
043 public Metadata extractMetadata(
044 String extension, String mimeType, File file)
045 throws SystemException {
046
047 Metadata metadata = super.extractMetadata(extension, mimeType, file);
048
049 InputStream inputStream = null;
050
051 try {
052 inputStream = new FileInputStream(file);
053
054 return extractMetadata(inputStream, metadata);
055 }
056 catch (IOException ioe) {
057 throw new SystemException(ioe);
058 }
059 finally {
060 StreamUtil.cleanUp(inputStream);
061 }
062 }
063
064 @Override
065 public Metadata extractMetadata(
066 String extension, String mimeType, InputStream inputStream)
067 throws SystemException {
068
069 Metadata metadata = super.extractMetadata(
070 extension, mimeType, inputStream);
071
072 try {
073 return extractMetadata(inputStream, metadata);
074 }
075 catch (IOException ioe) {
076 throw new SystemException(ioe);
077 }
078 }
079
080 public void setParser(Parser parser) {
081 _parser = parser;
082 }
083
084 protected Metadata extractMetadata(
085 InputStream inputStream, Metadata metadata)
086 throws IOException {
087
088 if (metadata == null) {
089 metadata = new Metadata();
090 }
091
092 ParseContext parserContext = new ParseContext();
093
094 parserContext.set(Parser.class, _parser);
095
096 ContentHandler contentHandler = new WriteOutContentHandler(
097 new DummyWriter());
098
099 try {
100 _parser.parse(inputStream, contentHandler, metadata, parserContext);
101 }
102 catch (Exception e) {
103 _log.error("Unable to parse", e);
104
105 throw new IOException(e.getMessage());
106 }
107
108
109
110 metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
111 metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
112
113 return metadata;
114 }
115
116 private static Log _log = LogFactoryUtil.getLog(
117 TikaRawMetadataProcessor.class);
118
119 private Parser _parser;
120
121 }