001    /**
002     * Copyright (c) 2000-2011 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
018    import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
019    import com.liferay.portal.kernel.log.Log;
020    import com.liferay.portal.kernel.log.LogFactoryUtil;
021    import com.liferay.portal.kernel.util.ContentTypes;
022    import com.liferay.portal.kernel.util.FileComparator;
023    import com.liferay.portal.kernel.util.JavaProps;
024    import com.liferay.portal.kernel.util.MimeTypesUtil;
025    import com.liferay.portal.kernel.util.ServerDetector;
026    import com.liferay.portal.kernel.util.StreamUtil;
027    import com.liferay.portal.kernel.util.StringBundler;
028    import com.liferay.portal.kernel.util.StringPool;
029    import com.liferay.portal.kernel.util.StringUtil;
030    import com.liferay.portal.kernel.util.Time;
031    import com.liferay.portal.kernel.util.Validator;
032    import com.liferay.util.PwdGenerator;
033    import com.liferay.util.SystemProperties;
034    import com.liferay.util.lucene.JerichoHTMLTextExtractor;
035    
036    import java.io.BufferedInputStream;
037    import java.io.File;
038    import java.io.FileInputStream;
039    import java.io.FileOutputStream;
040    import java.io.FileReader;
041    import java.io.IOException;
042    import java.io.InputStream;
043    import java.io.OutputStreamWriter;
044    import java.io.RandomAccessFile;
045    import java.io.Reader;
046    import java.io.StringReader;
047    import java.io.Writer;
048    
049    import java.util.ArrayList;
050    import java.util.Arrays;
051    import java.util.HashMap;
052    import java.util.List;
053    import java.util.Map;
054    import java.util.Properties;
055    
056    import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
057    import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
058    import org.apache.jackrabbit.extractor.MsWordTextExtractor;
059    import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
060    import org.apache.jackrabbit.extractor.PdfTextExtractor;
061    import org.apache.jackrabbit.extractor.PlainTextExtractor;
062    import org.apache.jackrabbit.extractor.RTFTextExtractor;
063    import org.apache.jackrabbit.extractor.TextExtractor;
064    import org.apache.jackrabbit.extractor.XMLTextExtractor;
065    import org.apache.poi.POITextExtractor;
066    import org.apache.poi.extractor.ExtractorFactory;
067    
068    import org.mozilla.intl.chardet.nsDetector;
069    import org.mozilla.intl.chardet.nsPSMDetector;
070    
071    /**
072     * @author Brian Wing Shun Chan
073     * @author Alexander Chow
074     */
075    public class FileImpl implements com.liferay.portal.kernel.util.File {
076    
077            public static FileImpl getInstance() {
078                    return _instance;
079            }
080    
081            public FileImpl() {
082                    Class<?>[] textExtractorClasses = new Class[] {
083                            JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
084                            MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
085                            OpenOfficeTextExtractor.class, PdfTextExtractor.class,
086                            PlainTextExtractor.class, RTFTextExtractor.class,
087                            XMLTextExtractor.class
088                    };
089    
090                    for (Class<?> textExtractorClass : textExtractorClasses) {
091                            try {
092                                    TextExtractor textExtractor =
093                                            (TextExtractor)textExtractorClass.newInstance();
094    
095                                    String[] contentTypes = textExtractor.getContentTypes();
096    
097                                    for (String contentType : contentTypes) {
098                                            _textExtractors.put(contentType, textExtractor);
099                                    }
100                            }
101                            catch (Exception e) {
102                                    _log.error(e, e);
103                            }
104                    }
105            }
106    
107            public void copyDirectory(String sourceDirName, String destinationDirName) {
108                    copyDirectory(new File(sourceDirName), new File(destinationDirName));
109            }
110    
111            public void copyDirectory(File source, File destination) {
112                    if (source.exists() && source.isDirectory()) {
113                            if (!destination.exists()) {
114                                    destination.mkdirs();
115                            }
116    
117                            File[] fileArray = source.listFiles();
118    
119                            for (int i = 0; i < fileArray.length; i++) {
120                                    if (fileArray[i].isDirectory()) {
121                                            copyDirectory(
122                                                    fileArray[i],
123                                                    new File(destination.getPath() + File.separator
124                                                            + fileArray[i].getName()));
125                                    }
126                                    else {
127                                            copyFile(
128                                                    fileArray[i],
129                                                    new File(destination.getPath() + File.separator
130                                                            + fileArray[i].getName()));
131                                    }
132                            }
133                    }
134            }
135    
136            public void copyFile(String source, String destination) {
137                    copyFile(source, destination, false);
138            }
139    
140            public void copyFile(String source, String destination, boolean lazy) {
141                    copyFile(new File(source), new File(destination), lazy);
142            }
143    
144            public void copyFile(File source, File destination) {
145                    copyFile(source, destination, false);
146            }
147    
148            public void copyFile(File source, File destination, boolean lazy) {
149                    if (!source.exists()) {
150                            return;
151                    }
152    
153                    if (lazy) {
154                            String oldContent = null;
155    
156                            try {
157                                    oldContent = read(source);
158                            }
159                            catch (Exception e) {
160                                    return;
161                            }
162    
163                            String newContent = null;
164    
165                            try {
166                                    newContent = read(destination);
167                            }
168                            catch (Exception e) {
169                            }
170    
171                            if ((oldContent == null) || !oldContent.equals(newContent)) {
172                                    copyFile(source, destination, false);
173                            }
174                    }
175                    else {
176                            if ((destination.getParentFile() != null) &&
177                                    (!destination.getParentFile().exists())) {
178    
179                                    destination.getParentFile().mkdirs();
180                            }
181    
182                            try {
183                                    StreamUtil.transfer(
184                                            new FileInputStream(source),
185                                            new FileOutputStream(destination));
186                            }
187                            catch (IOException ioe) {
188                                    _log.error(ioe.getMessage());
189                            }
190                    }
191            }
192    
193            public File createTempFile() {
194                    return createTempFile(null);
195            }
196    
197            public File createTempFile(String extension) {
198                    return new File(createTempFileName(extension));
199            }
200    
201            public String createTempFileName() {
202                    return createTempFileName(null);
203            }
204    
205            public String createTempFileName(String extension) {
206                    StringBundler sb = new StringBundler();
207    
208                    sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
209                    sb.append(StringPool.SLASH);
210                    sb.append(Time.getTimestamp());
211                    sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
212    
213                    if (Validator.isNotNull(extension)) {
214                            sb.append(StringPool.PERIOD);
215                            sb.append(extension);
216                    }
217    
218                    return sb.toString();
219            }
220    
221            public String decodeSafeFileName(String fileName) {
222                    return StringUtil.replace(
223                            fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
224            }
225    
226            public boolean delete(String file) {
227                    return delete(new File(file));
228            }
229    
230            public boolean delete(File file) {
231                    if ((file != null) && file.exists()) {
232                            return file.delete();
233                    }
234                    else {
235                            return false;
236                    }
237            }
238    
239            public void deltree(String directory) {
240                    deltree(new File(directory));
241            }
242    
243            public void deltree(File directory) {
244                    if (directory.exists() && directory.isDirectory()) {
245                            File[] fileArray = directory.listFiles();
246    
247                            for (int i = 0; i < fileArray.length; i++) {
248                                    if (fileArray[i].isDirectory()) {
249                                            deltree(fileArray[i]);
250                                    }
251                                    else {
252                                            fileArray[i].delete();
253                                    }
254                            }
255    
256                            directory.delete();
257                    }
258            }
259    
260            public String encodeSafeFileName(String fileName) {
261                    if (fileName == null) {
262                            return StringPool.BLANK;
263                    }
264    
265                    return StringUtil.replace(
266                            fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
267            }
268    
269            public boolean exists(String fileName) {
270                    return exists(new File(fileName));
271            }
272    
273            public boolean exists(File file) {
274                    return file.exists();
275            }
276    
277            public String extractText(InputStream is, String fileName) {
278                    String text = null;
279    
280                    try {
281                            if (!is.markSupported()) {
282                                    is = new BufferedInputStream(is);
283                            }
284    
285                            String contentType = MimeTypesUtil.getContentType(is, fileName);
286    
287                            if (_log.isInfoEnabled()) {
288                                    _log.info(
289                                            "Attempting to extract text from " + fileName +
290                                                    " of type " + contentType);
291                            }
292    
293                            TextExtractor textExtractor = _textExtractors.get(contentType);
294    
295                            if (textExtractor != null) {
296                                    if (_log.isInfoEnabled()) {
297                                            _log.info(
298                                                    "Using text extractor " +
299                                                            textExtractor.getClass().getName());
300                                    }
301    
302                                    StringBuilder sb = new StringBuilder();
303    
304                                    Reader reader = null;
305    
306                                    if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
307                                            contentType.equals(ContentTypes.APPLICATION_MSWORD)) {
308    
309                                            if (_log.isWarnEnabled()) {
310                                                    _log.warn(
311                                                            "JOnAS 5 with JDK 6 has a known issue with text " +
312                                                                    "extraction of Word documents. Use JDK 5 if " +
313                                                                            "you require indexing of Word documents.");
314                                            }
315    
316                                            if (_log.isDebugEnabled()) {
317    
318                                                    // Execute code that will generate the error so it can
319                                                    // be fixed at a later date
320    
321                                                    reader = textExtractor.extractText(
322                                                            is, contentType, null);
323                                            }
324                                            else {
325                                                    reader = new StringReader(StringPool.BLANK);
326                                            }
327                                    }
328                                    else {
329                                            reader = textExtractor.extractText(
330                                                    is, contentType, null);
331                                    }
332    
333                                    try{
334                                            char[] buffer = new char[1024];
335    
336                                            int result = -1;
337    
338                                            while ((result = reader.read(buffer)) != -1) {
339                                                    sb.append(buffer, 0, result);
340                                            }
341                                    }
342                                    finally {
343                                            try {
344                                                    reader.close();
345                                            }
346                                            catch (IOException ioe) {
347                                            }
348                                    }
349    
350                                    text = sb.toString();
351                            }
352                            else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
353                                    contentType.startsWith(
354                                            "application/vnd.openxmlformats-officedocument.")) {
355    
356                                    try {
357                                            POITextExtractor poiTextExtractor =
358                                                    ExtractorFactory.createExtractor(is);
359    
360                                            text = poiTextExtractor.getText();
361                                    }
362                                    catch (Exception e) {
363                                            if (_log.isInfoEnabled()) {
364                                                    _log.info(e.getMessage());
365                                            }
366                                    }
367                            }
368                    }
369                    catch (Exception e) {
370                            _log.error(e, e);
371                    }
372    
373                    if (_log.isInfoEnabled()) {
374                            if (text == null) {
375                                    _log.info("No text extractor found for " + fileName);
376                            }
377                            else {
378                                    _log.info("Text was extracted for " + fileName);
379                            }
380                    }
381    
382                    if (_log.isDebugEnabled()) {
383                            _log.debug("Extractor returned text:\n\n" + text);
384                    }
385    
386                    if (text == null) {
387                            text = StringPool.BLANK;
388                    }
389    
390                    return text;
391            }
392    
393            public String getAbsolutePath(File file) {
394                    return StringUtil.replace(
395                            file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
396            }
397    
398            public byte[] getBytes(File file) throws IOException {
399                    if ((file == null) || !file.exists()) {
400                            return null;
401                    }
402    
403                    RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
404    
405                    byte[] bytes = new byte[(int)randomAccessFile.length()];
406    
407                    randomAccessFile.readFully(bytes);
408    
409                    randomAccessFile.close();
410    
411                    return bytes;
412            }
413    
414            public byte[] getBytes(InputStream is) throws IOException {
415                    return getBytes(is, -1);
416            }
417    
418            public byte[] getBytes(InputStream inputStream, int bufferSize)
419                    throws IOException {
420    
421                    UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
422                            new UnsyncByteArrayOutputStream();
423    
424                    StreamUtil.transfer(
425                            inputStream, unsyncByteArrayOutputStream, bufferSize);
426    
427                    return unsyncByteArrayOutputStream.toByteArray();
428            }
429    
430            public String getExtension(String fileName) {
431                    if (fileName == null) {
432                            return null;
433                    }
434    
435                    int pos = fileName.lastIndexOf(StringPool.PERIOD);
436    
437                    if (pos > 0) {
438                            return fileName.substring(pos + 1, fileName.length()).toLowerCase();
439                    }
440                    else {
441                            return StringPool.BLANK;
442                    }
443            }
444    
445            public String getPath(String fullFileName) {
446                    int pos = fullFileName.lastIndexOf(StringPool.SLASH);
447    
448                    if (pos == -1) {
449                            pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
450                    }
451    
452                    String shortFileName = fullFileName.substring(0, pos);
453    
454                    if (Validator.isNull(shortFileName)) {
455                            return StringPool.SLASH;
456                    }
457    
458                    return shortFileName;
459            }
460    
461            public String getShortFileName(String fullFileName) {
462                    int pos = fullFileName.lastIndexOf(StringPool.SLASH);
463    
464                    if (pos == -1) {
465                            pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
466                    }
467    
468                    String shortFileName =
469                            fullFileName.substring(pos + 1, fullFileName.length());
470    
471                    return shortFileName;
472            }
473    
474            public boolean isAscii(File file) throws IOException {
475                    boolean ascii = true;
476    
477                    nsDetector detector = new nsDetector(nsPSMDetector.ALL);
478    
479                    InputStream inputStream = new FileInputStream(file);
480    
481                    byte[] buffer = new byte[1024];
482    
483                    int len = 0;
484    
485                    while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
486                            if (ascii) {
487                                    ascii = detector.isAscii(buffer, len);
488    
489                                    if (!ascii) {
490                                            break;
491                                    }
492                            }
493                    }
494    
495                    detector.DataEnd();
496    
497                    inputStream.close();
498    
499                    return ascii;
500            }
501    
502            public String[] listDirs(String fileName) {
503                    return listDirs(new File(fileName));
504            }
505    
506            public String[] listDirs(File file) {
507                    List<String> dirs = new ArrayList<String>();
508    
509                    File[] fileArray = file.listFiles();
510    
511                    for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
512                            if (fileArray[i].isDirectory()) {
513                                    dirs.add(fileArray[i].getName());
514                            }
515                    }
516    
517                    return dirs.toArray(new String[dirs.size()]);
518            }
519    
520            public String[] listFiles(String fileName) {
521                    if (Validator.isNull(fileName)) {
522                            return new String[0];
523                    }
524    
525                    return listFiles(new File(fileName));
526            }
527    
528            public String[] listFiles(File file) {
529                    List<String> files = new ArrayList<String>();
530    
531                    File[] fileArray = file.listFiles();
532    
533                    for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
534                            if (fileArray[i].isFile()) {
535                                    files.add(fileArray[i].getName());
536                            }
537                    }
538    
539                    return files.toArray(new String[files.size()]);
540            }
541    
542            public void mkdirs(String pathName) {
543                    File file = new File(pathName);
544    
545                    file.mkdirs();
546            }
547    
548            public boolean move(String sourceFileName, String destinationFileName) {
549                    return move(new File(sourceFileName), new File(destinationFileName));
550            }
551    
552            public boolean move(File source, File destination) {
553                    if (!source.exists()) {
554                            return false;
555                    }
556    
557                    destination.delete();
558    
559                    return source.renameTo(destination);
560            }
561    
562            public String read(String fileName) throws IOException {
563                    return read(new File(fileName));
564            }
565    
566            public String read(File file) throws IOException {
567                    return read(file, false);
568            }
569    
570            public String read(File file, boolean raw) throws IOException {
571                    byte[] bytes = getBytes(file);
572    
573                    if (bytes == null) {
574                            return null;
575                    }
576    
577                    String s = new String(bytes, StringPool.UTF8);
578    
579                    if (raw) {
580                            return s;
581                    }
582                    else {
583                            return StringUtil.replace(
584                                    s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
585                    }
586            }
587    
588            public String replaceSeparator(String fileName) {
589                    return StringUtil.replace(
590                            fileName, StringPool.BACK_SLASH, StringPool.SLASH);
591            }
592    
593            public File[] sortFiles(File[] files) {
594                    if (files == null) {
595                            return null;
596                    }
597    
598                    Arrays.sort(files, new FileComparator());
599    
600                    List<File> directoryList = new ArrayList<File>();
601                    List<File> fileList = new ArrayList<File>();
602    
603                    for (int i = 0; i < files.length; i++) {
604                            if (files[i].isDirectory()) {
605                                    directoryList.add(files[i]);
606                            }
607                            else {
608                                    fileList.add(files[i]);
609                            }
610                    }
611    
612                    directoryList.addAll(fileList);
613    
614                    return directoryList.toArray(new File[directoryList.size()]);
615            }
616    
617            public String stripExtension(String fileName) {
618                    if (fileName == null) {
619                            return null;
620                    }
621    
622                    String ext = getExtension(fileName);
623    
624                    if (ext.length() > 0) {
625                            return fileName.substring(0, fileName.length() - ext.length() - 1);
626                    }
627                    else {
628                            return fileName;
629                    }
630            }
631    
632            public List<String> toList(Reader reader) {
633                    List<String> list = new ArrayList<String>();
634    
635                    try {
636                            UnsyncBufferedReader unsyncBufferedReader =
637                                    new UnsyncBufferedReader(reader);
638    
639                            String line = null;
640    
641                            while ((line = unsyncBufferedReader.readLine()) != null) {
642                                    list.add(line);
643                            }
644    
645                            unsyncBufferedReader.close();
646                    }
647                    catch (IOException ioe) {
648                    }
649    
650                    return list;
651            }
652    
653            public List<String> toList(String fileName) {
654                    try {
655                            return toList(new FileReader(fileName));
656                    }
657                    catch (IOException ioe) {
658                            return new ArrayList<String>();
659                    }
660            }
661    
662            public Properties toProperties(FileInputStream fis) {
663                    Properties props = new Properties();
664    
665                    try {
666                            props.load(fis);
667                    }
668                    catch (IOException ioe) {
669                    }
670    
671                    return props;
672            }
673    
674            public Properties toProperties(String fileName) {
675                    try {
676                            return toProperties(new FileInputStream(fileName));
677                    }
678                    catch (IOException ioe) {
679                            return new Properties();
680                    }
681            }
682    
683            public void write(String fileName, String s) throws IOException {
684                    write(new File(fileName), s);
685            }
686    
687            public void write(String fileName, String s, boolean lazy)
688                    throws IOException {
689    
690                    write(new File(fileName), s, lazy);
691            }
692    
693            public void write(String fileName, String s, boolean lazy, boolean append)
694                    throws IOException {
695    
696                    write(new File(fileName), s, lazy, append);
697            }
698    
699            public void write(String pathName, String fileName, String s)
700                    throws IOException {
701    
702                    write(new File(pathName, fileName), s);
703            }
704    
705            public void write(String pathName, String fileName, String s, boolean lazy)
706                    throws IOException {
707    
708                    write(new File(pathName, fileName), s, lazy);
709            }
710    
711            public void write(
712                            String pathName, String fileName, String s, boolean lazy,
713                            boolean append)
714                    throws IOException {
715    
716                    write(new File(pathName, fileName), s, lazy, append);
717            }
718    
719            public void write(File file, String s) throws IOException {
720                    write(file, s, false);
721            }
722    
723            public void write(File file, String s, boolean lazy)
724                    throws IOException {
725    
726                    write(file, s, lazy, false);
727            }
728    
729            public void write(File file, String s, boolean lazy, boolean append)
730                    throws IOException {
731    
732                    if (file.getParent() != null) {
733                            mkdirs(file.getParent());
734                    }
735    
736                    if (lazy && file.exists()) {
737                            String content = read(file);
738    
739                            if (content.equals(s)) {
740                                    return;
741                            }
742                    }
743    
744                    Writer writer = new OutputStreamWriter(
745                            new FileOutputStream(file, append), StringPool.UTF8);
746    
747                    writer.write(s);
748    
749                    writer.close();
750            }
751    
752            public void write(String fileName, byte[] bytes) throws IOException {
753                    write(new File(fileName), bytes);
754            }
755    
756            public void write(File file, byte[] bytes) throws IOException {
757                    write(file, bytes, 0, bytes.length);
758            }
759    
760            public void write(File file, byte[] bytes, int offset, int length)
761                    throws IOException {
762    
763                    if (file.getParent() != null) {
764                            mkdirs(file.getParent());
765                    }
766    
767                    FileOutputStream fos = new FileOutputStream(file);
768    
769                    fos.write(bytes, offset, length);
770    
771                    fos.close();
772            }
773    
774            public void write(String fileName, InputStream is) throws IOException {
775                    write(new File(fileName), is);
776            }
777    
778            public void write(File file, InputStream is) throws IOException {
779                    if (file.getParent() != null) {
780                            mkdirs(file.getParent());
781                    }
782    
783                    StreamUtil.transfer(is, new FileOutputStream(file));
784            }
785    
786            private static final String[] _SAFE_FILE_NAME_1 = {
787                    StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
788                    StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
789            };
790    
791            private static final String[] _SAFE_FILE_NAME_2 = {
792                    "_AMP_", "_CP_", "_OP_", "_SEM_"
793            };
794    
795            private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
796    
797            private static FileImpl _instance = new FileImpl();
798    
799            private Map<String, TextExtractor> _textExtractors =
800                    new HashMap<String, TextExtractor>();
801    
802    }