1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18  import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19  import com.liferay.portal.kernel.log.Log;
20  import com.liferay.portal.kernel.log.LogFactoryUtil;
21  import com.liferay.portal.kernel.util.ContentTypes;
22  import com.liferay.portal.kernel.util.FileComparator;
23  import com.liferay.portal.kernel.util.JavaProps;
24  import com.liferay.portal.kernel.util.MimeTypesUtil;
25  import com.liferay.portal.kernel.util.ServerDetector;
26  import com.liferay.portal.kernel.util.StreamUtil;
27  import com.liferay.portal.kernel.util.StringBundler;
28  import com.liferay.portal.kernel.util.StringPool;
29  import com.liferay.portal.kernel.util.StringUtil;
30  import com.liferay.portal.kernel.util.Time;
31  import com.liferay.portal.kernel.util.Validator;
32  import com.liferay.util.PwdGenerator;
33  import com.liferay.util.SystemProperties;
34  import com.liferay.util.lucene.JerichoHTMLTextExtractor;
35  
36  import java.io.BufferedInputStream;
37  import java.io.File;
38  import java.io.FileInputStream;
39  import java.io.FileOutputStream;
40  import java.io.FileReader;
41  import java.io.IOException;
42  import java.io.InputStream;
43  import java.io.OutputStreamWriter;
44  import java.io.Reader;
45  import java.io.StringReader;
46  import java.io.Writer;
47  
48  import java.util.ArrayList;
49  import java.util.Arrays;
50  import java.util.HashMap;
51  import java.util.List;
52  import java.util.Map;
53  import java.util.Properties;
54  
55  import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
56  import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
57  import org.apache.jackrabbit.extractor.MsWordTextExtractor;
58  import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
59  import org.apache.jackrabbit.extractor.PdfTextExtractor;
60  import org.apache.jackrabbit.extractor.PlainTextExtractor;
61  import org.apache.jackrabbit.extractor.RTFTextExtractor;
62  import org.apache.jackrabbit.extractor.TextExtractor;
63  import org.apache.jackrabbit.extractor.XMLTextExtractor;
64  import org.apache.poi.POITextExtractor;
65  import org.apache.poi.extractor.ExtractorFactory;
66  
67  import org.mozilla.intl.chardet.nsDetector;
68  import org.mozilla.intl.chardet.nsPSMDetector;
69  
70  /**
71   * <a href="FileImpl.java.html"><b><i>View Source</i></b></a>
72   *
73   * @author Brian Wing Shun Chan
74   * @author Alexander Chow
75   */
76  public class FileImpl implements com.liferay.portal.kernel.util.File {
77  
    /**
     * Returns the <code>FileImpl</code> instance held in this class's static
     * <code>_instance</code> field.
     *
     * @return the shared instance
     */
    public static FileImpl getInstance() {
        return _instance;
    }
81  
82      public FileImpl() {
83          Class<?>[] textExtractorClasses = new Class[] {
84              JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
85              MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
86              OpenOfficeTextExtractor.class, PdfTextExtractor.class,
87              PlainTextExtractor.class, RTFTextExtractor.class,
88              XMLTextExtractor.class
89          };
90  
91          for (Class<?> textExtractorClass : textExtractorClasses) {
92              try {
93                  TextExtractor textExtractor =
94                      (TextExtractor)textExtractorClass.newInstance();
95  
96                  String[] contentTypes = textExtractor.getContentTypes();
97  
98                  for (String contentType : contentTypes) {
99                      _textExtractors.put(contentType, textExtractor);
100                 }
101             }
102             catch (Exception e) {
103                 _log.error(e, e);
104             }
105         }
106     }
107 
108     public void copyDirectory(String sourceDirName, String destinationDirName) {
109         copyDirectory(new File(sourceDirName), new File(destinationDirName));
110     }
111 
112     public void copyDirectory(File source, File destination) {
113         if (source.exists() && source.isDirectory()) {
114             if (!destination.exists()) {
115                 destination.mkdirs();
116             }
117 
118             File[] fileArray = source.listFiles();
119 
120             for (int i = 0; i < fileArray.length; i++) {
121                 if (fileArray[i].isDirectory()) {
122                     copyDirectory(
123                         fileArray[i],
124                         new File(destination.getPath() + File.separator
125                             + fileArray[i].getName()));
126                 }
127                 else {
128                     copyFile(
129                         fileArray[i],
130                         new File(destination.getPath() + File.separator
131                             + fileArray[i].getName()));
132                 }
133             }
134         }
135     }
136 
137     public void copyFile(String source, String destination) {
138         copyFile(source, destination, false);
139     }
140 
141     public void copyFile(String source, String destination, boolean lazy) {
142         copyFile(new File(source), new File(destination), lazy);
143     }
144 
145     public void copyFile(File source, File destination) {
146         copyFile(source, destination, false);
147     }
148 
149     public void copyFile(File source, File destination, boolean lazy) {
150         if (!source.exists()) {
151             return;
152         }
153 
154         if (lazy) {
155             String oldContent = null;
156 
157             try {
158                 oldContent = read(source);
159             }
160             catch (Exception e) {
161                 return;
162             }
163 
164             String newContent = null;
165 
166             try {
167                 newContent = read(destination);
168             }
169             catch (Exception e) {
170             }
171 
172             if ((oldContent == null) || !oldContent.equals(newContent)) {
173                 copyFile(source, destination, false);
174             }
175         }
176         else {
177             if ((destination.getParentFile() != null) &&
178                 (!destination.getParentFile().exists())) {
179 
180                 destination.getParentFile().mkdirs();
181             }
182 
183             try {
184                 StreamUtil.transfer(
185                     new FileInputStream(source),
186                     new FileOutputStream(destination));
187             }
188             catch (IOException ioe) {
189                 _log.error(ioe.getMessage());
190             }
191         }
192     }
193 
194     public File createTempFile() {
195         return createTempFile(null);
196     }
197 
198     public File createTempFile(String extension) {
199         return new File(createTempFileName(extension));
200     }
201 
202     public String createTempFileName() {
203         return createTempFileName(null);
204     }
205 
206     public String createTempFileName(String extension) {
207         StringBundler sb = new StringBundler();
208 
209         sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
210         sb.append(StringPool.SLASH);
211         sb.append(Time.getTimestamp());
212         sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
213 
214         if (Validator.isNotNull(extension)) {
215             sb.append(StringPool.PERIOD);
216             sb.append(extension);
217         }
218 
219         return sb.toString();
220     }
221 
222     public String decodeSafeFileName(String fileName) {
223         return StringUtil.replace(
224             fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
225     }
226 
227     public boolean delete(String file) {
228         return delete(new File(file));
229     }
230 
231     public boolean delete(File file) {
232         if ((file != null) && file.exists()) {
233             return file.delete();
234         }
235         else {
236             return false;
237         }
238     }
239 
240     public void deltree(String directory) {
241         deltree(new File(directory));
242     }
243 
244     public void deltree(File directory) {
245         if (directory.exists() && directory.isDirectory()) {
246             File[] fileArray = directory.listFiles();
247 
248             for (int i = 0; i < fileArray.length; i++) {
249                 if (fileArray[i].isDirectory()) {
250                     deltree(fileArray[i]);
251                 }
252                 else {
253                     fileArray[i].delete();
254                 }
255             }
256 
257             directory.delete();
258         }
259     }
260 
261     public String encodeSafeFileName(String fileName) {
262         if (fileName == null) {
263             return StringPool.BLANK;
264         }
265 
266         return StringUtil.replace(
267             fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
268     }
269 
270     public boolean exists(String fileName) {
271         return exists(new File(fileName));
272     }
273 
274     public boolean exists(File file) {
275         return file.exists();
276     }
277 
    /**
     * Extracts the textual content of a document stream.
     *
     * <p>
     * The content type is determined from the stream and file name. If an
     * extractor was registered for that content type it is used; otherwise,
     * for ZIP and "application/vnd.openxmlformats-officedocument." content
     * types, extraction is attempted through POI. Failures are logged and
     * never thrown. This method does not itself call <code>close()</code> on
     * <code>is</code>.
     * </p>
     *
     * @param  is the stream to read the document from
     * @param  fileName the name of the document, used for content type
     *         detection and log messages
     * @return the extracted text, or a blank string if no text could be
     *         extracted
     */
    public String extractText(InputStream is, String fileName) {
        String text = null;

        try {

            // Wrap streams without mark support in a buffered stream
            // (presumably so content type detection can rewind the stream;
            // confirm against MimeTypesUtil)

            if (!is.markSupported()) {
                is = new BufferedInputStream(is);
            }

            String contentType = MimeTypesUtil.getContentType(is, fileName);

            if (_log.isInfoEnabled()) {
                _log.info(
                    "Attempting to extract text from " + fileName +
                        " of type " + contentType);
            }

            // Extractors were indexed by content type in the constructor

            TextExtractor textExtractor = _textExtractors.get(contentType);

            if (textExtractor != null) {
                if (_log.isInfoEnabled()) {
                    _log.info(
                        "Using text extractor " +
                            textExtractor.getClass().getName());
                }

                StringBuilder sb = new StringBuilder();

                Reader reader = null;

                // Special case: Word documents on JOnAS with JDK 6 are not
                // extracted unless debug logging is enabled

                if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
                    contentType.equals(ContentTypes.APPLICATION_MSWORD)) {

                    if (_log.isWarnEnabled()) {
                        _log.warn(
                            "JOnAS 5 with JDK 6 has a known issue with text " +
                                "extraction of Word documents. Use JDK 5 if " +
                                    "you require indexing of Word documents.");
                    }

                    if (_log.isDebugEnabled()) {

                        // Execute code that will generate the error so it can
                        // be fixed at a later date

                        reader = textExtractor.extractText(
                            is, contentType, null);
                    }
                    else {
                        reader = new StringReader(StringPool.BLANK);
                    }
                }
                else {
                    reader = textExtractor.extractText(
                        is, contentType, null);
                }

                // Drain the reader into the buffer and always close it

                try{
                    char[] buffer = new char[1024];

                    int result = -1;

                    while ((result = reader.read(buffer)) != -1) {
                        sb.append(buffer, 0, result);
                    }
                }
                finally {
                    try {
                        reader.close();
                    }
                    catch (IOException ioe) {
                    }
                }

                text = sb.toString();
            }
            else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
                contentType.startsWith(
                    "application/vnd.openxmlformats-officedocument.")) {

                // No registered extractor: let POI try to handle the stream.
                // A failure here is only reported at info level.

                try {
                    POITextExtractor poiTextExtractor =
                        ExtractorFactory.createExtractor(is);

                    text = poiTextExtractor.getText();
                }
                catch (Exception e) {
                    if (_log.isInfoEnabled()) {
                        _log.info(e.getMessage());
                    }
                }
            }
        }
        catch (Exception e) {
            _log.error(e, e);
        }

        // Note that text is also null here when extraction was attempted but
        // failed, not only when no extractor matched

        if (_log.isInfoEnabled()) {
            if (text == null) {
                _log.info("No text extractor found for " + fileName);
            }
            else {
                _log.info("Text was extracted for " + fileName);
            }
        }

        if (_log.isDebugEnabled()) {
            _log.debug("Extractor returned text:\n\n" + text);
        }

        // Callers always receive a non-null string

        if (text == null) {
            text = StringPool.BLANK;
        }

        return text;
    }
393 
394     public String getAbsolutePath(File file) {
395         return StringUtil.replace(
396             file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
397     }
398 
399     public byte[] getBytes(File file) throws IOException {
400         if ((file == null) || !file.exists()) {
401             return null;
402         }
403 
404         FileInputStream is = new FileInputStream(file);
405 
406         byte[] bytes = getBytes(is, (int)file.length());
407 
408         is.close();
409 
410         return bytes;
411     }
412 
413     public byte[] getBytes(InputStream is) throws IOException {
414         return getBytes(is, -1);
415     }
416 
417     public byte[] getBytes(InputStream inputStream, int bufferSize)
418         throws IOException {
419 
420         UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
421             new UnsyncByteArrayOutputStream();
422 
423         StreamUtil.transfer(
424             inputStream, unsyncByteArrayOutputStream, bufferSize);
425 
426         return unsyncByteArrayOutputStream.toByteArray();
427     }
428 
429     public String getExtension(String fileName) {
430         if (fileName == null) {
431             return null;
432         }
433 
434         int pos = fileName.lastIndexOf(StringPool.PERIOD);
435 
436         if (pos > 0) {
437             return fileName.substring(pos + 1, fileName.length()).toLowerCase();
438         }
439         else {
440             return StringPool.BLANK;
441         }
442     }
443 
444     public String getPath(String fullFileName) {
445         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
446 
447         if (pos == -1) {
448             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
449         }
450 
451         String shortFileName = fullFileName.substring(0, pos);
452 
453         if (Validator.isNull(shortFileName)) {
454             return StringPool.SLASH;
455         }
456 
457         return shortFileName;
458     }
459 
460     public String getShortFileName(String fullFileName) {
461         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
462 
463         if (pos == -1) {
464             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
465         }
466 
467         String shortFileName =
468             fullFileName.substring(pos + 1, fullFileName.length());
469 
470         return shortFileName;
471     }
472 
473     public boolean isAscii(File file) throws IOException {
474         boolean ascii = true;
475 
476         nsDetector detector = new nsDetector(nsPSMDetector.ALL);
477 
478         InputStream inputStream = new FileInputStream(file);
479 
480         byte[] buffer = new byte[1024];
481 
482         int len = 0;
483 
484         while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
485 
486             if (ascii) {
487                 ascii = detector.isAscii(buffer, len);
488 
489                 if (!ascii) {
490                     break;
491                 }
492             }
493         }
494 
495         detector.DataEnd();
496 
497         inputStream.close();
498 
499         return ascii;
500     }
501 
502     public String[] listDirs(String fileName) {
503         return listDirs(new File(fileName));
504     }
505 
506     public String[] listDirs(File file) {
507         List<String> dirs = new ArrayList<String>();
508 
509         File[] fileArray = file.listFiles();
510 
511         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
512             if (fileArray[i].isDirectory()) {
513                 dirs.add(fileArray[i].getName());
514             }
515         }
516 
517         return dirs.toArray(new String[dirs.size()]);
518     }
519 
520     public String[] listFiles(String fileName) {
521         if (Validator.isNull(fileName)) {
522             return new String[0];
523         }
524 
525         return listFiles(new File(fileName));
526     }
527 
528     public String[] listFiles(File file) {
529         List<String> files = new ArrayList<String>();
530 
531         File[] fileArray = file.listFiles();
532 
533         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
534             if (fileArray[i].isFile()) {
535                 files.add(fileArray[i].getName());
536             }
537         }
538 
539         return files.toArray(new String[files.size()]);
540     }
541 
542     public void mkdirs(String pathName) {
543         File file = new File(pathName);
544 
545         file.mkdirs();
546     }
547 
548     public boolean move(String sourceFileName, String destinationFileName) {
549         return move(new File(sourceFileName), new File(destinationFileName));
550     }
551 
552     public boolean move(File source, File destination) {
553         if (!source.exists()) {
554             return false;
555         }
556 
557         destination.delete();
558 
559         return source.renameTo(destination);
560     }
561 
562     public String read(String fileName) throws IOException {
563         return read(new File(fileName));
564     }
565 
566     public String read(File file) throws IOException {
567         return read(file, false);
568     }
569 
570     public String read(File file, boolean raw) throws IOException {
571         FileInputStream fis = new FileInputStream(file);
572 
573         byte[] bytes = new byte[fis.available()];
574 
575         fis.read(bytes);
576 
577         fis.close();
578 
579         String s = new String(bytes, StringPool.UTF8);
580 
581         if (raw) {
582             return s;
583         }
584         else {
585             return StringUtil.replace(
586                 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
587         }
588     }
589 
590     public String replaceSeparator(String fileName) {
591         return StringUtil.replace(
592             fileName, StringPool.BACK_SLASH, StringPool.SLASH);
593     }
594 
595     public File[] sortFiles(File[] files) {
596         if (files == null) {
597             return null;
598         }
599 
600         Arrays.sort(files, new FileComparator());
601 
602         List<File> directoryList = new ArrayList<File>();
603         List<File> fileList = new ArrayList<File>();
604 
605         for (int i = 0; i < files.length; i++) {
606             if (files[i].isDirectory()) {
607                 directoryList.add(files[i]);
608             }
609             else {
610                 fileList.add(files[i]);
611             }
612         }
613 
614         directoryList.addAll(fileList);
615 
616         return directoryList.toArray(new File[directoryList.size()]);
617     }
618 
619     public String stripExtension(String fileName) {
620         if (fileName == null) {
621             return null;
622         }
623 
624         String ext = getExtension(fileName);
625 
626         if (ext.length() > 0) {
627             return fileName.substring(0, fileName.length() - ext.length() - 1);
628         }
629         else {
630             return fileName;
631         }
632     }
633 
634     public List<String> toList(Reader reader) {
635         List<String> list = new ArrayList<String>();
636 
637         try {
638             UnsyncBufferedReader unsyncBufferedReader =
639                 new UnsyncBufferedReader(reader);
640 
641             String line = null;
642 
643             while ((line = unsyncBufferedReader.readLine()) != null) {
644                 list.add(line);
645             }
646 
647             unsyncBufferedReader.close();
648         }
649         catch (IOException ioe) {
650         }
651 
652         return list;
653     }
654 
655     public List<String> toList(String fileName) {
656         try {
657             return toList(new FileReader(fileName));
658         }
659         catch (IOException ioe) {
660             return new ArrayList<String>();
661         }
662     }
663 
664     public Properties toProperties(FileInputStream fis) {
665         Properties props = new Properties();
666 
667         try {
668             props.load(fis);
669         }
670         catch (IOException ioe) {
671         }
672 
673         return props;
674     }
675 
676     public Properties toProperties(String fileName) {
677         try {
678             return toProperties(new FileInputStream(fileName));
679         }
680         catch (IOException ioe) {
681             return new Properties();
682         }
683     }
684 
685     public void write(String fileName, String s) throws IOException {
686         write(new File(fileName), s);
687     }
688 
689     public void write(String fileName, String s, boolean lazy)
690         throws IOException {
691 
692         write(new File(fileName), s, lazy);
693     }
694 
695     public void write(String fileName, String s, boolean lazy, boolean append)
696         throws IOException {
697 
698         write(new File(fileName), s, lazy, append);
699     }
700 
701     public void write(String pathName, String fileName, String s)
702         throws IOException {
703 
704         write(new File(pathName, fileName), s);
705     }
706 
707     public void write(String pathName, String fileName, String s, boolean lazy)
708         throws IOException {
709 
710         write(new File(pathName, fileName), s, lazy);
711     }
712 
713     public void write(
714             String pathName, String fileName, String s, boolean lazy,
715             boolean append)
716         throws IOException {
717 
718         write(new File(pathName, fileName), s, lazy, append);
719     }
720 
721     public void write(File file, String s) throws IOException {
722         write(file, s, false);
723     }
724 
725     public void write(File file, String s, boolean lazy)
726         throws IOException {
727 
728         write(file, s, lazy, false);
729     }
730 
731     public void write(File file, String s, boolean lazy, boolean append)
732         throws IOException {
733 
734         if (file.getParent() != null) {
735             mkdirs(file.getParent());
736         }
737 
738         if (lazy && file.exists()) {
739             String content = read(file);
740 
741             if (content.equals(s)) {
742                 return;
743             }
744         }
745 
746         Writer writer = new OutputStreamWriter(
747             new FileOutputStream(file, append), StringPool.UTF8);
748 
749         writer.write(s);
750 
751         writer.close();
752     }
753 
754     public void write(String fileName, byte[] bytes) throws IOException {
755         write(new File(fileName), bytes);
756     }
757 
758     public void write(File file, byte[] bytes) throws IOException {
759         write(file, bytes, 0, bytes.length);
760     }
761 
762     public void write(File file, byte[] bytes, int offset, int length)
763         throws IOException {
764 
765         if (file.getParent() != null) {
766             mkdirs(file.getParent());
767         }
768 
769         FileOutputStream fos = new FileOutputStream(file);
770 
771         fos.write(bytes, offset, length);
772 
773         fos.close();
774     }
775 
776     public void write(String fileName, InputStream is) throws IOException {
777         write(new File(fileName), is);
778     }
779 
780     public void write(File file, InputStream is) throws IOException {
781         if (file.getParent() != null) {
782             mkdirs(file.getParent());
783         }
784 
785         StreamUtil.transfer(is, new FileOutputStream(file));
786     }
787 
788     private static final String[] _SAFE_FILE_NAME_1 = {
789         StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
790         StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
791     };
792 
793     private static final String[] _SAFE_FILE_NAME_2 = {
794         "_AMP_", "_CP_", "_OP_", "_SEM_"
795     };
796 
797     private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
798 
799     private static FileImpl _instance = new FileImpl();
800 
801     private Map<String, TextExtractor> _textExtractors =
802         new HashMap<String, TextExtractor>();
803 
804 }