1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * The contents of this file are subject to the terms of the Liferay Enterprise
5    * Subscription License ("License"). You may not use this file except in
6    * compliance with the License. You can obtain a copy of the License by
7    * contacting Liferay, Inc. See the License for the specific language governing
8    * permissions and limitations under the License, including but not limited to
9    * distribution rights of the Software.
10   *
11   *
12   * 
13   */
14  
15  package com.liferay.portal.util;
16  
17  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
18  import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
19  import com.liferay.portal.kernel.log.Log;
20  import com.liferay.portal.kernel.log.LogFactoryUtil;
21  import com.liferay.portal.kernel.util.ContentTypes;
22  import com.liferay.portal.kernel.util.FileComparator;
23  import com.liferay.portal.kernel.util.MimeTypesUtil;
24  import com.liferay.portal.kernel.util.StreamUtil;
25  import com.liferay.portal.kernel.util.StringBundler;
26  import com.liferay.portal.kernel.util.StringPool;
27  import com.liferay.portal.kernel.util.StringUtil;
28  import com.liferay.portal.kernel.util.Time;
29  import com.liferay.portal.kernel.util.Validator;
30  import com.liferay.util.PwdGenerator;
31  import com.liferay.util.SystemProperties;
32  import com.liferay.util.lucene.JerichoHTMLTextExtractor;
33  
34  import java.io.BufferedInputStream;
35  import java.io.File;
36  import java.io.FileInputStream;
37  import java.io.FileOutputStream;
38  import java.io.FileReader;
39  import java.io.IOException;
40  import java.io.InputStream;
41  import java.io.OutputStreamWriter;
42  import java.io.Reader;
43  import java.io.Writer;
44  
45  import java.util.ArrayList;
46  import java.util.Arrays;
47  import java.util.HashMap;
48  import java.util.List;
49  import java.util.Map;
50  import java.util.Properties;
51  
52  import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
53  import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
54  import org.apache.jackrabbit.extractor.MsWordTextExtractor;
55  import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
56  import org.apache.jackrabbit.extractor.PdfTextExtractor;
57  import org.apache.jackrabbit.extractor.PlainTextExtractor;
58  import org.apache.jackrabbit.extractor.RTFTextExtractor;
59  import org.apache.jackrabbit.extractor.TextExtractor;
60  import org.apache.jackrabbit.extractor.XMLTextExtractor;
61  import org.apache.poi.POITextExtractor;
62  import org.apache.poi.extractor.ExtractorFactory;
63  
64  import org.mozilla.intl.chardet.nsDetector;
65  import org.mozilla.intl.chardet.nsPSMDetector;
66  
67  /**
68   * <a href="FileImpl.java.html"><b><i>View Source</i></b></a>
69   *
70   * @author Brian Wing Shun Chan
71   * @author Alexander Chow
72   */
73  public class FileImpl implements com.liferay.portal.kernel.util.File {
74  
75      public static FileImpl getInstance() {
76          return _instance;
77      }
78  
79      public FileImpl() {
80          Class<?>[] textExtractorClasses = new Class[] {
81              JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
82              MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
83              OpenOfficeTextExtractor.class, PdfTextExtractor.class,
84              PlainTextExtractor.class, RTFTextExtractor.class,
85              XMLTextExtractor.class
86          };
87  
88          for (Class<?> textExtractorClass : textExtractorClasses) {
89              try {
90                  TextExtractor textExtractor =
91                      (TextExtractor)textExtractorClass.newInstance();
92  
93                  String[] contentTypes = textExtractor.getContentTypes();
94  
95                  for (String contentType : contentTypes) {
96                      _textExtractors.put(contentType, textExtractor);
97                  }
98              }
99              catch (Exception e) {
100                 _log.error(e, e);
101             }
102         }
103     }
104 
105     public void copyDirectory(String sourceDirName, String destinationDirName) {
106         copyDirectory(new File(sourceDirName), new File(destinationDirName));
107     }
108 
109     public void copyDirectory(File source, File destination) {
110         if (source.exists() && source.isDirectory()) {
111             if (!destination.exists()) {
112                 destination.mkdirs();
113             }
114 
115             File[] fileArray = source.listFiles();
116 
117             for (int i = 0; i < fileArray.length; i++) {
118                 if (fileArray[i].isDirectory()) {
119                     copyDirectory(
120                         fileArray[i],
121                         new File(destination.getPath() + File.separator
122                             + fileArray[i].getName()));
123                 }
124                 else {
125                     copyFile(
126                         fileArray[i],
127                         new File(destination.getPath() + File.separator
128                             + fileArray[i].getName()));
129                 }
130             }
131         }
132     }
133 
134     public void copyFile(String source, String destination) {
135         copyFile(source, destination, false);
136     }
137 
138     public void copyFile(String source, String destination, boolean lazy) {
139         copyFile(new File(source), new File(destination), lazy);
140     }
141 
142     public void copyFile(File source, File destination) {
143         copyFile(source, destination, false);
144     }
145 
146     public void copyFile(File source, File destination, boolean lazy) {
147         if (!source.exists()) {
148             return;
149         }
150 
151         if (lazy) {
152             String oldContent = null;
153 
154             try {
155                 oldContent = read(source);
156             }
157             catch (Exception e) {
158                 return;
159             }
160 
161             String newContent = null;
162 
163             try {
164                 newContent = read(destination);
165             }
166             catch (Exception e) {
167             }
168 
169             if ((oldContent == null) || !oldContent.equals(newContent)) {
170                 copyFile(source, destination, false);
171             }
172         }
173         else {
174             if ((destination.getParentFile() != null) &&
175                 (!destination.getParentFile().exists())) {
176 
177                 destination.getParentFile().mkdirs();
178             }
179 
180             try {
181                 StreamUtil.transfer(
182                     new FileInputStream(source),
183                     new FileOutputStream(destination));
184             }
185             catch (IOException ioe) {
186                 _log.error(ioe.getMessage());
187             }
188         }
189     }
190 
191     public File createTempFile() {
192         return createTempFile(null);
193     }
194 
195     public File createTempFile(String extension) {
196         return new File(createTempFileName(extension));
197     }
198 
199     public String createTempFileName() {
200         return createTempFileName(null);
201     }
202 
203     public String createTempFileName(String extension) {
204         StringBundler sb = new StringBundler();
205 
206         sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
207         sb.append(StringPool.SLASH);
208         sb.append(Time.getTimestamp());
209         sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
210 
211         if (Validator.isNotNull(extension)) {
212             sb.append(StringPool.PERIOD);
213             sb.append(extension);
214         }
215 
216         return sb.toString();
217     }
218 
219     public String decodeSafeFileName(String fileName) {
220         return StringUtil.replace(
221             fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
222     }
223 
224     public boolean delete(String file) {
225         return delete(new File(file));
226     }
227 
228     public boolean delete(File file) {
229         if ((file != null) && file.exists()) {
230             return file.delete();
231         }
232         else {
233             return false;
234         }
235     }
236 
237     public void deltree(String directory) {
238         deltree(new File(directory));
239     }
240 
241     public void deltree(File directory) {
242         if (directory.exists() && directory.isDirectory()) {
243             File[] fileArray = directory.listFiles();
244 
245             for (int i = 0; i < fileArray.length; i++) {
246                 if (fileArray[i].isDirectory()) {
247                     deltree(fileArray[i]);
248                 }
249                 else {
250                     fileArray[i].delete();
251                 }
252             }
253 
254             directory.delete();
255         }
256     }
257 
258     public String encodeSafeFileName(String fileName) {
259         return StringUtil.replace(
260             fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
261     }
262 
263     public boolean exists(String fileName) {
264         return exists(new File(fileName));
265     }
266 
267     public boolean exists(File file) {
268         return file.exists();
269     }
270 
271     public String extractText(InputStream is, String fileName) {
272         String text = null;
273 
274         try {
275             if (!is.markSupported()) {
276                 is = new BufferedInputStream(is);
277             }
278 
279             String contentType = MimeTypesUtil.getContentType(is, fileName);
280 
281             TextExtractor textExtractor = _textExtractors.get(contentType);
282 
283             if (textExtractor != null) {
284                 if (_log.isInfoEnabled()) {
285                     _log.info(
286                         "Using text extractor " +
287                             textExtractor.getClass().getName());
288                 }
289 
290                 StringBuilder sb = new StringBuilder();
291 
292                 Reader reader = textExtractor.extractText(
293                     is, contentType, null);
294 
295                 try{
296                     char[] buffer = new char[1024];
297 
298                     int result = -1;
299 
300                     while ((result = reader.read(buffer)) != -1) {
301                         sb.append(buffer, 0, result);
302                     }
303                 }
304                 finally {
305                     try {
306                         reader.close();
307                     }
308                     catch (IOException ioe) {
309                     }
310                 }
311 
312                 text = sb.toString();
313             }
314             else {
315                 if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
316                     contentType.startsWith(
317                         "application/vnd.openxmlformats-officedocument.")) {
318 
319                     try {
320                         POITextExtractor poiTextExtractor =
321                             ExtractorFactory.createExtractor(is);
322 
323                         text = poiTextExtractor.getText();
324                     }
325                     catch (Exception e) {
326                         if (_log.isWarnEnabled()) {
327                             _log.warn(e, e);
328                         }
329                     }
330                 }
331 
332                 if ((text == null) && _log.isInfoEnabled()) {
333                     _log.info("No text extractor found for " + fileName);
334                 }
335             }
336         }
337         catch (Exception e) {
338             _log.error(e);
339         }
340 
341         if (_log.isDebugEnabled()) {
342             _log.debug("Extractor returned text:\n\n" + text);
343         }
344 
345         if (text == null) {
346             text = StringPool.BLANK;
347         }
348 
349         return text;
350     }
351 
352     public String getAbsolutePath(File file) {
353         return StringUtil.replace(
354             file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
355     }
356 
357     public byte[] getBytes(File file) throws IOException {
358         if ((file == null) || !file.exists()) {
359             return null;
360         }
361 
362         FileInputStream is = new FileInputStream(file);
363 
364         byte[] bytes = getBytes(is, (int)file.length());
365 
366         is.close();
367 
368         return bytes;
369     }
370 
371     public byte[] getBytes(InputStream is) throws IOException {
372         return getBytes(is, -1);
373     }
374 
375     public byte[] getBytes(InputStream inputStream, int bufferSize)
376         throws IOException {
377 
378         UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
379             new UnsyncByteArrayOutputStream();
380 
381         StreamUtil.transfer(
382             inputStream, unsyncByteArrayOutputStream, bufferSize);
383 
384         return unsyncByteArrayOutputStream.toByteArray();
385     }
386 
387     public String getExtension(String fileName) {
388         if (fileName == null) {
389             return null;
390         }
391 
392         int pos = fileName.lastIndexOf(StringPool.PERIOD);
393 
394         if (pos != -1) {
395             return fileName.substring(pos + 1, fileName.length()).toLowerCase();
396         }
397         else {
398             return null;
399         }
400     }
401 
402     public String getPath(String fullFileName) {
403         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
404 
405         if (pos == -1) {
406             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
407         }
408 
409         String shortFileName = fullFileName.substring(0, pos);
410 
411         if (Validator.isNull(shortFileName)) {
412             return StringPool.SLASH;
413         }
414 
415         return shortFileName;
416     }
417 
418     public String getShortFileName(String fullFileName) {
419         int pos = fullFileName.lastIndexOf(StringPool.SLASH);
420 
421         if (pos == -1) {
422             pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
423         }
424 
425         String shortFileName =
426             fullFileName.substring(pos + 1, fullFileName.length());
427 
428         return shortFileName;
429     }
430 
431     public boolean isAscii(File file) throws IOException {
432         boolean ascii = true;
433 
434         nsDetector detector = new nsDetector(nsPSMDetector.ALL);
435 
436         InputStream inputStream = new FileInputStream(file);
437 
438         byte[] buffer = new byte[1024];
439 
440         int len = 0;
441 
442         while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
443 
444             if (ascii) {
445                 ascii = detector.isAscii(buffer, len);
446 
447                 if (!ascii) {
448                     break;
449                 }
450             }
451         }
452 
453         detector.DataEnd();
454 
455         inputStream.close();
456 
457         return ascii;
458     }
459 
460     public String[] listDirs(String fileName) {
461         return listDirs(new File(fileName));
462     }
463 
464     public String[] listDirs(File file) {
465         List<String> dirs = new ArrayList<String>();
466 
467         File[] fileArray = file.listFiles();
468 
469         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
470             if (fileArray[i].isDirectory()) {
471                 dirs.add(fileArray[i].getName());
472             }
473         }
474 
475         return dirs.toArray(new String[dirs.size()]);
476     }
477 
478     public String[] listFiles(String fileName) {
479         if (Validator.isNull(fileName)) {
480             return new String[0];
481         }
482 
483         return listFiles(new File(fileName));
484     }
485 
486     public String[] listFiles(File file) {
487         List<String> files = new ArrayList<String>();
488 
489         File[] fileArray = file.listFiles();
490 
491         for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
492             if (fileArray[i].isFile()) {
493                 files.add(fileArray[i].getName());
494             }
495         }
496 
497         return files.toArray(new String[files.size()]);
498     }
499 
500     public void mkdirs(String pathName) {
501         File file = new File(pathName);
502 
503         file.mkdirs();
504     }
505 
506     public boolean move(String sourceFileName, String destinationFileName) {
507         return move(new File(sourceFileName), new File(destinationFileName));
508     }
509 
510     public boolean move(File source, File destination) {
511         if (!source.exists()) {
512             return false;
513         }
514 
515         destination.delete();
516 
517         return source.renameTo(destination);
518     }
519 
520     public String read(String fileName) throws IOException {
521         return read(new File(fileName));
522     }
523 
524     public String read(File file) throws IOException {
525         return read(file, false);
526     }
527 
528     public String read(File file, boolean raw) throws IOException {
529         FileInputStream fis = new FileInputStream(file);
530 
531         byte[] bytes = new byte[fis.available()];
532 
533         fis.read(bytes);
534 
535         fis.close();
536 
537         String s = new String(bytes, StringPool.UTF8);
538 
539         if (raw) {
540             return s;
541         }
542         else {
543             return StringUtil.replace(
544                 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
545         }
546     }
547 
548     public String replaceSeparator(String fileName) {
549         return StringUtil.replace(
550             fileName, StringPool.BACK_SLASH, StringPool.SLASH);
551     }
552 
553     public File[] sortFiles(File[] files) {
554         if (files == null) {
555             return null;
556         }
557 
558         Arrays.sort(files, new FileComparator());
559 
560         List<File> directoryList = new ArrayList<File>();
561         List<File> fileList = new ArrayList<File>();
562 
563         for (int i = 0; i < files.length; i++) {
564             if (files[i].isDirectory()) {
565                 directoryList.add(files[i]);
566             }
567             else {
568                 fileList.add(files[i]);
569             }
570         }
571 
572         directoryList.addAll(fileList);
573 
574         return directoryList.toArray(new File[directoryList.size()]);
575     }
576 
577     public String stripExtension(String fileName) {
578         if (fileName == null) {
579             return null;
580         }
581 
582         int pos = fileName.lastIndexOf(StringPool.PERIOD);
583 
584         if (pos != -1) {
585             return fileName.substring(0, pos);
586         }
587         else {
588             return fileName;
589         }
590     }
591 
592     public List<String> toList(Reader reader) {
593         List<String> list = new ArrayList<String>();
594 
595         try {
596             UnsyncBufferedReader unsyncBufferedReader =
597                 new UnsyncBufferedReader(reader);
598 
599             String line = null;
600 
601             while ((line = unsyncBufferedReader.readLine()) != null) {
602                 list.add(line);
603             }
604 
605             unsyncBufferedReader.close();
606         }
607         catch (IOException ioe) {
608         }
609 
610         return list;
611     }
612 
613     public List<String> toList(String fileName) {
614         try {
615             return toList(new FileReader(fileName));
616         }
617         catch (IOException ioe) {
618             return new ArrayList<String>();
619         }
620     }
621 
622     public Properties toProperties(FileInputStream fis) {
623         Properties props = new Properties();
624 
625         try {
626             props.load(fis);
627         }
628         catch (IOException ioe) {
629         }
630 
631         return props;
632     }
633 
634     public Properties toProperties(String fileName) {
635         try {
636             return toProperties(new FileInputStream(fileName));
637         }
638         catch (IOException ioe) {
639             return new Properties();
640         }
641     }
642 
643     public void write(String fileName, String s) throws IOException {
644         write(new File(fileName), s);
645     }
646 
647     public void write(String fileName, String s, boolean lazy)
648         throws IOException {
649 
650         write(new File(fileName), s, lazy);
651     }
652 
653     public void write(String fileName, String s, boolean lazy, boolean append)
654         throws IOException {
655 
656         write(new File(fileName), s, lazy, append);
657     }
658 
659     public void write(String pathName, String fileName, String s)
660         throws IOException {
661 
662         write(new File(pathName, fileName), s);
663     }
664 
665     public void write(String pathName, String fileName, String s, boolean lazy)
666         throws IOException {
667 
668         write(new File(pathName, fileName), s, lazy);
669     }
670 
671     public void write(
672             String pathName, String fileName, String s, boolean lazy,
673             boolean append)
674         throws IOException {
675 
676         write(new File(pathName, fileName), s, lazy, append);
677     }
678 
679     public void write(File file, String s) throws IOException {
680         write(file, s, false);
681     }
682 
683     public void write(File file, String s, boolean lazy)
684         throws IOException {
685 
686         write(file, s, lazy, false);
687     }
688 
689     public void write(File file, String s, boolean lazy, boolean append)
690         throws IOException {
691 
692         if (file.getParent() != null) {
693             mkdirs(file.getParent());
694         }
695 
696         if (lazy && file.exists()) {
697             String content = read(file);
698 
699             if (content.equals(s)) {
700                 return;
701             }
702         }
703 
704         Writer writer = new OutputStreamWriter(
705             new FileOutputStream(file, append), StringPool.UTF8);
706 
707         writer.write(s);
708 
709         writer.close();
710     }
711 
712     public void write(String fileName, byte[] bytes) throws IOException {
713         write(new File(fileName), bytes);
714     }
715 
716     public void write(File file, byte[] bytes) throws IOException {
717         write(file, bytes, 0, bytes.length);
718     }
719 
720     public void write(File file, byte[] bytes, int offset, int length)
721         throws IOException {
722 
723         if (file.getParent() != null) {
724             mkdirs(file.getParent());
725         }
726 
727         FileOutputStream fos = new FileOutputStream(file);
728 
729         fos.write(bytes, offset, length);
730 
731         fos.close();
732     }
733 
734     public void write(String fileName, InputStream is) throws IOException {
735         write(new File(fileName), is);
736     }
737 
738     public void write(File file, InputStream is) throws IOException {
739         if (file.getParent() != null) {
740             mkdirs(file.getParent());
741         }
742 
743         StreamUtil.transfer(is, new FileOutputStream(file));
744     }
745 
746     private static final String[] _SAFE_FILE_NAME_1 = {
747         StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
748         StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
749     };
750 
751     private static final String[] _SAFE_FILE_NAME_2 = {
752         "_AMP_", "_CP_", "_OP_", "_SEM_"
753     };
754 
755     private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
756 
757     private static FileImpl _instance = new FileImpl();
758 
759     private Map<String, TextExtractor> _textExtractors =
760         new HashMap<String, TextExtractor>();
761 
762 }