001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
018 import com.liferay.portal.kernel.io.unsync.UnsyncByteArrayOutputStream;
019 import com.liferay.portal.kernel.log.Log;
020 import com.liferay.portal.kernel.log.LogFactoryUtil;
021 import com.liferay.portal.kernel.util.ContentTypes;
022 import com.liferay.portal.kernel.util.FileComparator;
023 import com.liferay.portal.kernel.util.JavaProps;
024 import com.liferay.portal.kernel.util.MimeTypesUtil;
025 import com.liferay.portal.kernel.util.ServerDetector;
026 import com.liferay.portal.kernel.util.StreamUtil;
027 import com.liferay.portal.kernel.util.StringBundler;
028 import com.liferay.portal.kernel.util.StringPool;
029 import com.liferay.portal.kernel.util.StringUtil;
030 import com.liferay.portal.kernel.util.Time;
031 import com.liferay.portal.kernel.util.Validator;
032 import com.liferay.util.PwdGenerator;
033 import com.liferay.util.SystemProperties;
034 import com.liferay.util.lucene.JerichoHTMLTextExtractor;
035
036 import java.io.BufferedInputStream;
037 import java.io.File;
038 import java.io.FileInputStream;
039 import java.io.FileOutputStream;
040 import java.io.FileReader;
041 import java.io.IOException;
042 import java.io.InputStream;
043 import java.io.OutputStreamWriter;
044 import java.io.RandomAccessFile;
045 import java.io.Reader;
046 import java.io.StringReader;
047 import java.io.Writer;
048
049 import java.util.ArrayList;
050 import java.util.Arrays;
051 import java.util.HashMap;
052 import java.util.List;
053 import java.util.Map;
054 import java.util.Properties;
055
056 import org.apache.jackrabbit.extractor.MsExcelTextExtractor;
057 import org.apache.jackrabbit.extractor.MsPowerPointTextExtractor;
058 import org.apache.jackrabbit.extractor.MsWordTextExtractor;
059 import org.apache.jackrabbit.extractor.OpenOfficeTextExtractor;
060 import org.apache.jackrabbit.extractor.PdfTextExtractor;
061 import org.apache.jackrabbit.extractor.PlainTextExtractor;
062 import org.apache.jackrabbit.extractor.RTFTextExtractor;
063 import org.apache.jackrabbit.extractor.TextExtractor;
064 import org.apache.jackrabbit.extractor.XMLTextExtractor;
065 import org.apache.poi.POITextExtractor;
066 import org.apache.poi.extractor.ExtractorFactory;
067
068 import org.mozilla.intl.chardet.nsDetector;
069 import org.mozilla.intl.chardet.nsPSMDetector;
070
071
075 public class FileImpl implements com.liferay.portal.kernel.util.File {
076
077 public static FileImpl getInstance() {
078 return _instance;
079 }
080
/**
 * Creates the file utility and registers one instance of every known text
 * extractor under each content type that the extractor reports.
 *
 * <p>
 * An extractor that fails to instantiate or to report its content types is
 * logged and skipped; the remaining extractors are still registered. When
 * two extractors report the same content type, the one listed later wins.
 * </p>
 */
public FileImpl() {
	Class<?>[] extractorTypes = {
		JerichoHTMLTextExtractor.class, MsExcelTextExtractor.class,
		MsPowerPointTextExtractor.class, MsWordTextExtractor.class,
		OpenOfficeTextExtractor.class, PdfTextExtractor.class,
		PlainTextExtractor.class, RTFTextExtractor.class,
		XMLTextExtractor.class
	};

	for (int i = 0; i < extractorTypes.length; i++) {
		try {
			TextExtractor extractor =
				(TextExtractor)extractorTypes[i].newInstance();

			for (String supportedType : extractor.getContentTypes()) {
				_textExtractors.put(supportedType, extractor);
			}
		}
		catch (Exception e) {
			_log.error(e, e);
		}
	}
}
106
107 public void copyDirectory(String sourceDirName, String destinationDirName) {
108 copyDirectory(new File(sourceDirName), new File(destinationDirName));
109 }
110
111 public void copyDirectory(File source, File destination) {
112 if (source.exists() && source.isDirectory()) {
113 if (!destination.exists()) {
114 destination.mkdirs();
115 }
116
117 File[] fileArray = source.listFiles();
118
119 for (int i = 0; i < fileArray.length; i++) {
120 if (fileArray[i].isDirectory()) {
121 copyDirectory(
122 fileArray[i],
123 new File(destination.getPath() + File.separator
124 + fileArray[i].getName()));
125 }
126 else {
127 copyFile(
128 fileArray[i],
129 new File(destination.getPath() + File.separator
130 + fileArray[i].getName()));
131 }
132 }
133 }
134 }
135
136 public void copyFile(String source, String destination) {
137 copyFile(source, destination, false);
138 }
139
140 public void copyFile(String source, String destination, boolean lazy) {
141 copyFile(new File(source), new File(destination), lazy);
142 }
143
144 public void copyFile(File source, File destination) {
145 copyFile(source, destination, false);
146 }
147
148 public void copyFile(File source, File destination, boolean lazy) {
149 if (!source.exists()) {
150 return;
151 }
152
153 if (lazy) {
154 String oldContent = null;
155
156 try {
157 oldContent = read(source);
158 }
159 catch (Exception e) {
160 return;
161 }
162
163 String newContent = null;
164
165 try {
166 newContent = read(destination);
167 }
168 catch (Exception e) {
169 }
170
171 if ((oldContent == null) || !oldContent.equals(newContent)) {
172 copyFile(source, destination, false);
173 }
174 }
175 else {
176 if ((destination.getParentFile() != null) &&
177 (!destination.getParentFile().exists())) {
178
179 destination.getParentFile().mkdirs();
180 }
181
182 try {
183 StreamUtil.transfer(
184 new FileInputStream(source),
185 new FileOutputStream(destination));
186 }
187 catch (IOException ioe) {
188 _log.error(ioe.getMessage());
189 }
190 }
191 }
192
193 public File createTempFile() {
194 return createTempFile(null);
195 }
196
197 public File createTempFile(String extension) {
198 return new File(createTempFileName(extension));
199 }
200
201 public String createTempFileName() {
202 return createTempFileName(null);
203 }
204
205 public String createTempFileName(String extension) {
206 StringBundler sb = new StringBundler();
207
208 sb.append(SystemProperties.get(SystemProperties.TMP_DIR));
209 sb.append(StringPool.SLASH);
210 sb.append(Time.getTimestamp());
211 sb.append(PwdGenerator.getPassword(PwdGenerator.KEY2, 8));
212
213 if (Validator.isNotNull(extension)) {
214 sb.append(StringPool.PERIOD);
215 sb.append(extension);
216 }
217
218 return sb.toString();
219 }
220
221 public String decodeSafeFileName(String fileName) {
222 return StringUtil.replace(
223 fileName, _SAFE_FILE_NAME_2, _SAFE_FILE_NAME_1);
224 }
225
226 public boolean delete(String file) {
227 return delete(new File(file));
228 }
229
230 public boolean delete(File file) {
231 if ((file != null) && file.exists()) {
232 return file.delete();
233 }
234 else {
235 return false;
236 }
237 }
238
239 public void deltree(String directory) {
240 deltree(new File(directory));
241 }
242
243 public void deltree(File directory) {
244 if (directory.exists() && directory.isDirectory()) {
245 File[] fileArray = directory.listFiles();
246
247 for (int i = 0; i < fileArray.length; i++) {
248 if (fileArray[i].isDirectory()) {
249 deltree(fileArray[i]);
250 }
251 else {
252 fileArray[i].delete();
253 }
254 }
255
256 directory.delete();
257 }
258 }
259
260 public String encodeSafeFileName(String fileName) {
261 if (fileName == null) {
262 return StringPool.BLANK;
263 }
264
265 return StringUtil.replace(
266 fileName, _SAFE_FILE_NAME_1, _SAFE_FILE_NAME_2);
267 }
268
269 public boolean exists(String fileName) {
270 return exists(new File(fileName));
271 }
272
273 public boolean exists(File file) {
274 return file.exists();
275 }
276
277 public String extractText(InputStream is, String fileName) {
278 String text = null;
279
280 try {
281 if (!is.markSupported()) {
282 is = new BufferedInputStream(is);
283 }
284
285 String contentType = MimeTypesUtil.getContentType(is, fileName);
286
287 if (_log.isInfoEnabled()) {
288 _log.info(
289 "Attempting to extract text from " + fileName +
290 " of type " + contentType);
291 }
292
293 TextExtractor textExtractor = _textExtractors.get(contentType);
294
295 if (textExtractor != null) {
296 if (_log.isInfoEnabled()) {
297 _log.info(
298 "Using text extractor " +
299 textExtractor.getClass().getName());
300 }
301
302 StringBuilder sb = new StringBuilder();
303
304 Reader reader = null;
305
306 if (ServerDetector.isJOnAS() && JavaProps.isJDK6() &&
307 contentType.equals(ContentTypes.APPLICATION_MSWORD)) {
308
309 if (_log.isWarnEnabled()) {
310 _log.warn(
311 "JOnAS 5 with JDK 6 has a known issue with text " +
312 "extraction of Word documents. Use JDK 5 if " +
313 "you require indexing of Word documents.");
314 }
315
316 if (_log.isDebugEnabled()) {
317
318
319
320
321 reader = textExtractor.extractText(
322 is, contentType, null);
323 }
324 else {
325 reader = new StringReader(StringPool.BLANK);
326 }
327 }
328 else {
329 reader = textExtractor.extractText(
330 is, contentType, null);
331 }
332
333 try{
334 char[] buffer = new char[1024];
335
336 int result = -1;
337
338 while ((result = reader.read(buffer)) != -1) {
339 sb.append(buffer, 0, result);
340 }
341 }
342 finally {
343 try {
344 reader.close();
345 }
346 catch (IOException ioe) {
347 }
348 }
349
350 text = sb.toString();
351 }
352 else if (contentType.equals(ContentTypes.APPLICATION_ZIP) ||
353 contentType.startsWith(
354 "application/vnd.openxmlformats-officedocument.")) {
355
356 try {
357 POITextExtractor poiTextExtractor =
358 ExtractorFactory.createExtractor(is);
359
360 text = poiTextExtractor.getText();
361 }
362 catch (Exception e) {
363 if (_log.isInfoEnabled()) {
364 _log.info(e.getMessage());
365 }
366 }
367 }
368 }
369 catch (Exception e) {
370 _log.error(e, e);
371 }
372
373 if (_log.isInfoEnabled()) {
374 if (text == null) {
375 _log.info("No text extractor found for " + fileName);
376 }
377 else {
378 _log.info("Text was extracted for " + fileName);
379 }
380 }
381
382 if (_log.isDebugEnabled()) {
383 _log.debug("Extractor returned text:\n\n" + text);
384 }
385
386 if (text == null) {
387 text = StringPool.BLANK;
388 }
389
390 return text;
391 }
392
393 public String getAbsolutePath(File file) {
394 return StringUtil.replace(
395 file.getAbsolutePath(), StringPool.BACK_SLASH, StringPool.SLASH);
396 }
397
398 public byte[] getBytes(File file) throws IOException {
399 if ((file == null) || !file.exists()) {
400 return null;
401 }
402
403 RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r");
404
405 byte[] bytes = new byte[(int)randomAccessFile.length()];
406
407 randomAccessFile.readFully(bytes);
408
409 randomAccessFile.close();
410
411 return bytes;
412 }
413
414 public byte[] getBytes(InputStream is) throws IOException {
415 return getBytes(is, -1);
416 }
417
418 public byte[] getBytes(InputStream inputStream, int bufferSize)
419 throws IOException {
420
421 UnsyncByteArrayOutputStream unsyncByteArrayOutputStream =
422 new UnsyncByteArrayOutputStream();
423
424 StreamUtil.transfer(
425 inputStream, unsyncByteArrayOutputStream, bufferSize);
426
427 return unsyncByteArrayOutputStream.toByteArray();
428 }
429
430 public String getExtension(String fileName) {
431 if (fileName == null) {
432 return null;
433 }
434
435 int pos = fileName.lastIndexOf(StringPool.PERIOD);
436
437 if (pos > 0) {
438 return fileName.substring(pos + 1, fileName.length()).toLowerCase();
439 }
440 else {
441 return StringPool.BLANK;
442 }
443 }
444
445 public String getPath(String fullFileName) {
446 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
447
448 if (pos == -1) {
449 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
450 }
451
452 String shortFileName = fullFileName.substring(0, pos);
453
454 if (Validator.isNull(shortFileName)) {
455 return StringPool.SLASH;
456 }
457
458 return shortFileName;
459 }
460
461 public String getShortFileName(String fullFileName) {
462 int pos = fullFileName.lastIndexOf(StringPool.SLASH);
463
464 if (pos == -1) {
465 pos = fullFileName.lastIndexOf(StringPool.BACK_SLASH);
466 }
467
468 String shortFileName =
469 fullFileName.substring(pos + 1, fullFileName.length());
470
471 return shortFileName;
472 }
473
474 public boolean isAscii(File file) throws IOException {
475 boolean ascii = true;
476
477 nsDetector detector = new nsDetector(nsPSMDetector.ALL);
478
479 InputStream inputStream = new FileInputStream(file);
480
481 byte[] buffer = new byte[1024];
482
483 int len = 0;
484
485 while ((len = inputStream.read(buffer, 0, buffer.length)) != -1) {
486 if (ascii) {
487 ascii = detector.isAscii(buffer, len);
488
489 if (!ascii) {
490 break;
491 }
492 }
493 }
494
495 detector.DataEnd();
496
497 inputStream.close();
498
499 return ascii;
500 }
501
502 public String[] listDirs(String fileName) {
503 return listDirs(new File(fileName));
504 }
505
506 public String[] listDirs(File file) {
507 List<String> dirs = new ArrayList<String>();
508
509 File[] fileArray = file.listFiles();
510
511 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
512 if (fileArray[i].isDirectory()) {
513 dirs.add(fileArray[i].getName());
514 }
515 }
516
517 return dirs.toArray(new String[dirs.size()]);
518 }
519
520 public String[] listFiles(String fileName) {
521 if (Validator.isNull(fileName)) {
522 return new String[0];
523 }
524
525 return listFiles(new File(fileName));
526 }
527
528 public String[] listFiles(File file) {
529 List<String> files = new ArrayList<String>();
530
531 File[] fileArray = file.listFiles();
532
533 for (int i = 0; (fileArray != null) && (i < fileArray.length); i++) {
534 if (fileArray[i].isFile()) {
535 files.add(fileArray[i].getName());
536 }
537 }
538
539 return files.toArray(new String[files.size()]);
540 }
541
542 public void mkdirs(String pathName) {
543 File file = new File(pathName);
544
545 file.mkdirs();
546 }
547
548 public boolean move(String sourceFileName, String destinationFileName) {
549 return move(new File(sourceFileName), new File(destinationFileName));
550 }
551
552 public boolean move(File source, File destination) {
553 if (!source.exists()) {
554 return false;
555 }
556
557 destination.delete();
558
559 return source.renameTo(destination);
560 }
561
562 public String read(String fileName) throws IOException {
563 return read(new File(fileName));
564 }
565
566 public String read(File file) throws IOException {
567 return read(file, false);
568 }
569
570 public String read(File file, boolean raw) throws IOException {
571 byte[] bytes = getBytes(file);
572
573 if (bytes == null) {
574 return null;
575 }
576
577 String s = new String(bytes, StringPool.UTF8);
578
579 if (raw) {
580 return s;
581 }
582 else {
583 return StringUtil.replace(
584 s, StringPool.RETURN_NEW_LINE, StringPool.NEW_LINE);
585 }
586 }
587
588 public String replaceSeparator(String fileName) {
589 return StringUtil.replace(
590 fileName, StringPool.BACK_SLASH, StringPool.SLASH);
591 }
592
593 public File[] sortFiles(File[] files) {
594 if (files == null) {
595 return null;
596 }
597
598 Arrays.sort(files, new FileComparator());
599
600 List<File> directoryList = new ArrayList<File>();
601 List<File> fileList = new ArrayList<File>();
602
603 for (int i = 0; i < files.length; i++) {
604 if (files[i].isDirectory()) {
605 directoryList.add(files[i]);
606 }
607 else {
608 fileList.add(files[i]);
609 }
610 }
611
612 directoryList.addAll(fileList);
613
614 return directoryList.toArray(new File[directoryList.size()]);
615 }
616
617 public String stripExtension(String fileName) {
618 if (fileName == null) {
619 return null;
620 }
621
622 String ext = getExtension(fileName);
623
624 if (ext.length() > 0) {
625 return fileName.substring(0, fileName.length() - ext.length() - 1);
626 }
627 else {
628 return fileName;
629 }
630 }
631
632 public List<String> toList(Reader reader) {
633 List<String> list = new ArrayList<String>();
634
635 try {
636 UnsyncBufferedReader unsyncBufferedReader =
637 new UnsyncBufferedReader(reader);
638
639 String line = null;
640
641 while ((line = unsyncBufferedReader.readLine()) != null) {
642 list.add(line);
643 }
644
645 unsyncBufferedReader.close();
646 }
647 catch (IOException ioe) {
648 }
649
650 return list;
651 }
652
653 public List<String> toList(String fileName) {
654 try {
655 return toList(new FileReader(fileName));
656 }
657 catch (IOException ioe) {
658 return new ArrayList<String>();
659 }
660 }
661
662 public Properties toProperties(FileInputStream fis) {
663 Properties props = new Properties();
664
665 try {
666 props.load(fis);
667 }
668 catch (IOException ioe) {
669 }
670
671 return props;
672 }
673
674 public Properties toProperties(String fileName) {
675 try {
676 return toProperties(new FileInputStream(fileName));
677 }
678 catch (IOException ioe) {
679 return new Properties();
680 }
681 }
682
683 public void write(String fileName, String s) throws IOException {
684 write(new File(fileName), s);
685 }
686
687 public void write(String fileName, String s, boolean lazy)
688 throws IOException {
689
690 write(new File(fileName), s, lazy);
691 }
692
693 public void write(String fileName, String s, boolean lazy, boolean append)
694 throws IOException {
695
696 write(new File(fileName), s, lazy, append);
697 }
698
699 public void write(String pathName, String fileName, String s)
700 throws IOException {
701
702 write(new File(pathName, fileName), s);
703 }
704
705 public void write(String pathName, String fileName, String s, boolean lazy)
706 throws IOException {
707
708 write(new File(pathName, fileName), s, lazy);
709 }
710
711 public void write(
712 String pathName, String fileName, String s, boolean lazy,
713 boolean append)
714 throws IOException {
715
716 write(new File(pathName, fileName), s, lazy, append);
717 }
718
719 public void write(File file, String s) throws IOException {
720 write(file, s, false);
721 }
722
723 public void write(File file, String s, boolean lazy)
724 throws IOException {
725
726 write(file, s, lazy, false);
727 }
728
729 public void write(File file, String s, boolean lazy, boolean append)
730 throws IOException {
731
732 if (file.getParent() != null) {
733 mkdirs(file.getParent());
734 }
735
736 if (lazy && file.exists()) {
737 String content = read(file);
738
739 if (content.equals(s)) {
740 return;
741 }
742 }
743
744 Writer writer = new OutputStreamWriter(
745 new FileOutputStream(file, append), StringPool.UTF8);
746
747 writer.write(s);
748
749 writer.close();
750 }
751
752 public void write(String fileName, byte[] bytes) throws IOException {
753 write(new File(fileName), bytes);
754 }
755
756 public void write(File file, byte[] bytes) throws IOException {
757 write(file, bytes, 0, bytes.length);
758 }
759
760 public void write(File file, byte[] bytes, int offset, int length)
761 throws IOException {
762
763 if (file.getParent() != null) {
764 mkdirs(file.getParent());
765 }
766
767 FileOutputStream fos = new FileOutputStream(file);
768
769 fos.write(bytes, offset, length);
770
771 fos.close();
772 }
773
774 public void write(String fileName, InputStream is) throws IOException {
775 write(new File(fileName), is);
776 }
777
778 public void write(File file, InputStream is) throws IOException {
779 if (file.getParent() != null) {
780 mkdirs(file.getParent());
781 }
782
783 StreamUtil.transfer(is, new FileOutputStream(file));
784 }
785
786 private static final String[] _SAFE_FILE_NAME_1 = {
787 StringPool.AMPERSAND, StringPool.CLOSE_PARENTHESIS,
788 StringPool.OPEN_PARENTHESIS, StringPool.SEMICOLON
789 };
790
791 private static final String[] _SAFE_FILE_NAME_2 = {
792 "_AMP_", "_CP_", "_OP_", "_SEM_"
793 };
794
795 private static Log _log = LogFactoryUtil.getLog(FileImpl.class);
796
797 private static FileImpl _instance = new FileImpl();
798
799 private Map<String, TextExtractor> _textExtractors =
800 new HashMap<String, TextExtractor>();
801
802 }