1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portlet.wiki.importers.mediawiki;
16  
17  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
18  import com.liferay.portal.NoSuchUserException;
19  import com.liferay.portal.PortalException;
20  import com.liferay.portal.SystemException;
21  import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
22  import com.liferay.portal.kernel.log.Log;
23  import com.liferay.portal.kernel.log.LogFactoryUtil;
24  import com.liferay.portal.kernel.util.ArrayUtil;
25  import com.liferay.portal.kernel.util.MapUtil;
26  import com.liferay.portal.kernel.util.ObjectValuePair;
27  import com.liferay.portal.kernel.util.ProgressTracker;
28  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
29  import com.liferay.portal.kernel.util.StringBundler;
30  import com.liferay.portal.kernel.util.StringPool;
31  import com.liferay.portal.kernel.util.StringUtil;
32  import com.liferay.portal.kernel.util.Validator;
33  import com.liferay.portal.kernel.xml.Document;
34  import com.liferay.portal.kernel.xml.DocumentException;
35  import com.liferay.portal.kernel.xml.Element;
36  import com.liferay.portal.kernel.xml.SAXReaderUtil;
37  import com.liferay.portal.kernel.zip.ZipReader;
38  import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
39  import com.liferay.portal.model.User;
40  import com.liferay.portal.service.UserLocalServiceUtil;
41  import com.liferay.portal.util.PropsValues;
42  import com.liferay.portlet.tags.NoSuchEntryException;
43  import com.liferay.portlet.tags.model.TagsEntry;
44  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
45  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
46  import com.liferay.portlet.tags.util.TagsUtil;
47  import com.liferay.portlet.wiki.ImportFilesException;
48  import com.liferay.portlet.wiki.NoSuchPageException;
49  import com.liferay.portlet.wiki.importers.WikiImporter;
50  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
51  import com.liferay.portlet.wiki.model.WikiNode;
52  import com.liferay.portlet.wiki.model.WikiPage;
53  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
54  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
55  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
56  
57  import java.io.File;
58  import java.io.FileReader;
59  import java.io.IOException;
60  
61  import java.util.ArrayList;
62  import java.util.Collections;
63  import java.util.HashMap;
64  import java.util.Iterator;
65  import java.util.List;
66  import java.util.Map;
67  import java.util.regex.Matcher;
68  import java.util.regex.Pattern;
69  
70  /**
71   * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
72   *
73   * @author Alvaro del Castillo
74   * @author Jorge Ferrer
75   */
76  public class MediaWikiImporter implements WikiImporter {
77  
78      public static final String SHARED_IMAGES_CONTENT = "See attachments";
79  
80      public static final String SHARED_IMAGES_TITLE = "SharedImages";
81  
82      public void importPages(
83              long userId, WikiNode node, File[] files,
84              Map<String, String[]> options)
85          throws PortalException {
86  
87          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
88              throw new PortalException("The pages file is mandatory");
89          }
90  
91          File pagesFile = files[0];
92          File usersFile = files[1];
93          File imagesFile = files[2];
94  
95          try {
96              Document doc = SAXReaderUtil.read(pagesFile);
97  
98              Map<String, String> usersMap = readUsersFile(usersFile);
99  
100             Element root = doc.getRootElement();
101 
102             List<String> specialNamespaces = readSpecialNamespaces(root);
103 
104             processSpecialPages(userId, node, root, specialNamespaces);
105             processRegularPages(
106                 userId, node, root, specialNamespaces, usersMap, imagesFile,
107                 options);
108             processImages(userId, node, imagesFile);
109 
110             moveFrontPage(userId, node, options);
111         }
112         catch (DocumentException de) {
113             throw new ImportFilesException("Invalid XML file provided");
114         }
115         catch (IOException de) {
116             throw new ImportFilesException("Error reading the files provided");
117         }
118         catch (PortalException e) {
119             throw e;
120         }
121         catch (Exception e) {
122             throw new PortalException(e);
123         }
124     }
125 
126     protected long getUserId(
127             long userId, WikiNode node, String author,
128             Map<String, String> usersMap)
129         throws PortalException, SystemException {
130 
131         User user = null;
132 
133         String emailAddress = usersMap.get(author);
134 
135         try {
136             if (Validator.isNull(emailAddress)) {
137                 user = UserLocalServiceUtil.getUserByScreenName(
138                     node.getCompanyId(), author.toLowerCase());
139             }
140             else {
141                 user = UserLocalServiceUtil.getUserByEmailAddress(
142                     node.getCompanyId(), emailAddress);
143             }
144         }
145         catch (NoSuchUserException nsue) {
146             user = UserLocalServiceUtil.getUserById(userId);
147         }
148 
149         return user.getUserId();
150     }
151 
152     protected void importPage(
153             long userId, String author, WikiNode node, String title,
154             String content, String summary, Map<String, String> usersMap)
155         throws PortalException {
156 
157         try {
158             long authorUserId = getUserId(userId, node, author, usersMap);
159             String parentTitle = readParentTitle(content);
160             String redirectTitle = readRedirectTitle(content);
161             String[] tagsEntries = readTagsEntries(userId, node, content);
162 
163             if (Validator.isNull(redirectTitle)) {
164                 content = _translator.translate(content);
165             }
166             else {
167                 content =
168                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
169                         StringPool.DOUBLE_CLOSE_BRACKET;
170             }
171 
172             WikiPage page = null;
173 
174             try {
175                 page = WikiPageLocalServiceUtil.getPage(
176                     node.getNodeId(), title);
177             }
178             catch (NoSuchPageException nspe) {
179                 page = WikiPageLocalServiceUtil.addPage(
180                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
181                     null, true, null, null);
182             }
183 
184             WikiPageLocalServiceUtil.updatePage(
185                 authorUserId, node.getNodeId(), title, page.getVersion(),
186                 content, summary, true, "creole", parentTitle,
187                 redirectTitle, tagsEntries, null, null);
188         }
189         catch (Exception e) {
190             throw new PortalException("Error importing page " + title, e);
191         }
192     }
193 
194     protected boolean isSpecialMediaWikiPage(
195         String title, List<String> specialNamespaces) {
196 
197         for (String namespace: specialNamespaces) {
198             if (title.startsWith(namespace + StringPool.COLON)) {
199                 return true;
200             }
201         }
202 
203         return false;
204     }
205 
206     protected boolean isValidImage(String[] paths, byte[] bytes) {
207         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
208             return false;
209         }
210 
211         if ((paths.length > 1) &&
212             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
213 
214             return false;
215         }
216 
217         String fileName = paths[paths.length - 1];
218 
219         try {
220             DLLocalServiceUtil.validate(fileName, bytes);
221         }
222         catch (PortalException pe) {
223             return false;
224         }
225         catch (SystemException se) {
226             return false;
227         }
228 
229         return true;
230     }
231 
232     protected void moveFrontPage(
233         long userId, WikiNode node, Map<String, String[]> options) {
234 
235         String frontPageTitle = MapUtil.getString(
236             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
237 
238         if (Validator.isNotNull(frontPageTitle)) {
239             frontPageTitle = normalizeTitle(frontPageTitle);
240 
241             try {
242                 if (WikiPageLocalServiceUtil.getPagesCount(
243                         node.getNodeId(), frontPageTitle, true) > 0) {
244 
245                     WikiPageLocalServiceUtil.movePage(
246                         userId, node.getNodeId(), frontPageTitle,
247                         WikiPageImpl.FRONT_PAGE, false, null, null);
248 
249                 }
250             }
251             catch (Exception e) {
252                 if (_log.isWarnEnabled()) {
253                     StringBundler sb = new StringBundler(4);
254 
255                     sb.append("Could not move ");
256                     sb.append(WikiPageImpl.FRONT_PAGE);
257                     sb.append(" to the title provided: ");
258                     sb.append(frontPageTitle);
259 
260                     _log.warn(sb.toString(), e);
261                 }
262             }
263 
264         }
265 
266     }
267 
268     protected String normalize(String categoryName, int length) {
269         categoryName = TagsUtil.toWord(categoryName.trim());
270 
271         return StringUtil.shorten(categoryName, length);
272     }
273 
274     protected String normalizeDescription(String description) {
275         description = description.replaceAll(
276             _categoriesPattern.pattern(), StringPool.BLANK);
277 
278         return normalize(description, 300);
279     }
280 
281     protected String normalizeTitle(String title) {
282         title = title.replaceAll(
283             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
284 
285         return StringUtil.shorten(title, 75);
286     }
287 
288     protected void processImages(long userId, WikiNode node, File imagesFile)
289         throws Exception {
290 
291         if ((imagesFile == null) || (!imagesFile.exists())) {
292             return;
293         }
294 
295         ProgressTracker progressTracker =
296             ProgressTrackerThreadLocal.getProgressTracker();
297 
298         int count = 0;
299 
300         ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
301 
302         List<String> entries = zipReader.getEntries();
303 
304         int total = entries.size();
305 
306         if (total > 0) {
307             try {
308                 WikiPageLocalServiceUtil.getPage(
309                     node.getNodeId(), SHARED_IMAGES_TITLE);
310             }
311             catch (NoSuchPageException nspe) {
312                 WikiPageLocalServiceUtil.addPage(
313                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
314                     SHARED_IMAGES_CONTENT, null, true, null, null);
315             }
316         }
317 
318         List<ObjectValuePair<String, byte[]>> attachments =
319             new ArrayList<ObjectValuePair<String, byte[]>>();
320 
321         int percentage = 50;
322 
323         for (int i = 0; i < entries.size(); i++) {
324             String entry = entries.get(i);
325 
326             String key = entry;
327             byte[] value = zipReader.getEntryAsByteArray(entry);
328 
329             String[] paths = StringUtil.split(key, StringPool.SLASH);
330 
331             if (!isValidImage(paths, value)) {
332                 if (_log.isInfoEnabled()) {
333                     _log.info("Ignoring " + key);
334                 }
335 
336                 continue;
337             }
338 
339             String fileName = paths[paths.length - 1].toLowerCase();
340 
341             attachments.add(
342                 new ObjectValuePair<String, byte[]>(fileName, value));
343 
344             count++;
345 
346             if ((i % 5) == 0) {
347                 WikiPageLocalServiceUtil.addPageAttachments(
348                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
349 
350                 attachments.clear();
351 
352                 percentage = Math.min(50 + (i * 50) / total, 99);
353 
354                 progressTracker.updateProgress(percentage);
355             }
356         }
357 
358         if (!attachments.isEmpty()) {
359             WikiPageLocalServiceUtil.addPageAttachments(
360                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
361         }
362 
363         zipReader.close();
364 
365         if (_log.isInfoEnabled()) {
366             _log.info("Imported " + count + " images into " + node.getName());
367         }
368     }
369 
370     protected void processRegularPages(
371         long userId, WikiNode node, Element root,
372         List<String> specialNamespaces, Map<String, String> usersMap,
373         File imagesFile, Map<String, String[]> options) {
374 
375         boolean importLatestVersion = MapUtil.getBoolean(
376             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
377 
378         ProgressTracker progressTracker =
379             ProgressTrackerThreadLocal.getProgressTracker();
380 
381         int count = 0;
382 
383         List<Element> pages = root.elements("page");
384 
385         int total = pages.size();
386 
387         Iterator<Element> itr = root.elements("page").iterator();
388 
389         int percentage = 10;
390         int maxPercentage = 50;
391 
392         if ((imagesFile == null) || (!imagesFile.exists())) {
393             maxPercentage = 99;
394         }
395 
396         int percentageRange = maxPercentage - percentage;
397 
398         for (int i = 0; itr.hasNext(); i++) {
399             Element pageEl = itr.next();
400 
401             String title = pageEl.elementText("title");
402 
403             title = normalizeTitle(title);
404 
405             percentage = Math.min(
406                 10 + (i * percentageRange) / total, maxPercentage);
407 
408             progressTracker.updateProgress(percentage);
409 
410             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
411                 continue;
412             }
413 
414             List<Element> revisionEls = pageEl.elements("revision");
415 
416             if (importLatestVersion) {
417                 Element lastRevisionEl = revisionEls.get(
418                     revisionEls.size() - 1);
419 
420                 revisionEls = new ArrayList<Element>();
421 
422                 revisionEls.add(lastRevisionEl);
423             }
424 
425             for (Element curRevisionEl : revisionEls) {
426                 String author = curRevisionEl.element(
427                     "contributor").elementText("username");
428                 String content = curRevisionEl.elementText("text");
429                 String summary = curRevisionEl.elementText("comment");
430 
431                 try {
432                     importPage(
433                         userId, author, node, title, content, summary,
434                         usersMap);
435                 }
436                 catch (Exception e) {
437                     if (_log.isWarnEnabled()) {
438                         StringBundler sb = new StringBundler(3);
439 
440                         sb.append("Page with title ");
441                         sb.append(title);
442                         sb.append(" could not be imported");
443 
444                         _log.warn(sb.toString(), e);
445                     }
446                 }
447             }
448 
449             count++;
450         }
451 
452         if (_log.isInfoEnabled()) {
453             _log.info("Imported " + count + " pages into " + node.getName());
454         }
455     }
456 
457     protected void processSpecialPages(
458             long userId, WikiNode node, Element root,
459             List<String> specialNamespaces)
460         throws PortalException {
461 
462         ProgressTracker progressTracker =
463             ProgressTrackerThreadLocal.getProgressTracker();
464 
465         List<Element> pages = root.elements("page");
466 
467         int total = pages.size();
468 
469         Iterator<Element> itr = pages.iterator();
470 
471         for (int i = 0; itr.hasNext(); i++) {
472             Element page = itr.next();
473 
474             String title = page.elementText("title");
475 
476             if (!title.startsWith("Category:")) {
477                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
478                     root.remove(page);
479                 }
480 
481                 continue;
482             }
483 
484             String categoryName = title.substring("Category:".length());
485 
486             categoryName = normalize(categoryName, 75);
487 
488             String description = page.element("revision").elementText("text");
489 
490             description = normalizeDescription(description);
491 
492             try {
493                 TagsEntry tagsEntry = null;
494 
495                 try {
496                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
497                         node.getCompanyId(), categoryName);
498                 }
499                 catch (NoSuchEntryException nsee) {
500                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
501                         userId, categoryName);
502                 }
503 
504                 if (Validator.isNotNull(description)) {
505                     TagsPropertyLocalServiceUtil.addProperty(
506                         userId, tagsEntry.getEntryId(), "description",
507                         description);
508                 }
509             }
510             catch (SystemException se) {
511                  _log.error(se, se);
512             }
513 
514             if ((i % 5) == 0) {
515                 progressTracker.updateProgress((i * 10) / total);
516             }
517         }
518     }
519 
520     protected String readParentTitle(String content) {
521         Matcher matcher = _parentPattern.matcher(content);
522 
523         String redirectTitle = StringPool.BLANK;
524 
525         if (matcher.find()) {
526             redirectTitle = matcher.group(1);
527 
528             redirectTitle = normalizeTitle(redirectTitle);
529 
530             redirectTitle += " (disambiguation)";
531         }
532 
533         return redirectTitle;
534     }
535     protected String readRedirectTitle(String content) {
536         Matcher matcher = _redirectPattern.matcher(content);
537 
538         String redirectTitle = StringPool.BLANK;
539 
540         if (matcher.find()) {
541             redirectTitle = matcher.group(1);
542 
543             redirectTitle = normalizeTitle(redirectTitle);
544         }
545 
546         return redirectTitle;
547     }
548     protected List<String> readSpecialNamespaces(Element root)
549         throws ImportFilesException {
550 
551         List<String> namespaces = new ArrayList<String>();
552 
553         Element siteinfoEl = root.element("siteinfo");
554 
555         if (siteinfoEl == null) {
556             throw new ImportFilesException("Invalid pages XML file");
557         }
558 
559         Iterator<Element> itr = siteinfoEl.element(
560             "namespaces").elements("namespace").iterator();
561 
562         while (itr.hasNext()) {
563             Element namespace = itr.next();
564 
565             if (!namespace.attribute("key").getData().equals("0")) {
566                 namespaces.add(namespace.getText());
567             }
568         }
569 
570         return namespaces;
571     }
572 
573     protected String[] readTagsEntries(
574             long userId, WikiNode node, String content)
575         throws PortalException, SystemException {
576 
577         Matcher matcher = _categoriesPattern.matcher(content);
578 
579         List<String> tagsEntries = new ArrayList<String>();
580 
581         while (matcher.find()) {
582             String categoryName = matcher.group(1);
583 
584             categoryName = normalize(categoryName, 75);
585 
586             TagsEntry tagsEntry = null;
587 
588             try {
589                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
590                     node.getCompanyId(), categoryName);
591             }
592             catch (NoSuchEntryException nsee) {
593                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
594                     userId, categoryName);
595             }
596 
597             tagsEntries.add(tagsEntry.getName());
598         }
599 
600         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
601             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
602         }
603 
604         return tagsEntries.toArray(new String[tagsEntries.size()]);
605     }
606 
607     protected Map<String, String> readUsersFile(File usersFile)
608         throws IOException {
609 
610         if ((usersFile == null) || (!usersFile.exists())) {
611             return Collections.EMPTY_MAP;
612         }
613 
614         Map<String, String> usersMap = new HashMap<String, String>();
615 
616         UnsyncBufferedReader unsyncBufferedReader =
617             new UnsyncBufferedReader(new FileReader(usersFile));
618 
619         String line = unsyncBufferedReader.readLine();
620 
621         while (line != null) {
622             String[] array = StringUtil.split(line);
623 
624             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
625                 (Validator.isNotNull(array[1]))) {
626 
627                 usersMap.put(array[0], array[1]);
628             }
629             else {
630                 if (_log.isInfoEnabled()) {
631                     _log.info(
632                         "Ignoring line " + line +
633                             " because it does not contain exactly 2 columns");
634                 }
635             }
636 
637             line = unsyncBufferedReader.readLine();
638         }
639 
640         return usersMap;
641     }
642 
643     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
644         "thumb", "temp", "archive"
645     };
646 
647     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
648 
649     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
650 
651     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
652 
653     private static Pattern _categoriesPattern = Pattern.compile(
654         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
655     private static Pattern _parentPattern = Pattern.compile(
656         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
657     private static Pattern _redirectPattern = Pattern.compile(
658         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
659 
660     private MediaWikiToCreoleTranslator _translator =
661         new MediaWikiToCreoleTranslator();
662 
663 }