001
014
015 package com.liferay.portlet.wiki.importers.mediawiki;
016
017 import com.liferay.portal.NoSuchUserException;
018 import com.liferay.portal.kernel.exception.PortalException;
019 import com.liferay.portal.kernel.exception.SystemException;
020 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
021 import com.liferay.portal.kernel.log.Log;
022 import com.liferay.portal.kernel.log.LogFactoryUtil;
023 import com.liferay.portal.kernel.util.ArrayUtil;
024 import com.liferay.portal.kernel.util.CharPool;
025 import com.liferay.portal.kernel.util.MapUtil;
026 import com.liferay.portal.kernel.util.ObjectValuePair;
027 import com.liferay.portal.kernel.util.ProgressTracker;
028 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
029 import com.liferay.portal.kernel.util.StreamUtil;
030 import com.liferay.portal.kernel.util.StringBundler;
031 import com.liferay.portal.kernel.util.StringPool;
032 import com.liferay.portal.kernel.util.StringUtil;
033 import com.liferay.portal.kernel.util.Validator;
034 import com.liferay.portal.kernel.xml.Document;
035 import com.liferay.portal.kernel.xml.DocumentException;
036 import com.liferay.portal.kernel.xml.Element;
037 import com.liferay.portal.kernel.xml.SAXReaderUtil;
038 import com.liferay.portal.kernel.zip.ZipReader;
039 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040 import com.liferay.portal.model.User;
041 import com.liferay.portal.service.ServiceContext;
042 import com.liferay.portal.service.UserLocalServiceUtil;
043 import com.liferay.portal.util.PropsValues;
044 import com.liferay.portlet.asset.NoSuchTagException;
045 import com.liferay.portlet.asset.model.AssetTag;
046 import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047 import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048 import com.liferay.portlet.asset.util.AssetUtil;
049 import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050 import com.liferay.portlet.wiki.ImportFilesException;
051 import com.liferay.portlet.wiki.NoSuchPageException;
052 import com.liferay.portlet.wiki.importers.WikiImporter;
053 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054 import com.liferay.portlet.wiki.model.WikiNode;
055 import com.liferay.portlet.wiki.model.WikiPage;
056 import com.liferay.portlet.wiki.model.WikiPageConstants;
057 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059
060 import java.io.IOException;
061 import java.io.InputStream;
062 import java.io.InputStreamReader;
063
064 import java.util.ArrayList;
065 import java.util.Collections;
066 import java.util.HashMap;
067 import java.util.Iterator;
068 import java.util.List;
069 import java.util.Map;
070 import java.util.regex.Matcher;
071 import java.util.regex.Pattern;
072
073
077 public class MediaWikiImporter implements WikiImporter {
078
/**
 * Text passed to <code>WikiPageLocalServiceUtil.addPage</code> together with
 * {@link #SHARED_IMAGES_TITLE} when the image import has to create the
 * shared images page.
 */
079 public static final String SHARED_IMAGES_CONTENT = "See attachments";
080
/**
 * Title of the wiki page that imported image files are attached to.
 */
081 public static final String SHARED_IMAGES_TITLE = "SharedImages";
082
083 public void importPages(
084 long userId, WikiNode node, InputStream[] inputStreams,
085 Map<String, String[]> options)
086 throws PortalException {
087
088 if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
089 throw new PortalException("The pages file is mandatory");
090 }
091
092 InputStream pagesInputStream = inputStreams[0];
093 InputStream usersInputStream = inputStreams[1];
094 InputStream imagesInputStream = inputStreams[2];
095
096 try {
097 Document document = SAXReaderUtil.read(pagesInputStream);
098
099 Map<String, String> usersMap = readUsersFile(usersInputStream);
100
101 Element rootElement = document.getRootElement();
102
103 List<String> specialNamespaces = readSpecialNamespaces(rootElement);
104
105 processSpecialPages(userId, node, rootElement, specialNamespaces);
106 processRegularPages(
107 userId, node, rootElement, specialNamespaces, usersMap,
108 imagesInputStream, options);
109 processImages(userId, node, imagesInputStream);
110
111 moveFrontPage(userId, node, options);
112 }
113 catch (DocumentException de) {
114 throw new ImportFilesException("Invalid XML file provided");
115 }
116 catch (IOException de) {
117 throw new ImportFilesException("Error reading the files provided");
118 }
119 catch (PortalException e) {
120 throw e;
121 }
122 catch (Exception e) {
123 throw new PortalException(e);
124 }
125 }
126
127 protected long getUserId(
128 long userId, WikiNode node, String author,
129 Map<String, String> usersMap)
130 throws PortalException, SystemException {
131
132 User user = null;
133
134 String emailAddress = usersMap.get(author);
135
136 try {
137 if (Validator.isNull(emailAddress)) {
138 user = UserLocalServiceUtil.getUserByScreenName(
139 node.getCompanyId(), author.toLowerCase());
140 }
141 else {
142 user = UserLocalServiceUtil.getUserByEmailAddress(
143 node.getCompanyId(), emailAddress);
144 }
145 }
146 catch (NoSuchUserException nsue) {
147 user = UserLocalServiceUtil.getUserById(userId);
148 }
149
150 return user.getUserId();
151 }
152
153 protected void importPage(
154 long userId, String author, WikiNode node, String title,
155 String content, String summary, Map<String, String> usersMap,
156 boolean strictImportMode)
157 throws PortalException {
158
159 try {
160 long authorUserId = getUserId(userId, node, author, usersMap);
161 String parentTitle = readParentTitle(content);
162 String redirectTitle = readRedirectTitle(content);
163
164 ServiceContext serviceContext = new ServiceContext();
165
166 serviceContext.setAddGroupPermissions(true);
167 serviceContext.setAddGuestPermissions(true);
168 serviceContext.setAssetTagNames(
169 readAssetTagNames(userId, node, content));
170
171 if (Validator.isNull(redirectTitle)) {
172 _translator.setStrictImportMode(strictImportMode);
173
174 content = _translator.translate(content);
175 }
176 else {
177 content =
178 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
179 StringPool.DOUBLE_CLOSE_BRACKET;
180 }
181
182 WikiPage page = null;
183
184 try {
185 page = WikiPageLocalServiceUtil.getPage(
186 node.getNodeId(), title);
187 }
188 catch (NoSuchPageException nspe) {
189 page = WikiPageLocalServiceUtil.addPage(
190 authorUserId, node.getNodeId(), title,
191 WikiPageConstants.NEW, null, true, serviceContext);
192 }
193
194 WikiPageLocalServiceUtil.updatePage(
195 authorUserId, node.getNodeId(), title, page.getVersion(),
196 content, summary, true, "creole", parentTitle, redirectTitle,
197 serviceContext);
198 }
199 catch (Exception e) {
200 throw new PortalException("Error importing page " + title, e);
201 }
202 }
203
204 protected boolean isSpecialMediaWikiPage(
205 String title, List<String> specialNamespaces) {
206
207 for (String namespace: specialNamespaces) {
208 if (title.startsWith(namespace + StringPool.COLON)) {
209 return true;
210 }
211 }
212
213 return false;
214 }
215
216 protected boolean isValidImage(String[] paths, InputStream inputStream) {
217 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
218 return false;
219 }
220
221 if ((paths.length > 1) &&
222 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
223
224 return false;
225 }
226
227 String fileName = paths[paths.length - 1];
228
229 try {
230 DLStoreUtil.validate(fileName, true, inputStream);
231 }
232 catch (PortalException pe) {
233 return false;
234 }
235 catch (SystemException se) {
236 return false;
237 }
238
239 return true;
240 }
241
242 protected void moveFrontPage(
243 long userId, WikiNode node, Map<String, String[]> options) {
244
245 String frontPageTitle = MapUtil.getString(
246 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
247
248 if (Validator.isNotNull(frontPageTitle)) {
249 frontPageTitle = normalizeTitle(frontPageTitle);
250
251 try {
252 if (WikiPageLocalServiceUtil.getPagesCount(
253 node.getNodeId(), frontPageTitle, true) > 0) {
254
255 ServiceContext serviceContext = new ServiceContext();
256
257 serviceContext.setAddGroupPermissions(true);
258 serviceContext.setAddGuestPermissions(true);
259
260 WikiPageLocalServiceUtil.movePage(
261 userId, node.getNodeId(), frontPageTitle,
262 WikiPageConstants.FRONT_PAGE, false, serviceContext);
263
264 }
265 }
266 catch (Exception e) {
267 if (_log.isWarnEnabled()) {
268 StringBundler sb = new StringBundler(4);
269
270 sb.append("Could not move ");
271 sb.append(WikiPageConstants.FRONT_PAGE);
272 sb.append(" to the title provided: ");
273 sb.append(frontPageTitle);
274
275 _log.warn(sb.toString(), e);
276 }
277 }
278
279 }
280
281 }
282
283 protected String normalize(String categoryName, int length) {
284 categoryName = AssetUtil.toWord(categoryName.trim());
285
286 return StringUtil.shorten(categoryName, length);
287 }
288
289 protected String normalizeDescription(String description) {
290 description = description.replaceAll(
291 _categoriesPattern.pattern(), StringPool.BLANK);
292
293 return normalize(description, 300);
294 }
295
296 protected String normalizeTitle(String title) {
297 title = title.replaceAll(
298 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
299
300 return StringUtil.shorten(title, 75);
301 }
302
303 protected void processImages(
304 long userId, WikiNode node, InputStream imagesInputStream)
305 throws Exception {
306
307 if (imagesInputStream == null) {
308 return;
309 }
310
311 ProgressTracker progressTracker =
312 ProgressTrackerThreadLocal.getProgressTracker();
313
314 int count = 0;
315
316 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
317 imagesInputStream);
318
319 List<String> entries = zipReader.getEntries();
320
321 int total = entries.size();
322
323 if (total > 0) {
324 try {
325 WikiPageLocalServiceUtil.getPage(
326 node.getNodeId(), SHARED_IMAGES_TITLE);
327 }
328 catch (NoSuchPageException nspe) {
329 ServiceContext serviceContext = new ServiceContext();
330
331 serviceContext.setAddGroupPermissions(true);
332 serviceContext.setAddGuestPermissions(true);
333
334 WikiPageLocalServiceUtil.addPage(
335 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
336 SHARED_IMAGES_CONTENT, null, true, serviceContext);
337 }
338 }
339
340 List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
341 new ArrayList<ObjectValuePair<String, InputStream>>();
342
343 try {
344 int percentage = 50;
345
346 for (int i = 0; i < entries.size(); i++) {
347 String entry = entries.get(i);
348
349 String key = entry;
350
351 InputStream inputStream = zipReader.getEntryAsInputStream(
352 entry);
353
354 String[] paths = StringUtil.split(key, CharPool.SLASH);
355
356 if (!isValidImage(paths, inputStream)) {
357 if (_log.isInfoEnabled()) {
358 _log.info("Ignoring " + key);
359 }
360
361 continue;
362 }
363
364 String fileName = paths[paths.length - 1].toLowerCase();
365
366 ObjectValuePair<String, InputStream> inputStreamOVP =
367 new ObjectValuePair<String, InputStream>(
368 fileName, inputStream);
369
370 inputStreamOVPs.add(inputStreamOVP);
371
372 count++;
373
374 if ((i % 5) == 0) {
375 WikiPageLocalServiceUtil.addPageAttachments(
376 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
377 inputStreamOVPs);
378
379 inputStreamOVPs.clear();
380
381 percentage = Math.min(50 + (i * 50) / total, 99);
382
383 progressTracker.updateProgress(percentage);
384 }
385 }
386
387 if (!inputStreamOVPs.isEmpty()) {
388 WikiPageLocalServiceUtil.addPageAttachments(
389 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
390 inputStreamOVPs);
391 }
392 }
393 finally {
394 for (ObjectValuePair<String, InputStream> inputStreamOVP :
395 inputStreamOVPs) {
396
397 InputStream inputStream = inputStreamOVP.getValue();
398
399 StreamUtil.cleanUp(inputStream);
400 }
401 }
402
403 zipReader.close();
404
405 if (_log.isInfoEnabled()) {
406 _log.info("Imported " + count + " images into " + node.getName());
407 }
408 }
409
410 protected void processRegularPages(
411 long userId, WikiNode node, Element rootElement,
412 List<String> specialNamespaces, Map<String, String> usersMap,
413 InputStream imagesInputStream, Map<String, String[]> options) {
414
415 boolean importLatestVersion = MapUtil.getBoolean(
416 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
417 boolean strictImportMode = MapUtil.getBoolean(
418 options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
419
420 ProgressTracker progressTracker =
421 ProgressTrackerThreadLocal.getProgressTracker();
422
423 int count = 0;
424
425 int percentage = 10;
426
427 int maxPercentage = 50;
428
429 if (imagesInputStream == null) {
430 maxPercentage = 99;
431 }
432
433 List<Element> pageElements = rootElement.elements("page");
434
435 for (int i = 0; i < pageElements.size(); i++) {
436 Element pageElement = pageElements.get(i);
437
438 String title = pageElement.elementText("title");
439
440 title = normalizeTitle(title);
441
442 percentage = Math.min(
443 10 + (i * (maxPercentage - percentage)) / pageElements.size(),
444 maxPercentage);
445
446 progressTracker.updateProgress(percentage);
447
448 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
449 continue;
450 }
451
452 List<Element> revisionElements = pageElement.elements("revision");
453
454 if (importLatestVersion) {
455 Element lastRevisionElement = revisionElements.get(
456 revisionElements.size() - 1);
457
458 revisionElements = new ArrayList<Element>();
459
460 revisionElements.add(lastRevisionElement);
461 }
462
463 for (Element revisionElement : revisionElements) {
464 Element contributorElement = revisionElement.element(
465 "contributor");
466
467 String author = contributorElement.elementText("username");
468
469 String content = revisionElement.elementText("text");
470 String summary = revisionElement.elementText("comment");
471
472 try {
473 importPage(
474 userId, author, node, title, content, summary, usersMap,
475 strictImportMode);
476 }
477 catch (Exception e) {
478 if (_log.isWarnEnabled()) {
479 _log.warn(
480 "Page with title " + title +
481 " could not be imported",
482 e);
483 }
484 }
485 }
486
487 count++;
488 }
489
490 if (_log.isInfoEnabled()) {
491 _log.info("Imported " + count + " pages into " + node.getName());
492 }
493 }
494
495 protected void processSpecialPages(
496 long userId, WikiNode node, Element rootElement,
497 List<String> specialNamespaces)
498 throws PortalException {
499
500 ProgressTracker progressTracker =
501 ProgressTrackerThreadLocal.getProgressTracker();
502
503 List<Element> pageElements = rootElement.elements("page");
504
505 for (int i = 0; i < pageElements.size(); i++) {
506 Element pageElement = pageElements.get(i);
507
508 String title = pageElement.elementText("title");
509
510 if (!title.startsWith("Category:")) {
511 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
512 rootElement.remove(pageElement);
513 }
514
515 continue;
516 }
517
518 String categoryName = title.substring("Category:".length());
519
520 categoryName = normalize(categoryName, 75);
521
522 Element revisionElement = pageElement.element("revision");
523
524 String description = revisionElement.elementText("text");
525
526 description = normalizeDescription(description);
527
528 try {
529 AssetTag assetTag = null;
530
531 try {
532 assetTag = AssetTagLocalServiceUtil.getTag(
533 node.getCompanyId(), categoryName);
534 }
535 catch (NoSuchTagException nste) {
536 ServiceContext serviceContext = new ServiceContext();
537
538 serviceContext.setAddGroupPermissions(true);
539 serviceContext.setAddGuestPermissions(true);
540 serviceContext.setScopeGroupId(node.getGroupId());
541
542 assetTag = AssetTagLocalServiceUtil.addTag(
543 userId, categoryName, null, serviceContext);
544 }
545
546 if (Validator.isNotNull(description)) {
547 AssetTagPropertyLocalServiceUtil.addTagProperty(
548 userId, assetTag.getTagId(), "description",
549 description);
550 }
551 }
552 catch (SystemException se) {
553 _log.error(se, se);
554 }
555
556 if ((i % 5) == 0) {
557 progressTracker.updateProgress((i * 10) / pageElements.size());
558 }
559 }
560 }
561
562 protected String[] readAssetTagNames(
563 long userId, WikiNode node, String content)
564 throws PortalException, SystemException {
565
566 Matcher matcher = _categoriesPattern.matcher(content);
567
568 List<String> assetTagNames = new ArrayList<String>();
569
570 while (matcher.find()) {
571 String categoryName = matcher.group(1);
572
573 categoryName = normalize(categoryName, 75);
574
575 AssetTag assetTag = null;
576
577 try {
578 assetTag = AssetTagLocalServiceUtil.getTag(
579 node.getGroupId(), categoryName);
580 }
581 catch (NoSuchTagException nste) {
582 ServiceContext serviceContext = new ServiceContext();
583
584 serviceContext.setAddGroupPermissions(true);
585 serviceContext.setAddGuestPermissions(true);
586 serviceContext.setScopeGroupId(node.getGroupId());
587
588 assetTag = AssetTagLocalServiceUtil.addTag(
589 userId, categoryName, null, serviceContext);
590 }
591
592 assetTagNames.add(assetTag.getName());
593 }
594
595 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
596 assetTagNames.add(_WORK_IN_PROGRESS_TAG);
597 }
598
599 return assetTagNames.toArray(new String[assetTagNames.size()]);
600 }
601
602 protected String readParentTitle(String content) {
603 Matcher matcher = _parentPattern.matcher(content);
604
605 String redirectTitle = StringPool.BLANK;
606
607 if (matcher.find()) {
608 redirectTitle = matcher.group(1);
609
610 redirectTitle = normalizeTitle(redirectTitle);
611
612 redirectTitle += " (disambiguation)";
613 }
614
615 return redirectTitle;
616 }
617 protected String readRedirectTitle(String content) {
618 Matcher matcher = _redirectPattern.matcher(content);
619
620 String redirectTitle = StringPool.BLANK;
621
622 if (matcher.find()) {
623 redirectTitle = matcher.group(1);
624
625 redirectTitle = normalizeTitle(redirectTitle);
626 }
627
628 return redirectTitle;
629 }
630 protected List<String> readSpecialNamespaces(Element root)
631 throws ImportFilesException {
632
633 List<String> namespaces = new ArrayList<String>();
634
635 Element siteinfoEl = root.element("siteinfo");
636
637 if (siteinfoEl == null) {
638 throw new ImportFilesException("Invalid pages XML file");
639 }
640
641 Iterator<Element> itr = siteinfoEl.element(
642 "namespaces").elements("namespace").iterator();
643
644 while (itr.hasNext()) {
645 Element namespace = itr.next();
646
647 if (!namespace.attribute("key").getData().equals("0")) {
648 namespaces.add(namespace.getText());
649 }
650 }
651
652 return namespaces;
653 }
654
655 protected Map<String, String> readUsersFile(InputStream usersInputStream)
656 throws IOException {
657
658 if (usersInputStream == null) {
659 return Collections.emptyMap();
660 }
661
662 Map<String, String> usersMap = new HashMap<String, String>();
663
664 UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
665 new InputStreamReader(usersInputStream));
666
667 String line = unsyncBufferedReader.readLine();
668
669 while (line != null) {
670 String[] array = StringUtil.split(line);
671
672 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
673 (Validator.isNotNull(array[1]))) {
674
675 usersMap.put(array[0], array[1]);
676 }
677 else {
678 if (_log.isInfoEnabled()) {
679 _log.info(
680 "Ignoring line " + line +
681 " because it does not contain exactly 2 columns");
682 }
683 }
684
685 line = unsyncBufferedReader.readLine();
686 }
687
688 return usersMap;
689 }
690
691 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
692 "thumb", "temp", "archive"
693 };
694
695 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
696
697 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
698
699 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
700
701 private static Pattern _categoriesPattern = Pattern.compile(
702 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
703 private static Pattern _parentPattern = Pattern.compile(
704 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
705 private static Pattern _redirectPattern = Pattern.compile(
706 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
707
708 private MediaWikiToCreoleTranslator _translator =
709 new MediaWikiToCreoleTranslator();
710
711 }