1
14
15 package com.liferay.portlet.wiki.importers.mediawiki;
16
17 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
18 import com.liferay.portal.NoSuchUserException;
19 import com.liferay.portal.PortalException;
20 import com.liferay.portal.SystemException;
21 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
22 import com.liferay.portal.kernel.log.Log;
23 import com.liferay.portal.kernel.log.LogFactoryUtil;
24 import com.liferay.portal.kernel.util.ArrayUtil;
25 import com.liferay.portal.kernel.util.MapUtil;
26 import com.liferay.portal.kernel.util.ObjectValuePair;
27 import com.liferay.portal.kernel.util.ProgressTracker;
28 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
29 import com.liferay.portal.kernel.util.StringBundler;
30 import com.liferay.portal.kernel.util.StringPool;
31 import com.liferay.portal.kernel.util.StringUtil;
32 import com.liferay.portal.kernel.util.Validator;
33 import com.liferay.portal.kernel.xml.Document;
34 import com.liferay.portal.kernel.xml.DocumentException;
35 import com.liferay.portal.kernel.xml.Element;
36 import com.liferay.portal.kernel.xml.SAXReaderUtil;
37 import com.liferay.portal.kernel.zip.ZipReader;
38 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
39 import com.liferay.portal.model.User;
40 import com.liferay.portal.service.UserLocalServiceUtil;
41 import com.liferay.portal.util.PropsValues;
42 import com.liferay.portlet.tags.NoSuchEntryException;
43 import com.liferay.portlet.tags.model.TagsEntry;
44 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
45 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
46 import com.liferay.portlet.tags.util.TagsUtil;
47 import com.liferay.portlet.wiki.ImportFilesException;
48 import com.liferay.portlet.wiki.NoSuchPageException;
49 import com.liferay.portlet.wiki.importers.WikiImporter;
50 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
51 import com.liferay.portlet.wiki.model.WikiNode;
52 import com.liferay.portlet.wiki.model.WikiPage;
53 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
54 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
55 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
56
57 import java.io.File;
58 import java.io.FileReader;
59 import java.io.IOException;
60
61 import java.util.ArrayList;
62 import java.util.Collections;
63 import java.util.HashMap;
64 import java.util.Iterator;
65 import java.util.List;
66 import java.util.Map;
67 import java.util.regex.Matcher;
68 import java.util.regex.Pattern;
69
70
76 public class MediaWikiImporter implements WikiImporter {
77
/**
 * Content given to the shared images wiki page when the importer has to
 * create it.
 */
78 public static final String SHARED_IMAGES_CONTENT = "See attachments";
79
/**
 * Title of the wiki page that receives every imported image as an
 * attachment.
 */
80 public static final String SHARED_IMAGES_TITLE = "SharedImages";
81
82 public void importPages(
83 long userId, WikiNode node, File[] files,
84 Map<String, String[]> options)
85 throws PortalException {
86
87 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
88 throw new PortalException("The pages file is mandatory");
89 }
90
91 File pagesFile = files[0];
92 File usersFile = files[1];
93 File imagesFile = files[2];
94
95 try {
96 Document doc = SAXReaderUtil.read(pagesFile);
97
98 Map<String, String> usersMap = readUsersFile(usersFile);
99
100 Element root = doc.getRootElement();
101
102 List<String> specialNamespaces = readSpecialNamespaces(root);
103
104 processSpecialPages(userId, node, root, specialNamespaces);
105 processRegularPages(
106 userId, node, root, specialNamespaces, usersMap, imagesFile,
107 options);
108 processImages(userId, node, imagesFile);
109
110 moveFrontPage(userId, node, options);
111 }
112 catch (DocumentException de) {
113 throw new ImportFilesException("Invalid XML file provided");
114 }
115 catch (IOException de) {
116 throw new ImportFilesException("Error reading the files provided");
117 }
118 catch (PortalException e) {
119 throw e;
120 }
121 catch (Exception e) {
122 throw new PortalException(e);
123 }
124 }
125
126 protected long getUserId(
127 long userId, WikiNode node, String author,
128 Map<String, String> usersMap)
129 throws PortalException, SystemException {
130
131 User user = null;
132
133 String emailAddress = usersMap.get(author);
134
135 try {
136 if (Validator.isNull(emailAddress)) {
137 user = UserLocalServiceUtil.getUserByScreenName(
138 node.getCompanyId(), author.toLowerCase());
139 }
140 else {
141 user = UserLocalServiceUtil.getUserByEmailAddress(
142 node.getCompanyId(), emailAddress);
143 }
144 }
145 catch (NoSuchUserException nsue) {
146 user = UserLocalServiceUtil.getUserById(userId);
147 }
148
149 return user.getUserId();
150 }
151
152 protected void importPage(
153 long userId, String author, WikiNode node, String title,
154 String content, String summary, Map<String, String> usersMap)
155 throws PortalException {
156
157 try {
158 long authorUserId = getUserId(userId, node, author, usersMap);
159 String parentTitle = readParentTitle(content);
160 String redirectTitle = readRedirectTitle(content);
161 String[] tagsEntries = readTagsEntries(userId, node, content);
162
163 if (Validator.isNull(redirectTitle)) {
164 content = _translator.translate(content);
165 }
166 else {
167 content =
168 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
169 StringPool.DOUBLE_CLOSE_BRACKET;
170 }
171
172 WikiPage page = null;
173
174 try {
175 page = WikiPageLocalServiceUtil.getPage(
176 node.getNodeId(), title);
177 }
178 catch (NoSuchPageException nspe) {
179 page = WikiPageLocalServiceUtil.addPage(
180 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
181 null, true, null, null);
182 }
183
184 WikiPageLocalServiceUtil.updatePage(
185 authorUserId, node.getNodeId(), title, page.getVersion(),
186 content, summary, true, "creole", parentTitle,
187 redirectTitle, tagsEntries, null, null);
188 }
189 catch (Exception e) {
190 throw new PortalException("Error importing page " + title, e);
191 }
192 }
193
194 protected boolean isSpecialMediaWikiPage(
195 String title, List<String> specialNamespaces) {
196
197 for (String namespace: specialNamespaces) {
198 if (title.startsWith(namespace + StringPool.COLON)) {
199 return true;
200 }
201 }
202
203 return false;
204 }
205
206 protected boolean isValidImage(String[] paths, byte[] bytes) {
207 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
208 return false;
209 }
210
211 if ((paths.length > 1) &&
212 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
213
214 return false;
215 }
216
217 String fileName = paths[paths.length - 1];
218
219 try {
220 DLLocalServiceUtil.validate(fileName, bytes);
221 }
222 catch (PortalException pe) {
223 return false;
224 }
225 catch (SystemException se) {
226 return false;
227 }
228
229 return true;
230 }
231
232 protected void moveFrontPage(
233 long userId, WikiNode node, Map<String, String[]> options) {
234
235 String frontPageTitle = MapUtil.getString(
236 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
237
238 if (Validator.isNotNull(frontPageTitle)) {
239 frontPageTitle = normalizeTitle(frontPageTitle);
240
241 try {
242 if (WikiPageLocalServiceUtil.getPagesCount(
243 node.getNodeId(), frontPageTitle, true) > 0) {
244
245 WikiPageLocalServiceUtil.movePage(
246 userId, node.getNodeId(), frontPageTitle,
247 WikiPageImpl.FRONT_PAGE, false, null, null);
248
249 }
250 }
251 catch (Exception e) {
252 if (_log.isWarnEnabled()) {
253 StringBundler sb = new StringBundler(4);
254
255 sb.append("Could not move ");
256 sb.append(WikiPageImpl.FRONT_PAGE);
257 sb.append(" to the title provided: ");
258 sb.append(frontPageTitle);
259
260 _log.warn(sb.toString(), e);
261 }
262 }
263
264 }
265
266 }
267
268 protected String normalize(String categoryName, int length) {
269 categoryName = TagsUtil.toWord(categoryName.trim());
270
271 return StringUtil.shorten(categoryName, length);
272 }
273
274 protected String normalizeDescription(String description) {
275 description = description.replaceAll(
276 _categoriesPattern.pattern(), StringPool.BLANK);
277
278 return normalize(description, 300);
279 }
280
281 protected String normalizeTitle(String title) {
282 title = title.replaceAll(
283 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
284
285 return StringUtil.shorten(title, 75);
286 }
287
288 protected void processImages(long userId, WikiNode node, File imagesFile)
289 throws Exception {
290
291 if ((imagesFile == null) || (!imagesFile.exists())) {
292 return;
293 }
294
295 ProgressTracker progressTracker =
296 ProgressTrackerThreadLocal.getProgressTracker();
297
298 int count = 0;
299
300 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(imagesFile);
301
302 List<String> entries = zipReader.getEntries();
303
304 int total = entries.size();
305
306 if (total > 0) {
307 try {
308 WikiPageLocalServiceUtil.getPage(
309 node.getNodeId(), SHARED_IMAGES_TITLE);
310 }
311 catch (NoSuchPageException nspe) {
312 WikiPageLocalServiceUtil.addPage(
313 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
314 SHARED_IMAGES_CONTENT, null, true, null, null);
315 }
316 }
317
318 List<ObjectValuePair<String, byte[]>> attachments =
319 new ArrayList<ObjectValuePair<String, byte[]>>();
320
321 int percentage = 50;
322
323 for (int i = 0; i < entries.size(); i++) {
324 String entry = entries.get(i);
325
326 String key = entry;
327 byte[] value = zipReader.getEntryAsByteArray(entry);
328
329 String[] paths = StringUtil.split(key, StringPool.SLASH);
330
331 if (!isValidImage(paths, value)) {
332 if (_log.isInfoEnabled()) {
333 _log.info("Ignoring " + key);
334 }
335
336 continue;
337 }
338
339 String fileName = paths[paths.length - 1].toLowerCase();
340
341 attachments.add(
342 new ObjectValuePair<String, byte[]>(fileName, value));
343
344 count++;
345
346 if ((i % 5) == 0) {
347 WikiPageLocalServiceUtil.addPageAttachments(
348 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
349
350 attachments.clear();
351
352 percentage = Math.min(50 + (i * 50) / total, 99);
353
354 progressTracker.updateProgress(percentage);
355 }
356 }
357
358 if (!attachments.isEmpty()) {
359 WikiPageLocalServiceUtil.addPageAttachments(
360 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
361 }
362
363 zipReader.close();
364
365 if (_log.isInfoEnabled()) {
366 _log.info("Imported " + count + " images into " + node.getName());
367 }
368 }
369
370 protected void processRegularPages(
371 long userId, WikiNode node, Element root,
372 List<String> specialNamespaces, Map<String, String> usersMap,
373 File imagesFile, Map<String, String[]> options) {
374
375 boolean importLatestVersion = MapUtil.getBoolean(
376 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
377
378 ProgressTracker progressTracker =
379 ProgressTrackerThreadLocal.getProgressTracker();
380
381 int count = 0;
382
383 List<Element> pages = root.elements("page");
384
385 int total = pages.size();
386
387 Iterator<Element> itr = root.elements("page").iterator();
388
389 int percentage = 10;
390 int maxPercentage = 50;
391
392 if ((imagesFile == null) || (!imagesFile.exists())) {
393 maxPercentage = 99;
394 }
395
396 int percentageRange = maxPercentage - percentage;
397
398 for (int i = 0; itr.hasNext(); i++) {
399 Element pageEl = itr.next();
400
401 String title = pageEl.elementText("title");
402
403 title = normalizeTitle(title);
404
405 percentage = Math.min(
406 10 + (i * percentageRange) / total, maxPercentage);
407
408 progressTracker.updateProgress(percentage);
409
410 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
411 continue;
412 }
413
414 List<Element> revisionEls = pageEl.elements("revision");
415
416 if (importLatestVersion) {
417 Element lastRevisionEl = revisionEls.get(
418 revisionEls.size() - 1);
419
420 revisionEls = new ArrayList<Element>();
421
422 revisionEls.add(lastRevisionEl);
423 }
424
425 for (Element curRevisionEl : revisionEls) {
426 String author = curRevisionEl.element(
427 "contributor").elementText("username");
428 String content = curRevisionEl.elementText("text");
429 String summary = curRevisionEl.elementText("comment");
430
431 try {
432 importPage(
433 userId, author, node, title, content, summary,
434 usersMap);
435 }
436 catch (Exception e) {
437 if (_log.isWarnEnabled()) {
438 StringBundler sb = new StringBundler(3);
439
440 sb.append("Page with title ");
441 sb.append(title);
442 sb.append(" could not be imported");
443
444 _log.warn(sb.toString(), e);
445 }
446 }
447 }
448
449 count++;
450 }
451
452 if (_log.isInfoEnabled()) {
453 _log.info("Imported " + count + " pages into " + node.getName());
454 }
455 }
456
457 protected void processSpecialPages(
458 long userId, WikiNode node, Element root,
459 List<String> specialNamespaces)
460 throws PortalException {
461
462 ProgressTracker progressTracker =
463 ProgressTrackerThreadLocal.getProgressTracker();
464
465 List<Element> pages = root.elements("page");
466
467 int total = pages.size();
468
469 Iterator<Element> itr = pages.iterator();
470
471 for (int i = 0; itr.hasNext(); i++) {
472 Element page = itr.next();
473
474 String title = page.elementText("title");
475
476 if (!title.startsWith("Category:")) {
477 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
478 root.remove(page);
479 }
480
481 continue;
482 }
483
484 String categoryName = title.substring("Category:".length());
485
486 categoryName = normalize(categoryName, 75);
487
488 String description = page.element("revision").elementText("text");
489
490 description = normalizeDescription(description);
491
492 try {
493 TagsEntry tagsEntry = null;
494
495 try {
496 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
497 node.getCompanyId(), categoryName);
498 }
499 catch (NoSuchEntryException nsee) {
500 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
501 userId, categoryName);
502 }
503
504 if (Validator.isNotNull(description)) {
505 TagsPropertyLocalServiceUtil.addProperty(
506 userId, tagsEntry.getEntryId(), "description",
507 description);
508 }
509 }
510 catch (SystemException se) {
511 _log.error(se, se);
512 }
513
514 if ((i % 5) == 0) {
515 progressTracker.updateProgress((i * 10) / total);
516 }
517 }
518 }
519
520 protected String readParentTitle(String content) {
521 Matcher matcher = _parentPattern.matcher(content);
522
523 String redirectTitle = StringPool.BLANK;
524
525 if (matcher.find()) {
526 redirectTitle = matcher.group(1);
527
528 redirectTitle = normalizeTitle(redirectTitle);
529
530 redirectTitle += " (disambiguation)";
531 }
532
533 return redirectTitle;
534 }
535 protected String readRedirectTitle(String content) {
536 Matcher matcher = _redirectPattern.matcher(content);
537
538 String redirectTitle = StringPool.BLANK;
539
540 if (matcher.find()) {
541 redirectTitle = matcher.group(1);
542
543 redirectTitle = normalizeTitle(redirectTitle);
544 }
545
546 return redirectTitle;
547 }
548 protected List<String> readSpecialNamespaces(Element root)
549 throws ImportFilesException {
550
551 List<String> namespaces = new ArrayList<String>();
552
553 Element siteinfoEl = root.element("siteinfo");
554
555 if (siteinfoEl == null) {
556 throw new ImportFilesException("Invalid pages XML file");
557 }
558
559 Iterator<Element> itr = siteinfoEl.element(
560 "namespaces").elements("namespace").iterator();
561
562 while (itr.hasNext()) {
563 Element namespace = itr.next();
564
565 if (!namespace.attribute("key").getData().equals("0")) {
566 namespaces.add(namespace.getText());
567 }
568 }
569
570 return namespaces;
571 }
572
573 protected String[] readTagsEntries(
574 long userId, WikiNode node, String content)
575 throws PortalException, SystemException {
576
577 Matcher matcher = _categoriesPattern.matcher(content);
578
579 List<String> tagsEntries = new ArrayList<String>();
580
581 while (matcher.find()) {
582 String categoryName = matcher.group(1);
583
584 categoryName = normalize(categoryName, 75);
585
586 TagsEntry tagsEntry = null;
587
588 try {
589 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
590 node.getCompanyId(), categoryName);
591 }
592 catch (NoSuchEntryException nsee) {
593 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
594 userId, categoryName);
595 }
596
597 tagsEntries.add(tagsEntry.getName());
598 }
599
600 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
601 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
602 }
603
604 return tagsEntries.toArray(new String[tagsEntries.size()]);
605 }
606
607 protected Map<String, String> readUsersFile(File usersFile)
608 throws IOException {
609
610 if ((usersFile == null) || (!usersFile.exists())) {
611 return Collections.EMPTY_MAP;
612 }
613
614 Map<String, String> usersMap = new HashMap<String, String>();
615
616 UnsyncBufferedReader unsyncBufferedReader =
617 new UnsyncBufferedReader(new FileReader(usersFile));
618
619 String line = unsyncBufferedReader.readLine();
620
621 while (line != null) {
622 String[] array = StringUtil.split(line);
623
624 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
625 (Validator.isNotNull(array[1]))) {
626
627 usersMap.put(array[0], array[1]);
628 }
629 else {
630 if (_log.isInfoEnabled()) {
631 _log.info(
632 "Ignoring line " + line +
633 " because it does not contain exactly 2 columns");
634 }
635 }
636
637 line = unsyncBufferedReader.readLine();
638 }
639
640 return usersMap;
641 }
642
643 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
644 "thumb", "temp", "archive"
645 };
646
647 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
648
649 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
650
651 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
652
653 private static Pattern _categoriesPattern = Pattern.compile(
654 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
655 private static Pattern _parentPattern = Pattern.compile(
656 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
657 private static Pattern _redirectPattern = Pattern.compile(
658 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
659
660 private MediaWikiToCreoleTranslator _translator =
661 new MediaWikiToCreoleTranslator();
662
663 }