1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.util.ArrayUtil;
30 import com.liferay.portal.kernel.util.ObjectValuePair;
31 import com.liferay.portal.kernel.util.ProgressTracker;
32 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
33 import com.liferay.portal.kernel.util.StringPool;
34 import com.liferay.portal.kernel.util.StringUtil;
35 import com.liferay.portal.kernel.util.Validator;
36 import com.liferay.portal.kernel.xml.Document;
37 import com.liferay.portal.kernel.xml.DocumentException;
38 import com.liferay.portal.kernel.xml.Element;
39 import com.liferay.portal.kernel.xml.SAXReaderUtil;
40 import com.liferay.portal.kernel.zip.ZipReader;
41 import com.liferay.portal.model.User;
42 import com.liferay.portal.service.UserLocalServiceUtil;
43 import com.liferay.portal.util.PropsValues;
44 import com.liferay.portlet.tags.NoSuchEntryException;
45 import com.liferay.portlet.tags.model.TagsEntry;
46 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
47 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
48 import com.liferay.portlet.tags.util.TagsUtil;
49 import com.liferay.portlet.wiki.ImportFilesException;
50 import com.liferay.portlet.wiki.NoSuchPageException;
51 import com.liferay.portlet.wiki.importers.WikiImporter;
52 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
53 import com.liferay.portlet.wiki.model.WikiNode;
54 import com.liferay.portlet.wiki.model.WikiPage;
55 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
56 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
57 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
58 import com.liferay.util.MapUtil;
59
60 import java.io.BufferedReader;
61 import java.io.File;
62 import java.io.FileReader;
63 import java.io.IOException;
64
65 import java.util.ArrayList;
66 import java.util.Collections;
67 import java.util.HashMap;
68 import java.util.Iterator;
69 import java.util.List;
70 import java.util.Map;
71 import java.util.regex.Matcher;
72 import java.util.regex.Pattern;
73
74 import org.apache.commons.logging.Log;
75 import org.apache.commons.logging.LogFactory;
76
77
84 public class MediaWikiImporter implements WikiImporter {
85
/**
 * Value passed to <code>WikiPageLocalServiceUtil.addPage</code> (presumably
 * as the page content -- confirm against the service signature) when the
 * shared images page is created during an image import.
 */
public static final String SHARED_IMAGES_CONTENT = "See attachments";

/**
 * Title of the wiki page that imported image files are attached to.
 */
public static final String SHARED_IMAGES_TITLE = "SharedImages";
89
90 public void importPages(
91 long userId, WikiNode node, File[] files,
92 Map<String, String[]> options)
93 throws PortalException {
94
95 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96 throw new PortalException("The pages file is mandatory");
97 }
98
99 File pagesFile = files[0];
100 File usersFile = files[1];
101 File imagesFile = files[2];
102
103 try {
104 Document doc = SAXReaderUtil.read(pagesFile);
105
106 Map<String, String> usersMap = readUsersFile(usersFile);
107
108 Element root = doc.getRootElement();
109
110 List<String> specialNamespaces = readSpecialNamespaces(root);
111
112 processSpecialPages(userId, node, root, specialNamespaces);
113 processRegularPages(
114 userId, node, root, specialNamespaces, usersMap, imagesFile,
115 options);
116 processImages(userId, node, imagesFile);
117
118 moveFrontPage(userId, node, options);
119 }
120 catch (DocumentException de) {
121 throw new ImportFilesException("Invalid XML file provided");
122 }
123 catch (IOException de) {
124 throw new ImportFilesException("Error reading the files provided");
125 }
126 catch (PortalException e) {
127 throw e;
128 }
129 catch (Exception e) {
130 throw new PortalException(e);
131 }
132 }
133
134 protected long getUserId(
135 long userId, WikiNode node, String author,
136 Map<String, String> usersMap)
137 throws PortalException, SystemException {
138
139 User user = null;
140
141 String emailAddress = usersMap.get(author);
142
143 try {
144 if (Validator.isNull(emailAddress)) {
145 user = UserLocalServiceUtil.getUserByScreenName(
146 node.getCompanyId(), author.toLowerCase());
147 }
148 else {
149 user = UserLocalServiceUtil.getUserByEmailAddress(
150 node.getCompanyId(), emailAddress);
151 }
152 }
153 catch (NoSuchUserException nsue) {
154 user = UserLocalServiceUtil.getUserById(userId);
155 }
156
157 return user.getUserId();
158 }
159
160 protected void importPage(
161 long userId, String author, WikiNode node, String title,
162 String content, String summary, Map<String, String> usersMap)
163 throws PortalException {
164
165 try {
166 long authorUserId = getUserId(userId, node, author, usersMap);
167 String parentTitle = readParentTitle(content);
168 String redirectTitle = readRedirectTitle(content);
169 String[] tagsEntries = readTagsEntries(userId, node, content);
170
171 if (Validator.isNull(redirectTitle)) {
172 content = _translator.translate(content);
173 }
174 else {
175 content =
176 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
177 StringPool.DOUBLE_CLOSE_BRACKET;
178 }
179
180 WikiPage page = null;
181
182 try {
183 page = WikiPageLocalServiceUtil.getPage(
184 node.getNodeId(), title);
185 }
186 catch (NoSuchPageException nspe) {
187 page = WikiPageLocalServiceUtil.addPage(
188 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
189 null, true, null, null);
190 }
191
192 WikiPageLocalServiceUtil.updatePage(
193 authorUserId, node.getNodeId(), title, page.getVersion(),
194 content, summary, true, "creole", parentTitle,
195 redirectTitle, tagsEntries, null, null);
196 }
197 catch (Exception e) {
198 throw new PortalException("Error importing page " + title, e);
199 }
200 }
201
202 protected boolean isSpecialMediaWikiPage(
203 String title, List<String> specialNamespaces) {
204
205 for (String namespace: specialNamespaces) {
206 if (title.startsWith(namespace + StringPool.COLON)) {
207 return true;
208 }
209 }
210
211 return false;
212 }
213
214 protected boolean isValidImage(String[] paths, byte[] bytes) {
215 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
216 return false;
217 }
218
219 if ((paths.length > 1) &&
220 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
221
222 return false;
223 }
224
225 String fileName = paths[paths.length - 1];
226
227 try {
228 DLLocalServiceUtil.validate(fileName, bytes);
229 }
230 catch (PortalException pe) {
231 return false;
232 }
233
234 return true;
235 }
236
237 protected void moveFrontPage(
238 long userId, WikiNode node, Map<String, String[]> options) {
239
240 String frontPageTitle = MapUtil.getString(
241 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
242
243 if (Validator.isNotNull(frontPageTitle)) {
244 frontPageTitle = normalizeTitle(frontPageTitle);
245
246 try {
247 if (WikiPageLocalServiceUtil.getPagesCount(
248 node.getNodeId(), frontPageTitle, true) > 0) {
249
250 WikiPageLocalServiceUtil.movePage(
251 userId, node.getNodeId(), frontPageTitle,
252 WikiPageImpl.FRONT_PAGE, false, null, null);
253
254 }
255 }
256 catch (Exception e) {
257 if (_log.isWarnEnabled()) {
258 StringBuilder sb = new StringBuilder();
259
260 sb.append("Could not move ");
261 sb.append(WikiPageImpl.FRONT_PAGE);
262 sb.append(" to the title provided: ");
263 sb.append(frontPageTitle);
264
265 _log.warn(sb.toString(), e);
266 }
267 }
268
269 }
270
271 }
272
273 protected String normalize(String categoryName, int length) {
274 categoryName = TagsUtil.toWord(categoryName.trim());
275
276 return StringUtil.shorten(categoryName, length);
277 }
278
279 protected String normalizeDescription(String description) {
280 description = description.replaceAll(
281 _categoriesPattern.pattern(), StringPool.BLANK);
282
283 return normalize(description, 300);
284 }
285
286 protected String normalizeTitle(String title) {
287 title = title.replaceAll(
288 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
289
290 return StringUtil.shorten(title, 75);
291 }
292
293 private void processImages(long userId, WikiNode node, File imagesFile)
294 throws Exception {
295
296 if ((imagesFile == null) || (!imagesFile.exists())) {
297 return;
298 }
299
300 ProgressTracker progressTracker =
301 ProgressTrackerThreadLocal.getProgressTracker();
302
303 int count = 0;
304
305 ZipReader zipReader = new ZipReader(imagesFile);
306
307 Map<String, byte[]> entries = zipReader.getEntries();
308
309 int total = entries.size();
310
311 if (total > 0) {
312 try {
313 WikiPageLocalServiceUtil.getPage(
314 node.getNodeId(), SHARED_IMAGES_TITLE);
315 }
316 catch (NoSuchPageException nspe) {
317 WikiPageLocalServiceUtil.addPage(
318 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
319 SHARED_IMAGES_CONTENT, null, true, null, null);
320 }
321 }
322
323 List<ObjectValuePair<String, byte[]>> attachments =
324 new ArrayList<ObjectValuePair<String, byte[]>>();
325
326 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
327
328 int percentage = 50;
329
330 for (int i = 0; itr.hasNext(); i++) {
331 Map.Entry<String, byte[]> entry = itr.next();
332
333 String key = entry.getKey();
334 byte[] value = entry.getValue();
335
336 if (key.endsWith(StringPool.SLASH)) {
337 if (_log.isInfoEnabled()) {
338 _log.info("Ignoring " + key);
339 }
340
341 continue;
342 }
343
344 String[] paths = StringUtil.split(key, StringPool.SLASH);
345
346 if (!isValidImage(paths, value)) {
347 if (_log.isInfoEnabled()) {
348 _log.info("Ignoring " + key);
349 }
350
351 continue;
352 }
353
354 String fileName = paths[paths.length - 1].toLowerCase();
355
356 attachments.add(
357 new ObjectValuePair<String, byte[]>(fileName, value));
358
359 count++;
360
361 if ((i % 5) == 0) {
362 WikiPageLocalServiceUtil.addPageAttachments(
363 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
364
365 attachments.clear();
366
367 percentage = Math.min(50 + (i * 50) / total, 99);
368
369 progressTracker.updateProgress(percentage);
370 }
371 }
372
373 if (!attachments.isEmpty()) {
374 WikiPageLocalServiceUtil.addPageAttachments(
375 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
376 }
377
378 if (_log.isInfoEnabled()) {
379 _log.info("Imported " + count + " images into " + node.getName());
380 }
381 }
382
383 protected void processRegularPages(
384 long userId, WikiNode node, Element root,
385 List<String> specialNamespaces, Map<String, String> usersMap,
386 File imagesFile, Map<String, String[]> options) {
387
388 boolean importLatestVersion = MapUtil.getBoolean(
389 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
390
391 ProgressTracker progressTracker =
392 ProgressTrackerThreadLocal.getProgressTracker();
393
394 int count = 0;
395
396 List<Element> pages = root.elements("page");
397
398 int total = pages.size();
399
400 Iterator<Element> itr = root.elements("page").iterator();
401
402 int percentage = 10;
403 int maxPercentage = 50;
404
405 if ((imagesFile == null) || (!imagesFile.exists())) {
406 maxPercentage = 99;
407 }
408
409 int percentageRange = maxPercentage - percentage;
410
411 for (int i = 0; itr.hasNext(); i++) {
412 Element pageEl = itr.next();
413
414 String title = pageEl.elementText("title");
415
416 title = normalizeTitle(title);
417
418 percentage = Math.min(
419 10 + (i * percentageRange) / total, maxPercentage);
420
421 progressTracker.updateProgress(percentage);
422
423 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
424 continue;
425 }
426
427 List<Element> revisionEls = pageEl.elements("revision");
428
429 if (importLatestVersion) {
430 Element lastRevisionEl = revisionEls.get(
431 revisionEls.size() - 1);
432
433 revisionEls = new ArrayList<Element>();
434
435 revisionEls.add(lastRevisionEl);
436 }
437
438 for (Element curRevisionEl : revisionEls) {
439 String author = curRevisionEl.element(
440 "contributor").elementText("username");
441 String content = curRevisionEl.elementText("text");
442 String summary = curRevisionEl.elementText("comment");
443
444 try {
445 importPage(
446 userId, author, node, title, content, summary,
447 usersMap);
448 }
449 catch (Exception e) {
450 if (_log.isWarnEnabled()) {
451 StringBuilder sb = new StringBuilder();
452
453 sb.append("Page with title ");
454 sb.append(title);
455 sb.append(" could not be imported");
456
457 _log.warn(sb.toString(), e);
458 }
459 }
460 }
461
462 count++;
463 }
464
465 if (_log.isInfoEnabled()) {
466 _log.info("Imported " + count + " pages into " + node.getName());
467 }
468 }
469
470 protected void processSpecialPages(
471 long userId, WikiNode node, Element root,
472 List<String> specialNamespaces)
473 throws PortalException {
474
475 ProgressTracker progressTracker =
476 ProgressTrackerThreadLocal.getProgressTracker();
477
478 List<Element> pages = root.elements("page");
479
480 int total = pages.size();
481
482 Iterator<Element> itr = pages.iterator();
483
484 for (int i = 0; itr.hasNext(); i++) {
485 Element page = itr.next();
486
487 String title = page.elementText("title");
488
489 if (!title.startsWith("Category:")) {
490 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
491 root.remove(page);
492 }
493
494 continue;
495 }
496
497 String categoryName = title.substring("Category:".length());
498
499 categoryName = normalize(categoryName, 75);
500
501 String description = page.element("revision").elementText("text");
502
503 description = normalizeDescription(description);
504
505 try {
506 TagsEntry tagsEntry = null;
507
508 try {
509 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
510 node.getCompanyId(), categoryName);
511 }
512 catch (NoSuchEntryException nsee) {
513 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
514 userId, categoryName);
515 }
516
517 if (Validator.isNotNull(description)) {
518 TagsPropertyLocalServiceUtil.addProperty(
519 userId, tagsEntry.getEntryId(), "description",
520 description);
521 }
522 }
523 catch (SystemException se) {
524 _log.error(se, se);
525 }
526
527 if ((i % 5) == 0) {
528 progressTracker.updateProgress((i * 10) / total);
529 }
530 }
531 }
532
533 protected String readParentTitle(String content) {
534 Matcher matcher = _parentPattern.matcher(content);
535
536 String redirectTitle = StringPool.BLANK;
537
538 if (matcher.find()) {
539 redirectTitle = matcher.group(1);
540
541 redirectTitle = normalizeTitle(redirectTitle);
542
543 redirectTitle += " (disambiguation)";
544 }
545
546 return redirectTitle;
547 }
548
549 protected String readRedirectTitle(String content) {
550 Matcher matcher = _redirectPattern.matcher(content);
551
552 String redirectTitle = StringPool.BLANK;
553
554 if (matcher.find()) {
555 redirectTitle = matcher.group(1);
556
557 redirectTitle = normalizeTitle(redirectTitle);
558 }
559
560 return redirectTitle;
561 }
562
563 protected List<String> readSpecialNamespaces(Element root)
564 throws ImportFilesException {
565
566 List<String> namespaces = new ArrayList<String>();
567
568 Element siteinfoEl = root.element("siteinfo");
569
570 if (siteinfoEl == null) {
571 throw new ImportFilesException("Invalid pages XML file");
572 }
573
574 Iterator<Element> itr = siteinfoEl.element(
575 "namespaces").elements("namespace").iterator();
576
577 while (itr.hasNext()) {
578 Element namespace = itr.next();
579
580 if (!namespace.attribute("key").equals("0")) {
581 namespaces.add(namespace.getText());
582 }
583 }
584
585 return namespaces;
586 }
587
588 protected String[] readTagsEntries(
589 long userId, WikiNode node, String content)
590 throws PortalException, SystemException {
591
592 Matcher matcher = _categoriesPattern.matcher(content);
593
594 List<String> tagsEntries = new ArrayList<String>();
595
596 while (matcher.find()) {
597 String categoryName = matcher.group(1);
598
599 categoryName = normalize(categoryName, 75);
600
601 TagsEntry tagsEntry = null;
602
603 try {
604 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
605 node.getCompanyId(), categoryName);
606 }
607 catch (NoSuchEntryException nsee) {
608 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
609 userId, categoryName);
610 }
611
612 tagsEntries.add(tagsEntry.getName());
613 }
614
615 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
616 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
617 }
618
619 return tagsEntries.toArray(new String[tagsEntries.size()]);
620 }
621
622 protected Map<String, String> readUsersFile(File usersFile)
623 throws IOException {
624
625 if ((usersFile == null) || (!usersFile.exists())) {
626 return Collections.EMPTY_MAP;
627 }
628
629 Map<String, String> usersMap = new HashMap<String, String>();
630
631 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
632
633 String line = reader.readLine();
634
635 while (line != null) {
636 String[] array = StringUtil.split(line);
637
638 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
639 (Validator.isNotNull(array[1]))) {
640
641 usersMap.put(array[0], array[1]);
642 }
643 else {
644 if (_log.isInfoEnabled()) {
645 _log.info(
646 "Ignoring line " + line +
647 " because it does not contain exactly 2 columns");
648 }
649 }
650
651 line = reader.readLine();
652 }
653
654 return usersMap;
655 }
656
657 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
658 "thumb", "temp", "archive"};
659
660 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
661
662 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
663
664 private static Log _log = LogFactory.getLog(MediaWikiImporter.class);
665
666 private static Pattern _categoriesPattern = Pattern.compile(
667 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
668
669 private static Pattern _parentPattern = Pattern.compile(
670 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
671
672 private static Pattern _redirectPattern = Pattern.compile(
673 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
674
675 private MediaWikiToCreoleTranslator _translator =
676 new MediaWikiToCreoleTranslator();
677
678 }