001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portlet.wiki.importers.mediawiki;
016    
017    import com.liferay.portal.NoSuchUserException;
018    import com.liferay.portal.kernel.exception.PortalException;
019    import com.liferay.portal.kernel.exception.SystemException;
020    import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
021    import com.liferay.portal.kernel.log.Log;
022    import com.liferay.portal.kernel.log.LogFactoryUtil;
023    import com.liferay.portal.kernel.util.ArrayUtil;
024    import com.liferay.portal.kernel.util.CharPool;
025    import com.liferay.portal.kernel.util.MapUtil;
026    import com.liferay.portal.kernel.util.ObjectValuePair;
027    import com.liferay.portal.kernel.util.ProgressTracker;
028    import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
029    import com.liferay.portal.kernel.util.StreamUtil;
030    import com.liferay.portal.kernel.util.StringBundler;
031    import com.liferay.portal.kernel.util.StringPool;
032    import com.liferay.portal.kernel.util.StringUtil;
033    import com.liferay.portal.kernel.util.Validator;
034    import com.liferay.portal.kernel.xml.Document;
035    import com.liferay.portal.kernel.xml.DocumentException;
036    import com.liferay.portal.kernel.xml.Element;
037    import com.liferay.portal.kernel.xml.SAXReaderUtil;
038    import com.liferay.portal.kernel.zip.ZipReader;
039    import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040    import com.liferay.portal.model.User;
041    import com.liferay.portal.service.ServiceContext;
042    import com.liferay.portal.service.UserLocalServiceUtil;
043    import com.liferay.portal.util.PropsValues;
044    import com.liferay.portlet.asset.NoSuchTagException;
045    import com.liferay.portlet.asset.model.AssetTag;
046    import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047    import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048    import com.liferay.portlet.asset.util.AssetUtil;
049    import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050    import com.liferay.portlet.wiki.ImportFilesException;
051    import com.liferay.portlet.wiki.NoSuchPageException;
052    import com.liferay.portlet.wiki.importers.WikiImporter;
053    import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054    import com.liferay.portlet.wiki.model.WikiNode;
055    import com.liferay.portlet.wiki.model.WikiPage;
056    import com.liferay.portlet.wiki.model.WikiPageConstants;
057    import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058    import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059    
060    import java.io.IOException;
061    import java.io.InputStream;
062    import java.io.InputStreamReader;
063    
064    import java.util.ArrayList;
065    import java.util.Collections;
066    import java.util.HashMap;
067    import java.util.Iterator;
068    import java.util.List;
069    import java.util.Map;
070    import java.util.regex.Matcher;
071    import java.util.regex.Pattern;
072    
073    /**
074     * @author Alvaro del Castillo
075     * @author Jorge Ferrer
076     */
077    public class MediaWikiImporter implements WikiImporter {
078    
               /**
                * Content given to the shared images page when the importer has to create
                * it.
                */
               public static final String SHARED_IMAGES_CONTENT = "See attachments";

               /**
                * Title of the wiki page that imported images are attached to.
                */
               public static final String SHARED_IMAGES_TITLE = "SharedImages";
082    
083            public void importPages(
084                            long userId, WikiNode node, InputStream[] inputStreams,
085                            Map<String, String[]> options)
086                    throws PortalException {
087    
088                    if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
089                            throw new PortalException("The pages file is mandatory");
090                    }
091    
092                    InputStream pagesInputStream = inputStreams[0];
093                    InputStream usersInputStream = inputStreams[1];
094                    InputStream imagesInputStream = inputStreams[2];
095    
096                    try {
097                            Document document = SAXReaderUtil.read(pagesInputStream);
098    
099                            Map<String, String> usersMap = readUsersFile(usersInputStream);
100    
101                            Element rootElement = document.getRootElement();
102    
103                            List<String> specialNamespaces = readSpecialNamespaces(rootElement);
104    
105                            processSpecialPages(userId, node, rootElement, specialNamespaces);
106                            processRegularPages(
107                                    userId, node, rootElement, specialNamespaces, usersMap,
108                                    imagesInputStream, options);
109                            processImages(userId, node, imagesInputStream);
110    
111                            moveFrontPage(userId, node, options);
112                    }
113                    catch (DocumentException de) {
114                            throw new ImportFilesException("Invalid XML file provided");
115                    }
116                    catch (IOException de) {
117                            throw new ImportFilesException("Error reading the files provided");
118                    }
119                    catch (PortalException e) {
120                            throw e;
121                    }
122                    catch (Exception e) {
123                            throw new PortalException(e);
124                    }
125            }
126    
127            protected long getUserId(
128                            long userId, WikiNode node, String author,
129                            Map<String, String> usersMap)
130                    throws PortalException, SystemException {
131    
132                    User user = null;
133    
134                    String emailAddress = usersMap.get(author);
135    
136                    try {
137                            if (Validator.isNull(emailAddress)) {
138                                    user = UserLocalServiceUtil.getUserByScreenName(
139                                            node.getCompanyId(), author.toLowerCase());
140                            }
141                            else {
142                                    user = UserLocalServiceUtil.getUserByEmailAddress(
143                                            node.getCompanyId(), emailAddress);
144                            }
145                    }
146                    catch (NoSuchUserException nsue) {
147                            user = UserLocalServiceUtil.getUserById(userId);
148                    }
149    
150                    return user.getUserId();
151            }
152    
153            protected void importPage(
154                            long userId, String author, WikiNode node, String title,
155                            String content, String summary, Map<String, String> usersMap,
156                            boolean strictImportMode)
157                    throws PortalException {
158    
159                    try {
160                            long authorUserId = getUserId(userId, node, author, usersMap);
161                            String parentTitle = readParentTitle(content);
162                            String redirectTitle = readRedirectTitle(content);
163    
164                            ServiceContext serviceContext = new ServiceContext();
165    
166                            serviceContext.setAddGroupPermissions(true);
167                            serviceContext.setAddGuestPermissions(true);
168                            serviceContext.setAssetTagNames(
169                                    readAssetTagNames(userId, node, content));
170    
171                            if (Validator.isNull(redirectTitle)) {
172                                    _translator.setStrictImportMode(strictImportMode);
173    
174                                    content = _translator.translate(content);
175                            }
176                            else {
177                                    content =
178                                            StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
179                                                    StringPool.DOUBLE_CLOSE_BRACKET;
180                            }
181    
182                            WikiPage page = null;
183    
184                            try {
185                                    page = WikiPageLocalServiceUtil.getPage(
186                                            node.getNodeId(), title);
187                            }
188                            catch (NoSuchPageException nspe) {
189                                    page = WikiPageLocalServiceUtil.addPage(
190                                            authorUserId, node.getNodeId(), title,
191                                            WikiPageConstants.NEW, null, true, serviceContext);
192                            }
193    
194                            WikiPageLocalServiceUtil.updatePage(
195                                    authorUserId, node.getNodeId(), title, page.getVersion(),
196                                    content, summary, true, "creole", parentTitle, redirectTitle,
197                                    serviceContext);
198                    }
199                    catch (Exception e) {
200                            throw new PortalException("Error importing page " + title, e);
201                    }
202            }
203    
204            protected boolean isSpecialMediaWikiPage(
205                    String title, List<String> specialNamespaces) {
206    
207                    for (String namespace: specialNamespaces) {
208                            if (title.startsWith(namespace + StringPool.COLON)) {
209                                    return true;
210                            }
211                    }
212    
213                    return false;
214            }
215    
216            protected boolean isValidImage(String[] paths, InputStream inputStream) {
217                    if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
218                            return false;
219                    }
220    
221                    if ((paths.length > 1) &&
222                            (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
223    
224                            return false;
225                    }
226    
227                    String fileName = paths[paths.length - 1];
228    
229                    try {
230                            DLStoreUtil.validate(fileName, true, inputStream);
231                    }
232                    catch (PortalException pe) {
233                            return false;
234                    }
235                    catch (SystemException se) {
236                            return false;
237                    }
238    
239                    return true;
240            }
241    
242            protected void moveFrontPage(
243                    long userId, WikiNode node, Map<String, String[]> options) {
244    
245                    String frontPageTitle = MapUtil.getString(
246                            options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
247    
248                    if (Validator.isNotNull(frontPageTitle)) {
249                            frontPageTitle = normalizeTitle(frontPageTitle);
250    
251                            try {
252                                    if (WikiPageLocalServiceUtil.getPagesCount(
253                                                    node.getNodeId(), frontPageTitle, true) > 0) {
254    
255                                            ServiceContext serviceContext = new ServiceContext();
256    
257                                            serviceContext.setAddGroupPermissions(true);
258                                            serviceContext.setAddGuestPermissions(true);
259    
260                                            WikiPageLocalServiceUtil.movePage(
261                                                    userId, node.getNodeId(), frontPageTitle,
262                                                    WikiPageConstants.FRONT_PAGE, false, serviceContext);
263    
264                                    }
265                            }
266                            catch (Exception e) {
267                                    if (_log.isWarnEnabled()) {
268                                            StringBundler sb = new StringBundler(4);
269    
270                                            sb.append("Could not move ");
271                                            sb.append(WikiPageConstants.FRONT_PAGE);
272                                            sb.append(" to the title provided: ");
273                                            sb.append(frontPageTitle);
274    
275                                            _log.warn(sb.toString(), e);
276                                    }
277                            }
278    
279                    }
280    
281            }
282    
283            protected String normalize(String categoryName, int length) {
284                    categoryName = AssetUtil.toWord(categoryName.trim());
285    
286                    return StringUtil.shorten(categoryName, length);
287            }
288    
289            protected String normalizeDescription(String description) {
290                    description = description.replaceAll(
291                            _categoriesPattern.pattern(), StringPool.BLANK);
292    
293                    return normalize(description, 300);
294            }
295    
296            protected String normalizeTitle(String title) {
297                    title = title.replaceAll(
298                            PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
299    
300                    return StringUtil.shorten(title, 75);
301            }
302    
303            protected void processImages(
304                            long userId, WikiNode node, InputStream imagesInputStream)
305                    throws Exception {
306    
307                    if (imagesInputStream == null) {
308                            return;
309                    }
310    
311                    ProgressTracker progressTracker =
312                            ProgressTrackerThreadLocal.getProgressTracker();
313    
314                    int count = 0;
315    
316                    ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
317                            imagesInputStream);
318    
319                    List<String> entries = zipReader.getEntries();
320    
321                    int total = entries.size();
322    
323                    if (total > 0) {
324                            try {
325                                    WikiPageLocalServiceUtil.getPage(
326                                            node.getNodeId(), SHARED_IMAGES_TITLE);
327                            }
328                            catch (NoSuchPageException nspe) {
329                                    ServiceContext serviceContext = new ServiceContext();
330    
331                                    serviceContext.setAddGroupPermissions(true);
332                                    serviceContext.setAddGuestPermissions(true);
333    
334                                    WikiPageLocalServiceUtil.addPage(
335                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
336                                            SHARED_IMAGES_CONTENT, null, true, serviceContext);
337                            }
338                    }
339    
340                    List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
341                            new ArrayList<ObjectValuePair<String, InputStream>>();
342    
343                    try {
344                            int percentage = 50;
345    
346                            for (int i = 0; i < entries.size(); i++) {
347                                    String entry = entries.get(i);
348    
349                                    String key = entry;
350    
351                                    InputStream inputStream = zipReader.getEntryAsInputStream(
352                                            entry);
353    
354                                    String[] paths = StringUtil.split(key, CharPool.SLASH);
355    
356                                    if (!isValidImage(paths, inputStream)) {
357                                            if (_log.isInfoEnabled()) {
358                                                    _log.info("Ignoring " + key);
359                                            }
360    
361                                            continue;
362                                    }
363    
364                                    String fileName = paths[paths.length - 1].toLowerCase();
365    
366                                    ObjectValuePair<String, InputStream> inputStreamOVP =
367                                            new ObjectValuePair<String, InputStream>(
368                                                    fileName, inputStream);
369    
370                                    inputStreamOVPs.add(inputStreamOVP);
371    
372                                    count++;
373    
374                                    if ((i % 5) == 0) {
375                                            WikiPageLocalServiceUtil.addPageAttachments(
376                                                    userId, node.getNodeId(), SHARED_IMAGES_TITLE,
377                                                    inputStreamOVPs);
378    
379                                            inputStreamOVPs.clear();
380    
381                                            percentage = Math.min(50 + (i * 50) / total, 99);
382    
383                                            progressTracker.updateProgress(percentage);
384                                    }
385                            }
386    
387                            if (!inputStreamOVPs.isEmpty()) {
388                                    WikiPageLocalServiceUtil.addPageAttachments(
389                                            userId, node.getNodeId(), SHARED_IMAGES_TITLE,
390                                            inputStreamOVPs);
391                            }
392                    }
393                    finally {
394                            for (ObjectValuePair<String, InputStream> inputStreamOVP :
395                                            inputStreamOVPs) {
396    
397                                    InputStream inputStream = inputStreamOVP.getValue();
398    
399                                    StreamUtil.cleanUp(inputStream);
400                            }
401                    }
402    
403                    zipReader.close();
404    
405                    if (_log.isInfoEnabled()) {
406                            _log.info("Imported " + count + " images into " + node.getName());
407                    }
408            }
409    
410            protected void processRegularPages(
411                    long userId, WikiNode node, Element rootElement,
412                    List<String> specialNamespaces, Map<String, String> usersMap,
413                    InputStream imagesInputStream, Map<String, String[]> options) {
414    
415                    boolean importLatestVersion = MapUtil.getBoolean(
416                            options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
417                    boolean strictImportMode = MapUtil.getBoolean(
418                            options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
419    
420                    ProgressTracker progressTracker =
421                            ProgressTrackerThreadLocal.getProgressTracker();
422    
423                    int count = 0;
424    
425                    int percentage = 10;
426    
427                    int maxPercentage = 50;
428    
429                    if (imagesInputStream == null) {
430                            maxPercentage = 99;
431                    }
432    
433                    List<Element> pageElements = rootElement.elements("page");
434    
435                    for (int i = 0; i < pageElements.size(); i++) {
436                            Element pageElement = pageElements.get(i);
437    
438                            String title = pageElement.elementText("title");
439    
440                            title = normalizeTitle(title);
441    
442                            percentage = Math.min(
443                                    10 + (i * (maxPercentage - percentage)) / pageElements.size(),
444                                    maxPercentage);
445    
446                            progressTracker.updateProgress(percentage);
447    
448                            if (isSpecialMediaWikiPage(title, specialNamespaces)) {
449                                    continue;
450                            }
451    
452                            List<Element> revisionElements = pageElement.elements("revision");
453    
454                            if (importLatestVersion) {
455                                    Element lastRevisionElement = revisionElements.get(
456                                            revisionElements.size() - 1);
457    
458                                    revisionElements = new ArrayList<Element>();
459    
460                                    revisionElements.add(lastRevisionElement);
461                            }
462    
463                            for (Element revisionElement : revisionElements) {
464                                    Element contributorElement = revisionElement.element(
465                                            "contributor");
466    
467                                    String author = contributorElement.elementText("username");
468    
469                                    String content = revisionElement.elementText("text");
470                                    String summary = revisionElement.elementText("comment");
471    
472                                    try {
473                                            importPage(
474                                                    userId, author, node, title, content, summary, usersMap,
475                                                    strictImportMode);
476                                    }
477                                    catch (Exception e) {
478                                            if (_log.isWarnEnabled()) {
479                                                    _log.warn(
480                                                            "Page with title " + title +
481                                                                    " could not be imported",
482                                                            e);
483                                            }
484                                    }
485                            }
486    
487                            count++;
488                    }
489    
490                    if (_log.isInfoEnabled()) {
491                            _log.info("Imported " + count + " pages into " + node.getName());
492                    }
493            }
494    
495            protected void processSpecialPages(
496                            long userId, WikiNode node, Element rootElement,
497                            List<String> specialNamespaces)
498                    throws PortalException {
499    
500                    ProgressTracker progressTracker =
501                            ProgressTrackerThreadLocal.getProgressTracker();
502    
503                    List<Element> pageElements = rootElement.elements("page");
504    
505                    for (int i = 0; i < pageElements.size(); i++) {
506                            Element pageElement = pageElements.get(i);
507    
508                            String title = pageElement.elementText("title");
509    
510                            if (!title.startsWith("Category:")) {
511                                    if (isSpecialMediaWikiPage(title, specialNamespaces)) {
512                                            rootElement.remove(pageElement);
513                                    }
514    
515                                    continue;
516                            }
517    
518                            String categoryName = title.substring("Category:".length());
519    
520                            categoryName = normalize(categoryName, 75);
521    
522                            Element revisionElement = pageElement.element("revision");
523    
524                            String description = revisionElement.elementText("text");
525    
526                            description = normalizeDescription(description);
527    
528                            try {
529                                    AssetTag assetTag = null;
530    
531                                    try {
532                                            assetTag = AssetTagLocalServiceUtil.getTag(
533                                                    node.getCompanyId(), categoryName);
534                                    }
535                                    catch (NoSuchTagException nste) {
536                                            ServiceContext serviceContext = new ServiceContext();
537    
538                                            serviceContext.setAddGroupPermissions(true);
539                                            serviceContext.setAddGuestPermissions(true);
540                                            serviceContext.setScopeGroupId(node.getGroupId());
541    
542                                            assetTag = AssetTagLocalServiceUtil.addTag(
543                                                    userId, categoryName, null, serviceContext);
544                                    }
545    
546                                    if (Validator.isNotNull(description)) {
547                                            AssetTagPropertyLocalServiceUtil.addTagProperty(
548                                                    userId, assetTag.getTagId(), "description",
549                                                    description);
550                                    }
551                            }
552                            catch (SystemException se) {
553                                     _log.error(se, se);
554                            }
555    
556                            if ((i % 5) == 0) {
557                                    progressTracker.updateProgress((i * 10) / pageElements.size());
558                            }
559                    }
560            }
561    
562            protected String[] readAssetTagNames(
563                            long userId, WikiNode node, String content)
564                    throws PortalException, SystemException {
565    
566                    Matcher matcher = _categoriesPattern.matcher(content);
567    
568                    List<String> assetTagNames = new ArrayList<String>();
569    
570                    while (matcher.find()) {
571                            String categoryName = matcher.group(1);
572    
573                            categoryName = normalize(categoryName, 75);
574    
575                            AssetTag assetTag = null;
576    
577                            try {
578                                    assetTag = AssetTagLocalServiceUtil.getTag(
579                                            node.getGroupId(), categoryName);
580                            }
581                            catch (NoSuchTagException nste) {
582                                    ServiceContext serviceContext = new ServiceContext();
583    
584                                    serviceContext.setAddGroupPermissions(true);
585                                    serviceContext.setAddGuestPermissions(true);
586                                    serviceContext.setScopeGroupId(node.getGroupId());
587    
588                                    assetTag = AssetTagLocalServiceUtil.addTag(
589                                            userId, categoryName, null, serviceContext);
590                            }
591    
592                            assetTagNames.add(assetTag.getName());
593                    }
594    
595                    if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
596                            assetTagNames.add(_WORK_IN_PROGRESS_TAG);
597                    }
598    
599                    return assetTagNames.toArray(new String[assetTagNames.size()]);
600            }
601    
602            protected String readParentTitle(String content) {
603                    Matcher matcher = _parentPattern.matcher(content);
604    
605                    String redirectTitle = StringPool.BLANK;
606    
607                    if (matcher.find()) {
608                            redirectTitle = matcher.group(1);
609    
610                            redirectTitle = normalizeTitle(redirectTitle);
611    
612                            redirectTitle += " (disambiguation)";
613                    }
614    
615                    return redirectTitle;
616            }
617            protected String readRedirectTitle(String content) {
618                    Matcher matcher = _redirectPattern.matcher(content);
619    
620                    String redirectTitle = StringPool.BLANK;
621    
622                    if (matcher.find()) {
623                            redirectTitle = matcher.group(1);
624    
625                            redirectTitle = normalizeTitle(redirectTitle);
626                    }
627    
628                    return redirectTitle;
629            }
630            protected List<String> readSpecialNamespaces(Element root)
631                    throws ImportFilesException {
632    
633                    List<String> namespaces = new ArrayList<String>();
634    
635                    Element siteinfoEl = root.element("siteinfo");
636    
637                    if (siteinfoEl == null) {
638                            throw new ImportFilesException("Invalid pages XML file");
639                    }
640    
641                    Iterator<Element> itr = siteinfoEl.element(
642                            "namespaces").elements("namespace").iterator();
643    
644                    while (itr.hasNext()) {
645                            Element namespace = itr.next();
646    
647                            if (!namespace.attribute("key").getData().equals("0")) {
648                                    namespaces.add(namespace.getText());
649                            }
650                    }
651    
652                    return namespaces;
653            }
654    
655            protected Map<String, String> readUsersFile(InputStream usersInputStream)
656                    throws IOException {
657    
658                    if (usersInputStream == null) {
659                            return Collections.emptyMap();
660                    }
661    
662                    Map<String, String> usersMap = new HashMap<String, String>();
663    
664                    UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
665                            new InputStreamReader(usersInputStream));
666    
667                    String line = unsyncBufferedReader.readLine();
668    
669                    while (line != null) {
670                            String[] array = StringUtil.split(line);
671    
672                            if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
673                                    (Validator.isNotNull(array[1]))) {
674    
675                                    usersMap.put(array[0], array[1]);
676                            }
677                            else {
678                                    if (_log.isInfoEnabled()) {
679                                            _log.info(
680                                                    "Ignoring line " + line +
681                                                            " because it does not contain exactly 2 columns");
682                                    }
683                            }
684    
685                            line = unsyncBufferedReader.readLine();
686                    }
687    
688                    return usersMap;
689            }
690    
691            private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = {
692                    "thumb", "temp", "archive"
693            };
694    
695            private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
696    
697            private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
698    
699            private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
700    
701            private static Pattern _categoriesPattern = Pattern.compile(
702                    "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
703            private static Pattern _parentPattern = Pattern.compile(
704                    "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
705            private static Pattern _redirectPattern = Pattern.compile(
706                    "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
707    
708            private MediaWikiToCreoleTranslator _translator =
709                    new MediaWikiToCreoleTranslator();
710    
711    }