001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.util.DiffResult;
018    import com.liferay.portal.kernel.util.FileUtil;
019    import com.liferay.portal.kernel.util.StringBundler;
020    import com.liferay.portal.kernel.util.StringPool;
021    
022    import java.io.Reader;
023    
024    import java.util.ArrayList;
025    import java.util.Iterator;
026    import java.util.List;
027    
028    import org.incava.util.diff.Diff;
029    import org.incava.util.diff.Difference;
030    
031    /**
032     * <p>
033     * This class can compare two different versions of a text. Source refers to the
034     * earliest version of the text and target refers to a modified version of
035     * source. Changes are considered either as a removal from the source or as an
036     * addition to the target. This class detects changes to an entire line and also
037     * detects changes within lines, such as, removal or addition of characters.
038     * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
039     * </p>
040     *
041     * @author Bruno Farache
042     */
043    public class DiffImpl implements com.liferay.portal.kernel.util.Diff {
044    
045            /**
046             * This is a diff method with default values.
047             *
048             * @return an array containing two lists of <code>DiffResults</code>, the
049             *         first element contains DiffResults related to changes in source
050             *         and the second element to changes in target
051             */
052            public List<DiffResult>[] diff(Reader source, Reader target) {
053                    int margin = 2;
054    
055                    return diff(
056                            source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
057            }
058    
059            /**
060             * The main entrance of this class. This method will compare the two texts,
061             * highlight the changes by enclosing them with markers and return a list of
062             * <code>DiffResults</code>.
063             *
064             * @return an array containing two lists of <code>DiffResults</code>, the
065             *         first element contains DiffResults related to changes in source
066             *         and the second element to changes in target
067             */
068            public List<DiffResult>[] diff(
069                    Reader source, Reader target, String addedMarkerStart,
070                    String addedMarkerEnd, String deletedMarkerStart,
071                    String deletedMarkerEnd, int margin) {
072    
073                    List<DiffResult> sourceResults = new ArrayList<DiffResult>();
074                    List<DiffResult> targetResults = new ArrayList<DiffResult>();
075    
076                    List<DiffResult>[] results = new List[] {sourceResults, targetResults};
077    
078                    // Convert the texts to Lists where each element are lines of the texts.
079    
080                    List<String> sourceStringList = FileUtil.toList(source);
081                    List<String> targetStringList = FileUtil.toList(target);
082    
083                    // Make a a Diff of these lines and iterate over their Differences.
084    
085                    Diff diff = new Diff(sourceStringList, targetStringList);
086    
087                    List<Difference> differences = diff.diff();
088    
089                    Iterator<Difference> itr = differences.iterator();
090    
091                    while (itr.hasNext()) {
092                            Difference difference = itr.next();
093    
094                            if (difference.getAddedEnd() == Difference.NONE) {
095    
096                                    // Lines were deleted from source only.
097    
098                                    _highlightLines(
099                                            sourceStringList, deletedMarkerStart, deletedMarkerEnd,
100                                            difference.getDeletedStart(), difference.getDeletedEnd());
101    
102                                    margin = _calculateMargin(
103                                            sourceResults, targetResults, difference.getDeletedStart(),
104                                            difference.getAddedStart(), margin);
105    
106                                    List<String> changedLines = _addMargins(
107                                            sourceResults, sourceStringList,
108                                            difference.getDeletedStart(), margin);
109    
110                                    _addResults(
111                                            sourceResults, sourceStringList, changedLines,
112                                            difference.getDeletedStart(), difference.getDeletedEnd());
113    
114                                    changedLines = _addMargins(
115                                            targetResults, targetStringList, difference.getAddedStart(),
116                                            margin);
117    
118                                    int deletedLines =
119                                            difference.getDeletedEnd() + 1 -
120                                                    difference.getDeletedStart();
121    
122                                    for (int i = 0; i < deletedLines; i++) {
123                                            changedLines.add(CONTEXT_LINE);
124                                    }
125    
126                                    DiffResult diffResult = new DiffResult(
127                                            difference.getDeletedStart(), changedLines);
128    
129                                    targetResults.add(diffResult);
130                            }
131                            else if (difference.getDeletedEnd() == Difference.NONE) {
132    
133                                    // Lines were added to target only.
134    
135                                    _highlightLines(
136                                            targetStringList, addedMarkerStart, addedMarkerEnd,
137                                            difference.getAddedStart(), difference.getAddedEnd());
138    
139                                    margin = _calculateMargin(
140                                            sourceResults, targetResults, difference.getDeletedStart(),
141                                            difference.getAddedStart(), margin);
142    
143                                    List<String> changedLines = _addMargins(
144                                            sourceResults, sourceStringList,
145                                            difference.getDeletedStart(), margin);
146    
147                                    int addedLines =
148                                            difference.getAddedEnd() + 1 - difference.getAddedStart();
149    
150                                    for (int i = 0; i < addedLines; i++) {
151                                            changedLines.add(CONTEXT_LINE);
152                                    }
153    
154                                    DiffResult diffResult = new DiffResult(
155                                            difference.getAddedStart(), changedLines);
156    
157                                    sourceResults.add(diffResult);
158    
159                                    changedLines = _addMargins(
160                                            targetResults, targetStringList, difference.getAddedStart(),
161                                            margin);
162    
163                                    _addResults(
164                                            targetResults, targetStringList, changedLines,
165                                            difference.getAddedStart(), difference.getAddedEnd());
166                            }
167                            else {
168    
169                                    // Lines were deleted from source and added to target at the
170                                    // same position. It needs to check for characters differences.
171    
172                                    _checkCharDiffs(
173                                            sourceResults, targetResults, sourceStringList,
174                                            targetStringList, addedMarkerStart, addedMarkerEnd,
175                                            deletedMarkerStart, deletedMarkerEnd, difference, margin);
176                            }
177                    }
178    
179                    return results;
180            }
181    
182            private static List<String> _addMargins(
183                    List<DiffResult> results, List<String> stringList, int startPos,
184                    int margin) {
185    
186                    List<String> changedLines = new ArrayList<String>();
187    
188                    if (margin == 0 || startPos == 0) {
189                            return changedLines;
190                    }
191    
192                    int i = startPos - margin;
193    
194                    for (; i < 0; i++) {
195                            changedLines.add(CONTEXT_LINE);
196                    }
197    
198                    for (; i < startPos; i++) {
199                            if (i < stringList.size()) {
200                                    changedLines.add(stringList.get(i));
201                            }
202                    }
203    
204                    return changedLines;
205            }
206    
207            private static void _addResults(
208                    List<DiffResult> results, List<String> stringList,
209                    List<String> changedLines, int start, int end) {
210    
211                    changedLines.addAll(stringList.subList(start, end + 1));
212    
213                    DiffResult diffResult = new DiffResult(start, changedLines);
214    
215                    results.add(diffResult);
216            }
217    
218            private static int _calculateMargin(
219                    List<DiffResult> sourceResults, List<DiffResult> targetResults,
220                    int sourceBeginPos, int targetBeginPos, int margin) {
221    
222                    int sourceMargin = _checkOverlapping(
223                            sourceResults, sourceBeginPos, margin);
224                    int targetMargin = _checkOverlapping(
225                            targetResults, targetBeginPos, margin);
226    
227                    if (sourceMargin < targetMargin) {
228                            return sourceMargin;
229                    }
230    
231                    return targetMargin;
232            }
233    
234            private static void _checkCharDiffs(
235                    List<DiffResult> sourceResults, List<DiffResult> targetResults,
236                    List<String> sourceStringList, List<String> targetStringList,
237                    String addedMarkerStart, String addedMarkerEnd,
238                    String deletedMarkerStart, String deletedMarkerEnd,
239                    Difference difference, int margin) {
240    
241                    boolean aligned = false;
242    
243                    int i = difference.getDeletedStart();
244                    int j = difference.getAddedStart();
245    
246                    // A line with changed characters may have its position shifted some
247                    // lines above or below. These for loops will try to align these lines.
248                    // While these lines are not aligned, highlight them as either additions
249                    // or deletions.
250    
251                    for (; i <= difference.getDeletedEnd(); i++) {
252                            for (; j <= difference.getAddedEnd(); j++) {
253                                    if (!_isMaxLineLengthExceeded(
254                                                    sourceStringList.get(i), targetStringList.get(j)) &&
255                                            _lineDiff(
256                                                    sourceResults, targetResults, sourceStringList,
257                                                    targetStringList, addedMarkerStart, addedMarkerEnd,
258                                                    deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
259    
260                                            aligned = true;
261    
262                                            break;
263                                    }
264    
265                                    _highlightLines(
266                                            targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
267    
268                                    DiffResult targetResult = new DiffResult(
269                                            j, targetStringList.subList(j, j + 1));
270    
271                                    targetResults.add(targetResult);
272    
273                                    sourceResults.add(new DiffResult(j, CONTEXT_LINE));
274                            }
275    
276                            if (aligned) {
277                                     break;
278                            }
279    
280                            _highlightLines(
281                                    sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
282    
283                            DiffResult sourceResult = new DiffResult(
284                                    i, sourceStringList.subList(i, i + 1));
285    
286                            sourceResults.add(sourceResult);
287    
288                            targetResults.add(new DiffResult(i, CONTEXT_LINE));
289                    }
290    
291                    i = i + 1;
292                    j = j + 1;
293    
294                    // Lines are aligned, check for differences of the following lines.
295    
296                    for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
297                                    i++, j++) {
298    
299                            if (!_isMaxLineLengthExceeded(
300                                            sourceStringList.get(i), targetStringList.get(j))) {
301    
302                                    _lineDiff(
303                                            sourceResults, targetResults, sourceStringList,
304                                            targetStringList, addedMarkerStart, addedMarkerEnd,
305                                            deletedMarkerStart, deletedMarkerEnd, i, j, true);
306                            }
307                            else {
308                                    _highlightLines(
309                                            sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
310                                            i);
311    
312                                    DiffResult sourceResult = new DiffResult(
313                                            i, sourceStringList.subList(i, i + 1));
314    
315                                    sourceResults.add(sourceResult);
316    
317                                    targetResults.add(new DiffResult(i, CONTEXT_LINE));
318    
319                                    _highlightLines(
320                                            targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
321    
322                                    DiffResult targetResult = new DiffResult(
323                                            j, targetStringList.subList(j, j + 1));
324    
325                                    targetResults.add(targetResult);
326    
327                                    sourceResults.add(new DiffResult(j, CONTEXT_LINE));
328                            }
329                    }
330    
331                    // After the for loop above, some lines might remained unchecked.
332                    // They are considered as deletions or additions.
333    
334                    for (; i <= difference.getDeletedEnd();i++) {
335                            _highlightLines(
336                                    sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
337    
338                            DiffResult sourceResult = new DiffResult(
339                                    i, sourceStringList.subList(i, i + 1));
340    
341                            sourceResults.add(sourceResult);
342    
343                            targetResults.add(new DiffResult(i, CONTEXT_LINE));
344                    }
345    
346                    for (; j <= difference.getAddedEnd(); j++) {
347                            _highlightLines(
348                                    targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
349    
350                            DiffResult targetResult = new DiffResult(
351                                    j, targetStringList.subList(j, j + 1));
352    
353                            targetResults.add(targetResult);
354    
355                            sourceResults.add(new DiffResult(j, CONTEXT_LINE));
356                    }
357            }
358    
359            private static int _checkOverlapping(
360                    List<DiffResult> results, int startPos, int margin) {
361    
362                    if (results.size() == 0 || (startPos - margin) < 0) {
363                            return margin;
364                    }
365    
366                    DiffResult lastDiff = results.get(results.size() - 1);
367    
368                    if (lastDiff.getChangedLines().size() == 0) {
369                            return margin;
370                    }
371    
372                    int lastChangedLine = (lastDiff.getLineNumber() - 1) +
373                            lastDiff.getChangedLines().size();
374    
375                    int currentChangedLine = startPos - margin;
376    
377                    if ((lastDiff.getChangedLines().size() == 1) &&
378                            (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
379    
380                            currentChangedLine = currentChangedLine + 1;
381                    }
382    
383                    if (currentChangedLine < lastChangedLine) {
384                            return margin + currentChangedLine - lastChangedLine;
385                    }
386    
387                    return margin;
388            }
389    
390            private static void _highlightChars(
391                    List<String> stringList, String markerStart, String markerEnd,
392                    int startPos, int endPos) {
393    
394                    String start = markerStart + stringList.get(startPos);
395    
396                    stringList.set(startPos, start);
397    
398                    String end = stringList.get(endPos) + markerEnd;
399    
400                    stringList.set(endPos, end);
401            }
402    
403            private static void _highlightLines(
404                    List<String> stringList, String markerStart, String markerEnd,
405                    int startPos, int endPos) {
406    
407                    for (int i = startPos; i <= endPos; i++) {
408                            stringList.set(i, markerStart + stringList.get(i) + markerEnd);
409                    }
410            }
411    
412            private static boolean _isMaxLineLengthExceeded(
413                    String sourceString, String targetString) {
414    
415                    if ((sourceString.length() > _DIFF_MAX_LINE_LENGTH) ||
416                            (targetString.length() > _DIFF_MAX_LINE_LENGTH)) {
417    
418                            return true;
419                    }
420    
421                    return false;
422            }
423    
424            private static boolean _lineDiff(
425                    List<DiffResult> sourceResults, List<DiffResult> targetResults,
426                    List<String> sourceStringList, List<String> targetStringList,
427                    String addedMarkerStart, String addedMarkerEnd,
428                    String deletedMarkerStart, String deletedMarkerEnd,
429                    int sourceChangedLine, int targetChangedLine, boolean aligned) {
430    
431                    String source = sourceStringList.get(sourceChangedLine);
432                    String target = targetStringList.get(targetChangedLine);
433    
434                    // Convert the lines to lists where each element are chars of the lines.
435    
436                    List<String> sourceList = _toList(source);
437                    List<String> targetList = _toList(target);
438    
439                    Diff diff = new Diff(sourceList, targetList);
440    
441                    List<Difference> differences = diff.diff();
442    
443                    Iterator<Difference> itr = differences.iterator();
444    
445                    int deletedChars = 0;
446                    int addedChars = 0;
447    
448                    // The following while loop will calculate how many characters of
449                    // the source line need to be changed to be equals to the target line.
450    
451                    while (itr.hasNext() && !aligned) {
452                            Difference difference = itr.next();
453    
454                            if (difference.getDeletedEnd() != Difference.NONE) {
455                                    deletedChars =
456                                            deletedChars +
457                                            (difference.getDeletedEnd() -
458                                                    difference.getDeletedStart() + 1);
459                            }
460    
461                            if (difference.getAddedEnd() != Difference.NONE) {
462                                    addedChars =
463                                            addedChars +
464                                            (difference.getAddedEnd() - difference.getAddedStart() + 1);
465                            }
466                    }
467    
468                    // If a lot of changes were needed (more than half of the source line
469                    // length), consider this as not aligned yet.
470    
471                    if ((deletedChars > (sourceList.size() / 2)) ||
472                            (addedChars > sourceList.size() / 2)) {
473    
474                            return false;
475                    }
476    
477                    itr = differences.iterator();
478    
479                    boolean sourceChanged = false;
480                    boolean targetChanged = false;
481    
482                    // Iterate over Differences between chars of these lines.
483    
484                    while (itr.hasNext()) {
485                            Difference difference = itr.next();
486    
487                            if (difference.getAddedEnd() == Difference.NONE) {
488    
489                                    // Chars were deleted from source only.
490    
491                                    _highlightChars(
492                                            sourceList, deletedMarkerStart, deletedMarkerEnd,
493                                            difference.getDeletedStart(), difference.getDeletedEnd());
494    
495                                    sourceChanged = true;
496                            }
497                            else if (difference.getDeletedEnd() == Difference.NONE) {
498    
499                                    // Chars were added to target only.
500    
501                                    _highlightChars(
502                                            targetList, addedMarkerStart, addedMarkerEnd,
503                                            difference.getAddedStart(), difference.getAddedEnd());
504    
505                                    targetChanged = true;
506                            }
507                            else {
508    
509                                    // Chars were both deleted and added.
510    
511                                    _highlightChars(
512                                            sourceList, deletedMarkerStart, deletedMarkerEnd,
513                                            difference.getDeletedStart(), difference.getDeletedEnd());
514    
515                                    sourceChanged = true;
516    
517                                    _highlightChars(
518                                            targetList, addedMarkerStart, addedMarkerEnd,
519                                            difference.getAddedStart(), difference.getAddedEnd());
520    
521                                    targetChanged = true;
522                            }
523                    }
524    
525                    if (sourceChanged) {
526                            DiffResult sourceResult = new DiffResult(
527                                    sourceChangedLine, _toString(sourceList));
528    
529                            sourceResults.add(sourceResult);
530    
531                            if (!targetChanged) {
532                                    DiffResult targetResult = new DiffResult(
533                                            targetChangedLine, target);
534    
535                                    targetResults.add(targetResult);
536                            }
537                    }
538    
539                    if (targetChanged) {
540                            if (!sourceChanged) {
541                                    DiffResult sourceResult = new DiffResult(
542                                            sourceChangedLine, source);
543    
544                                    sourceResults.add(sourceResult);
545                            }
546    
547                            DiffResult targetResult = new DiffResult(
548                                    targetChangedLine, _toString(targetList));
549    
550                            targetResults.add(targetResult);
551                    }
552    
553                    return true;
554            }
555    
556            private static List<String> _toList(String line) {
557                    String[] stringArray = line.split(StringPool.BLANK);
558    
559                    List<String> result = new ArrayList<String>();
560    
561                    for (int i = 1; i < stringArray.length; i++) {
562                            result.add(stringArray[i]);
563                    }
564    
565                    return result;
566            }
567    
568            private static String _toString(List<String> line) {
569                    if (line.isEmpty()) {
570                            return StringPool.BLANK;
571                    }
572    
573                    StringBundler sb = new StringBundler(line.size());
574    
575                    Iterator<String> itr = line.iterator();
576    
577                    while (itr.hasNext()) {
578                            sb.append(itr.next());
579                    }
580    
581                    return sb.toString();
582            }
583    
584            private static final int _DIFF_MAX_LINE_LENGTH = 5000;
585    
586    }