1   /**
2    * Copyright (c) 2000-2008 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util.diff;
24  
25  import com.liferay.portal.kernel.util.StringMaker;
26  import com.liferay.portal.kernel.util.StringPool;
27  import com.liferay.util.FileUtil;
28  
29  import java.io.Reader;
30  
31  import java.util.ArrayList;
32  import java.util.Iterator;
33  import java.util.List;
34  
35  import org.incava.util.diff.Diff;
36  import org.incava.util.diff.Difference;
37  
38  /**
39   * <a href="DiffUtil.java.html"><b><i>View Source</i></b></a>
40   *
41   * <p>
42   * This class can compare two different versions of a text. Source refers to
43   * the earliest version of the text and target refers to a modified version of
44   * source. Changes are considered either as a removal from the source or as an
45   * addition to the target. This class detects changes to an entire line and also
46   * detects changes within lines, such as, removal or addition of characters.
47   * Take a look at <code>DiffTest</code> to see the expected inputs and outputs.
48   * </p>
49   *
50   * @author Bruno Farache
51   *
52   */
53  public class DiffUtil {
54  
55      public static final String OPEN_INS = "<ins>";
56  
57      public static final String CLOSE_INS = "</ins>";
58  
59      public static final String OPEN_DEL = "<del>";
60  
61      public static final String CLOSE_DEL = "</del>";
62  
63      public static final String CONTEXT_LINE = "#context#line#";
64  
65      /**
66       * This is a diff method with default values.
67       *
68       * @param       source the <code>Reader</code> of the source text
69       * @param       target the <code>Reader</code> of the target text
70       * @return      an array containing two lists of <code>DiffResults</code>,
71       *              the first element contains DiffResults related to changes
72       *              in source and the second element to changes in target
73       */
74      public static List[] diff(Reader source, Reader target) {
75          int margin = 2;
76  
77          return diff(
78              source, target, OPEN_INS, CLOSE_INS, OPEN_DEL, CLOSE_DEL, margin);
79      }
80  
81      /**
82       * The main entrance of this class. This method will compare the two texts,
83       * highlight the changes by enclosing them with markers and return a list
84       * of <code>DiffResults</code>.
85       *
86       * @param       source the <code>Reader</code> of the source text, this can
87       *              be for example, an instance of FileReader or StringReader
88       * @param       target the <code>Reader</code> of the target text
89       * @param       addedMarkerStart the initial marker for highlighting
90       *              additions
91       * @param       addedMarkerEnd the end marker for highlighting additions
92       * @param       deletedMarkerStart the initial marker for highlighting
93       *              removals
94       * @param       deletedMarkerEnd the end marker for highlighting removals
95       * @param       margin the number of lines that will be added before the
96       *              first changed line
97       * @return      an array containing two lists of <code>DiffResults</code>,
98       *              the first element contains DiffResults related to changes
99       *              in source and the second element to changes in target
100      */
101     public static List[] diff(
102         Reader source, Reader target, String addedMarkerStart,
103         String addedMarkerEnd, String deletedMarkerStart,
104         String deletedMarkerEnd, int margin) {
105 
106         List sourceResults = new ArrayList();
107         List targetResults = new ArrayList();
108 
109         List[] results = new List[] {sourceResults, targetResults};
110 
111         // Convert the texts to Lists where each element are lines of the texts.
112 
113         List sourceStringList = FileUtil.toList(source);
114         List targetStringList = FileUtil.toList(target);
115 
116         // Make a a Diff of these lines and iterate over their Differences.
117 
118         Diff diff = new Diff(sourceStringList, targetStringList);
119 
120         List differences = diff.diff();
121 
122         Iterator itr = differences.iterator();
123 
124         while (itr.hasNext()) {
125             Difference difference = (Difference)itr.next();
126 
127             if (difference.getAddedEnd() == Difference.NONE) {
128 
129                 // Lines were deleted from source only.
130 
131                 _highlightLines(
132                     sourceStringList, deletedMarkerStart,
133                     deletedMarkerEnd, difference.getDeletedStart(),
134                     difference.getDeletedEnd());
135 
136                 margin = _calculateMargin(
137                     sourceResults, targetResults, difference.getDeletedStart(),
138                     difference.getAddedStart(), margin);
139 
140                 List changedLines = _addMargins(
141                     sourceResults, sourceStringList,
142                     difference.getDeletedStart(), margin);
143 
144                 _addResults(
145                     sourceResults, sourceStringList, changedLines,
146                     difference.getDeletedStart(), difference.getDeletedEnd());
147 
148                 changedLines = _addMargins(
149                     targetResults, targetStringList, difference.getAddedStart(),
150                     margin);
151 
152                 int deletedLines =
153                     difference.getDeletedEnd() + 1 -
154                         difference.getDeletedStart();
155 
156                 for (int i = 0; i < deletedLines; i++) {
157                     changedLines.add(CONTEXT_LINE);
158                 }
159 
160                 DiffResult diffResult = new DiffResult(
161                     difference.getDeletedStart(), changedLines);
162 
163                 targetResults.add(diffResult);
164             }
165             else if (difference.getDeletedEnd() == Difference.NONE) {
166 
167                 // Lines were added to target only.
168 
169                 _highlightLines(
170                     targetStringList, addedMarkerStart, addedMarkerEnd,
171                     difference.getAddedStart(), difference.getAddedEnd());
172 
173                 margin = _calculateMargin(
174                     sourceResults, targetResults, difference.getDeletedStart(),
175                     difference.getAddedStart(), margin);
176 
177                 List changedLines = _addMargins(
178                     sourceResults, sourceStringList,
179                     difference.getDeletedStart(), margin);
180 
181                 int addedLines =
182                     difference.getAddedEnd() + 1 - difference.getAddedStart();
183 
184                 for (int i = 0; i < addedLines; i++) {
185                     changedLines.add(CONTEXT_LINE);
186                 }
187 
188                 DiffResult diffResult = new DiffResult(
189                     difference.getAddedStart(), changedLines);
190 
191                 sourceResults.add(diffResult);
192 
193                 changedLines = _addMargins(
194                     targetResults, targetStringList, difference.getAddedStart(),
195                     margin);
196 
197                 _addResults(
198                     targetResults, targetStringList, changedLines,
199                     difference.getAddedStart(), difference.getAddedEnd());
200             }
201             else {
202 
203                 // Lines were deleted from source and added to target at the
204                 // same position. It needs to check for characters differences.
205 
206                 _checkCharDiffs(
207                     sourceResults, targetResults, sourceStringList,
208                     targetStringList, addedMarkerStart, addedMarkerEnd,
209                     deletedMarkerStart, deletedMarkerEnd, difference, margin);
210             }
211         }
212 
213         return results;
214     }
215 
216     private static List _addMargins(
217         List results, List stringList, int beginPos, int margin) {
218 
219         List changedLines = new ArrayList();
220 
221         if (margin == 0 || beginPos == 0) {
222             return changedLines;
223         }
224 
225         int i = beginPos - margin;
226 
227         for (; i < 0; i++) {
228             changedLines.add(CONTEXT_LINE);
229         }
230 
231         for (; i < beginPos; i++) {
232             if (i < stringList.size()) {
233                 changedLines.add(stringList.get(i));
234             }
235         }
236 
237         return changedLines;
238     }
239 
240     private static void _addResults(
241         List results, List stringList, List changedLines, int start, int end) {
242 
243         changedLines.addAll(stringList.subList(start, end + 1));
244 
245         DiffResult diffResult = new DiffResult(
246             start, changedLines);
247 
248         results.add(diffResult);
249     }
250 
251     private static int _calculateMargin(
252         List sourceResults, List targetResults, int sourceBeginPos,
253         int targetBeginPos, int margin) {
254 
255         int sourceMargin = _checkOverlapping(
256             sourceResults, sourceBeginPos, margin);
257         int targetMargin = _checkOverlapping(
258             targetResults, targetBeginPos, margin);
259 
260         if (sourceMargin < targetMargin) {
261             return sourceMargin;
262         }
263 
264         return targetMargin;
265     }
266 
267     private static void _checkCharDiffs(
268         List sourceResults, List targetResults, List sourceStringList,
269         List targetStringList, String addedMarkerStart, String addedMarkerEnd,
270         String deletedMarkerStart, String deletedMarkerEnd,
271         Difference difference, int margin) {
272 
273         boolean aligned = false;
274 
275         int i = difference.getDeletedStart();
276         int j = difference.getAddedStart();
277 
278         // A line with changed characters may have its position shifted some
279         // lines above or below. These for loops will try to align these lines.
280         // While these lines are not aligned, highlight them as either additions
281         // or deletions.
282 
283         for (; i <= difference.getDeletedEnd(); i++) {
284             for (; j <= difference.getAddedEnd(); j++) {
285                 if (_lineDiff(
286                         sourceResults, targetResults, sourceStringList,
287                         targetStringList, addedMarkerStart, addedMarkerEnd,
288                         deletedMarkerStart, deletedMarkerEnd, i, j, false)) {
289 
290                     aligned = true;
291 
292                     break;
293                 }
294                 else {
295                     _highlightLines(
296                         targetStringList, addedMarkerStart, addedMarkerEnd, j,
297                         j);
298 
299                     DiffResult targetResult = new DiffResult(
300                         j, targetStringList.subList(j, j + 1));
301 
302                     targetResults.add(targetResult);
303 
304                     sourceResults.add(new DiffResult(j, CONTEXT_LINE));
305                 }
306             }
307 
308             if (aligned) {
309                  break;
310             }
311             else {
312                 _highlightLines(
313                     sourceStringList, deletedMarkerStart, deletedMarkerEnd, i,
314                     i);
315 
316                 DiffResult sourceResult = new DiffResult(
317                     i, sourceStringList.subList(i, i + 1));
318 
319                 sourceResults.add(sourceResult);
320 
321                 targetResults.add(new DiffResult(i, CONTEXT_LINE));
322             }
323         }
324 
325         i = i + 1;
326         j = j + 1;
327 
328         // Lines are aligned, check for differences of the following lines.
329 
330         for (; i <= difference.getDeletedEnd() && j <= difference.getAddedEnd();
331                 i++, j++) {
332 
333             _lineDiff(
334                 sourceResults, targetResults, sourceStringList,
335                 targetStringList, addedMarkerStart, addedMarkerEnd,
336                 deletedMarkerStart, deletedMarkerEnd, i, j, true);
337         }
338 
339         // After the for loop above, some lines might remained unchecked.
340         // They are considered as deletions or additions.
341 
342         for (; i <= difference.getDeletedEnd();i++) {
343             _highlightLines(
344                 sourceStringList, deletedMarkerStart, deletedMarkerEnd, i, i);
345 
346             DiffResult sourceResult = new DiffResult(
347                 i, sourceStringList.subList(i, i + 1));
348 
349             sourceResults.add(sourceResult);
350 
351             targetResults.add(new DiffResult(i, CONTEXT_LINE));
352         }
353 
354         for (; j <= difference.getAddedEnd(); j++) {
355             _highlightLines(
356                 targetStringList, addedMarkerStart, addedMarkerEnd, j, j);
357 
358             DiffResult targetResult = new DiffResult(
359                 j, targetStringList.subList(j, j + 1));
360 
361             targetResults.add(targetResult);
362 
363             sourceResults.add(new DiffResult(j, CONTEXT_LINE));
364         }
365     }
366 
367     private static int _checkOverlapping(
368         List results, int beginPos, int margin) {
369 
370         if (results.size() == 0 || (beginPos - margin) < 0) {
371             return margin;
372         }
373 
374         DiffResult lastDiff = (DiffResult)results.get(results.size() - 1);
375 
376         if (lastDiff.getChangedLines().size() == 0) {
377             return margin;
378         }
379 
380         int lastChangedLine = (lastDiff.getLineNumber() - 1) +
381             lastDiff.getChangedLines().size();
382 
383         int currentChangedLine = beginPos - margin;
384 
385         if ((lastDiff.getChangedLines().size() == 1) &&
386             (lastDiff.getChangedLines().get(0).equals(CONTEXT_LINE))) {
387 
388             currentChangedLine = currentChangedLine + 1;
389         }
390 
391         if (currentChangedLine < lastChangedLine) {
392             return margin + currentChangedLine - lastChangedLine;
393         }
394 
395         return margin;
396     }
397 
398     private static void _highlightChars(
399         List stringList, String markerStart, String markerEnd, int beginPos,
400         int endPos) {
401 
402         String start = markerStart + stringList.get(beginPos);
403 
404         stringList.set(beginPos, start);
405 
406         String end = stringList.get(endPos) + markerEnd;
407 
408         stringList.set(endPos, end);
409     }
410 
411     private static void _highlightLines(
412         List stringList, String markerStart, String markerEnd, int beginPos,
413         int endPos) {
414 
415         for (int i = beginPos; i <= endPos; i++) {
416             stringList.set(i, markerStart + stringList.get(i) + markerEnd);
417         }
418     }
419 
420     private static boolean _lineDiff(
421         List sourceResults, List targetResults, List sourceStringList,
422         List targetStringList, String addedMarkerStart, String addedMarkerEnd,
423         String deletedMarkerStart, String deletedMarkerEnd,
424         int sourceChangedLine, int targetChangedLine, boolean aligned) {
425 
426         String source = (String)sourceStringList.get(sourceChangedLine);
427         String target = (String)targetStringList.get(targetChangedLine);
428 
429         // Convert the lines to lists where each element are chars of the lines.
430 
431         List sourceList = _toList(source);
432         List targetList = _toList(target);
433 
434         Diff diff = new Diff(sourceList, targetList);
435 
436         List differences = diff.diff();
437 
438         Iterator itr = differences.iterator();
439 
440         int deletedChars = 0;
441         int addedChars = 0;
442 
443         // The following while loop will calculate how many characters of
444         // the source line need to be changed to be equals to the target line.
445 
446         while (itr.hasNext() && !aligned) {
447             Difference difference = (Difference)itr.next();
448 
449             if (difference.getDeletedEnd() != Difference.NONE) {
450                 deletedChars =
451                     deletedChars +
452                     (difference.getDeletedEnd() -
453                         difference.getDeletedStart() + 1);
454             }
455 
456             if (difference.getAddedEnd() != Difference.NONE) {
457                 addedChars =
458                     addedChars +
459                     (difference.getAddedEnd() - difference.getAddedStart() + 1);
460             }
461         }
462 
463         // If a lot of changes were needed (more than half of the source line
464         // length), consider this as not aligned yet.
465 
466         if ((deletedChars > (sourceList.size() / 2)) ||
467             (addedChars > sourceList.size() / 2)) {
468 
469             return false;
470         }
471 
472         itr = differences.iterator();
473 
474         boolean sourceChanged = false;
475         boolean targetChanged = false;
476 
477         // Iterate over Differences between chars of these lines.
478 
479         while (itr.hasNext()) {
480             Difference difference = (Difference)itr.next();
481 
482             if (difference.getAddedEnd() == Difference.NONE) {
483 
484                 // Chars were deleted from source only.
485 
486                 _highlightChars(
487                     sourceList, deletedMarkerStart,
488                     deletedMarkerEnd, difference.getDeletedStart(),
489                     difference.getDeletedEnd());
490 
491                 sourceChanged = true;
492             }
493             else if (difference.getDeletedEnd() == Difference.NONE) {
494 
495                 // Chars were added to target only.
496 
497                 _highlightChars(
498                     targetList, addedMarkerStart, addedMarkerEnd,
499                     difference.getAddedStart(), difference.getAddedEnd());
500 
501                 targetChanged = true;
502             }
503             else {
504 
505                 // Chars were both deleted and added.
506 
507                 _highlightChars(
508                     sourceList, deletedMarkerStart,
509                     deletedMarkerEnd, difference.getDeletedStart(),
510                     difference.getDeletedEnd());
511 
512                 sourceChanged = true;
513 
514                 _highlightChars(
515                     targetList, addedMarkerStart, addedMarkerEnd,
516                     difference.getAddedStart(), difference.getAddedEnd());
517 
518                 targetChanged = true;
519             }
520         }
521 
522         if (sourceChanged) {
523             DiffResult sourceResult = new DiffResult(
524                 sourceChangedLine, _toString(sourceList));
525 
526             sourceResults.add(sourceResult);
527 
528             if (!targetChanged) {
529                 DiffResult targetResult = new DiffResult(
530                     targetChangedLine, target);
531 
532                 targetResults.add(targetResult);
533             }
534         }
535 
536         if (targetChanged) {
537             if (!sourceChanged) {
538                 DiffResult sourceResult = new DiffResult(
539                     sourceChangedLine, source);
540 
541                 sourceResults.add(sourceResult);
542             }
543 
544             DiffResult targetResult = new DiffResult(
545                 targetChangedLine, _toString(targetList));
546 
547             targetResults.add(targetResult);
548         }
549 
550         return true;
551     }
552 
553     private static List _toList(String line) {
554         String[] stringArray = line.split(StringPool.BLANK);
555 
556         List result = new ArrayList();
557 
558         for (int i = 1; i < stringArray.length; i++) {
559             result.add(stringArray[i]);
560         }
561 
562         return result;
563     }
564 
565     private static String _toString(List line) {
566         StringMaker sm = new StringMaker();
567 
568         Iterator itr = line.iterator();
569 
570         while (itr.hasNext()) {
571             sm.append((String)itr.next());
572         }
573 
574         return sm.toString();
575     }
576 
577 }