View Javadoc
1   /*
2    * Copyright 2007 Edward Kuns
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * $Id: XMLDiff.java 0000 2007-01-11 00:00:00Z ekuns $
17   */
18  package org.castor.xmlctf.xmldiff;
19  
20  import java.io.PrintWriter;
21  import java.util.Iterator;
22  import java.util.LinkedList;
23  import java.util.List;
24  import java.util.StringTokenizer;
25  
26  import org.castor.xmlctf.xmldiff.xml.XMLFileReader;
27  import org.castor.xmlctf.xmldiff.xml.nodes.Attribute;
28  import org.castor.xmlctf.xmldiff.xml.nodes.Element;
29  import org.castor.xmlctf.xmldiff.xml.nodes.ParentNode;
30  import org.castor.xmlctf.xmldiff.xml.nodes.Root;
31  import org.castor.xmlctf.xmldiff.xml.nodes.XMLNode;
32  
33  /**
34   * A utility class used to compare two XMLNodes, or XML input files and report
35   * the differences between them.
36   *
37   * @author <a href="mailto:edward.kuns@aspect.com">Edward Kuns</a>
38   * @version $Revision: 0000 $ $Date: 2007-01-11 00:00:00 -0600 (Thu, 11 Jan 2007) $
39   * @since Castor 1.1
40   */
41  public class XMLDiff {
42  
43      /** The namespace of XML Schema. */
44      private static final String XMLSCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
45  
46      /** Filename of the 1st XML item being compared. */
47      private final String      _file1;
48      /** Filename of the 2nd XML item being compared. */
49      private final String      _file2;
50      /** PrintWriter used for this comparison. */
51      private final PrintWriter _pw               = new PrintWriter(System.out, true);
52  
53      /**
54       * A boolean to indicate whether or not child order should be strictly
55       * enforced. By default child order is treated as "all", meaning that as
56       * long as a child exists, it doesn't matter which order the children occur.
57       */
58      private boolean           _strictChildOrder = false;
59      /** True if we should print errors to the PrintWriter. */
60      private boolean           _print            = true;
61      /** True if we have not yet printed the diff header. */
62      private boolean           _header           = true;
63  
64      /**
65       * Constructs an XMLDiff object that is ready to compare the two provided
66       * XML files.
67       *
68       * @param file1 The first XML file for comparison.
69       * @param file2 The second XML file for comparison.
70       */
71      public XMLDiff(final String file1, final String file2) {
72          if (file1 == null) {
73              String err = "The argument 'file1' may not be null.";
74              throw new IllegalArgumentException(err);
75          }
76  
77          if (file2 == null) {
78              String err = "The argument 'file2' may not be null.";
79              throw new IllegalArgumentException(err);
80          }
81  
82          _file1 = file1;
83          _file2 = file2;
84      }
85  
86      /**
87       * Compares the two XML documents located at the given URL locations.
88       * Returns 0, if no differences are found, otherwise returns a positive
89       * number indicating the number of differences.
90       * <p>
91       * This is the only public method in this class.
92       *
93       * @return 0, if no differences are found, otherwise a positive number
94       *         indicating the number of differences.
95       * @throws java.io.IOException an this occurs while reading
96       */
97      public int compare() throws java.io.IOException {
98          XMLFileReader reader1 = new XMLFileReader(_file1);
99          XMLNode node1 = reader1.read();
100 
101         XMLFileReader reader2 = new XMLFileReader(_file2);
102         XMLNode node2 = reader2.read();
103 
104         return compareNodes(node1, node2);
105     }
106 
107     /**
108      * Compares the given XMLNodes. Returns 0 if no differences are found,
109      * otherwise returns a positive number indicating the number of differences.
110      *
111      * @param node1 the first XMLNode to compare
112      * @param node2 the second XMLNode to compare
113      * @return 0, if no differences are found, otherwise a positive number
114      *         indicating the number of differences.
115      */
116     private int compareNodes(final XMLNode node1, final XMLNode node2) {
117         // Compare node types
118         if (!hasSameType(node1, node2)) {
119             if (_print) {
120                 _pw.println("Types differ: <" + node1.getLocalName() + "> and <"
121                             + node2.getLocalName() + "> for" + node1.getNodeLocation());
122             }
123             return 1;
124         }
125 
126         int diffCount = 0;
127 
128         String ns1 = node1.getNamespaceURI();
129         String ns2 = node2.getNamespaceURI();
130         if (!compareTextNullEqualsEmpty(ns1, ns2)) {
131             if (_print) {
132                 _pw.println("Namespaces differ: ('" + ns1 + "' != '" + ns2 + "') for "
133                             + node1.getNodeLocation());
134             }
135 
136             ++diffCount;
137         }
138 
139         // Compare local names (if both are null it's OK)
140         String name1 = node1.getLocalName();
141         String name2 = node2.getLocalName();
142 
143         if (name1 == null && name2 != null) {
144             if (_print) {
145                 _pw.println("Names differ: null vs. <" + name2 + "> for "
146                             + node1.getNodeLocation());
147             }
148             ++diffCount;
149             return diffCount;
150         } else if (name2 == null && name1 != null) {
151             if (_print) {
152                 _pw.println("Names differ: <" + name1 + "> vs null for "
153                             + node1.getNodeLocation());
154             }
155             ++diffCount;
156             return diffCount;
157         } else if (name1 != null && !name1.equals(name2)) {
158             if (_print) {
159                 _pw.println("Names differ: <" + name1 + "> != <" + name2 + "> for "
160                             + node1.getNodeLocation());
161             }
162             ++diffCount;
163             return diffCount;
164         }
165 
166         // Compare node content
167         switch (node1.getNodeType()) {
168             case XMLNode.ROOT:
169                 diffCount += compareElementsStrictOrder((Root)node1, (Root)node2);
170                 break;
171 
172             case XMLNode.ELEMENT:
173                 diffCount += compareElements((Element)node1, (Element)node2);
174                 break;
175 
176             case XMLNode.ATTRIBUTE:
177                 diffCount += compareStringValues(node1, node2);
178                 break;
179 
180             case XMLNode.TEXT:
181                 diffCount += compareStringValues(node1, node2);
182                 break;
183 
184             case XMLNode.PROCESSING_INSTRUCTION:
185                 // We don't care about comparing processing instructions
186                 break;
187 
188             default:
189                 System.out.println("Unexpected node type in XMLDiff: " + node1.getNodeType());
190                 break;
191         }
192 
193         return diffCount;
194     } // -- compare
195 
196     /**
197      * Compares the String values of the provided XML Nodes.
198      * @param node1 The first node to be compared
199      * @param node2 The second node to be compared
200      * @return 0 if the String values are the same, 1 otherwise
201      */
202     private int compareStringValues(final XMLNode node1, final XMLNode node2) {
203         if (compareText(node1.getStringValue(), node2.getStringValue())) {
204             return 0;
205         }
206 
207         if (_print) {
208             _pw.println();
209             printLocationInfo(node1, node2);
210             printText("- ", node1.getStringValue());
211             _pw.println();
212             printText("+ ", node2.getStringValue());
213         }
214         return 1;
215     }
216 
217     /**
218      * Checks the attributes of the given nodes to make sure they are identical
219      * but does not care about the attribute order, as per XML 1.0.
220      */
221     private int compareAttributes(final Element node1, final Element node2) {
222         int diffCount = 0;
223 
224         for (Iterator i = node1.getAttributeIterator(); i.hasNext(); ) {
225             Attribute attr1 = (Attribute) i.next();
226 
227             // Does node2 have this attribute at all?
228             String attValue2 = node2.getAttribute(attr1.getNamespaceURI(), attr1.getLocalName());
229             if (attValue2 == null) {
230                 // Is this an attribute that is allowed to be missing sometimes?
231                 if (missingattributeIsIgnorable(attr1)) {
232                     continue;
233                 }
234 
235                 // If not, complain
236                 printElementChangeBlock(node1, node2, "Attribute '"
237                                         + attr1.getNodeLocation()
238                                         + "' does not exist in the second document.");
239                 diffCount++;
240                 continue;
241             }
242 
243             // If it does, does it have the same value?
244             String attValue1 = attr1.getStringValue();
245             if (!compareTextLikeQName(node1, node2, attValue1, attValue2)) {
246                 printElementChangeBlock(node1, node2, "Attribute '"
247                                         + attr1.getNodeLocation()
248                                         + "' values are different.");
249                 diffCount++;
250             }
251         }
252 
253         // Look for attributes on node 2 that are not on node 1
254         for (Iterator i = node2.getAttributeIterator(); i.hasNext(); ) {
255             Attribute attr2 = (Attribute) i.next();
256             if (node1.getAttribute(attr2.getNamespaceURI(), attr2.getLocalName()) == null) {
257                 // Is this an attribute that is allowed to be missing sometimes?
258                 if (missingattributeIsIgnorable(attr2)) {
259                     continue;
260                 }
261 
262                 // If not, complain
263                 printElementChangeBlock(node1, node2, "Attribute '"
264                                         + attr2.getNodeLocation()
265                                         + "' does not exist in the first document.");
266                 diffCount++;
267             }
268         }
269 
270         return diffCount;
271     }
272 
273     private boolean missingattributeIsIgnorable(Attribute attr) {
274         String name = attr.getLocalName();
275         String ns = attr.getNamespaceURI();
276         if (ns == null) {
277             ns = "";
278         }
279 
280         if (name.equals("noNamespaceSchemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
281             return true;
282         }
283         if (name.equals("schemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
284             return true;
285         }
286         return false;
287     }
288 
289     /**
290      * Compare the provided attribute text as if it were a QName.
291      *
292      * @param node1 Node containing attribute 1
293      * @param node2 Node containing attribute 2
294      * @param attValue1 String value of attribute 1
295      * @param attValue2 String value of attribute 2
296      * @return true if the attributes are equal directly, or equal when compared
297      *         as QNames
298      */
299     private boolean compareTextLikeQName(final XMLNode node1, final XMLNode node2,
300                                          final String attValue1, final String attValue2) {
301         // If strings are equal, return equal
302         if (compareText(attValue1, attValue2)) {
303             return true;
304         }
305 
306         // If neither attribute value has ":" then return false
307         final int idx1 = attValue1.indexOf(':');
308         final int idx2 = attValue2.indexOf(':');
309         if (idx1 < 0 && idx2 < 0) {
310             return false;
311         }
312 
313         final String prefix1;
314         final String prefix2;
315         final String value1;
316         final String value2;
317 
318         if (idx1 >= 0) {
319             value1 = attValue1.substring(idx1 + 1);
320             prefix1 = attValue1.substring(0, idx1);
321         } else {
322             value1 = attValue1;
323             prefix1 = "";
324         }
325 
326         if (idx2 >= 0) {
327             value2 = attValue2.substring(idx2 + 1);
328             prefix2 = attValue2.substring(0, idx2);
329         } else {
330             value2 = attValue2;
331             prefix2 = "";
332         }
333 
334         // Return true if text value is equal and namesspaces are equal
335         return compareText(value1, value2)
336                && compareTextNullEqualsEmpty(node1.getNamespaceURI(prefix1),
337                                              node2.getNamespaceURI(prefix2));
338     }
339 
340     /**
341      * Compares the two XMLNodes, both of which must be of type XMLNode.ELEMENT
342      * or XMLNode.ROOT.
343      *
344      * @param node1 the primary XMLNode to comapare against
345      * @param node2 the XMLNode to compare against node1
346      * @return the number of differences found in this document tree
347      */
348     private int compareElements(final Element node1, final Element node2) {
349         int diffCount = compareAttributes(node1, node2);
350 
351         if (_strictChildOrder) {
352             diffCount += compareElementsStrictOrder(node1, node2);
353         } else {
354             diffCount += compareElementsLooseOrder(node1, node2);
355         }
356         return diffCount;
357     }
358 
359     /**
360      * Compares the two XMLNodes (not counting attributes) requiring strict
361      * child order, both of which must be of type XMLNode.ELEMENT or
362      * XMLNode.ROOT.
363      *
364      * @param node1 the primary XMLNode to comapare against
365      * @param node2 the XMLNode to compare against node1
366      * @return the number of differences found in this document tree
367      */
368     private int compareElementsStrictOrder(final ParentNode node1, final ParentNode node2) {
369         int diffCount = 0;
370 
371         Iterator i1 = node1.getChildIterator();
372         Iterator i2 = node2.getChildIterator();
373 
374         // Skip all ignorable whitespace and compare with strict order
375         if (i1.hasNext() && i2.hasNext()) {
376             XMLNode child1 = (XMLNode) i1.next();
377             XMLNode child2 = (XMLNode) i2.next();
378             while (child1 != null && child2 != null) {
379                 if (nodeIsIgnorableText(child1)) {
380                     if (!i1.hasNext()) {
381                         break;
382                     }
383                     child1 = (XMLNode) i1.next();
384                     continue;
385                 }
386                 if (nodeIsIgnorableText(child2)) {
387                     if (!i2.hasNext()) {
388                         break;
389                     }
390                     child2 = (XMLNode) i2.next();
391                     continue;
392                 }
393 
394                 diffCount += compareNodes(child1, child2);
395 
396                 if (!i1.hasNext() || !i2.hasNext()) {
397                     break;
398                 }
399 
400                 child1 = (XMLNode) i1.next();
401                 child2 = (XMLNode) i2.next();
402             }
403         }
404 
405         // If we have excess nodes for root1, complain about missing elements
406         while (i1.hasNext()) {
407             XMLNode child1 = (XMLNode) i1.next();
408             if (!nodeIsIgnorableText(child1)) {
409                 if (_print) {
410                     printLocationInfo(child1, null);
411                     _pw.println("- ");
412                 }
413                 ++diffCount;
414             }
415         }
416 
417         // If we have excess nodes for root2, complain about extra elements
418         while (i2.hasNext()) {
419             XMLNode child2 = (XMLNode) i2.next();
420             if (!nodeIsIgnorableText(child2)) {
421                 if (_print) {
422                     printLocationInfo(child2, null);
423                     _pw.println("- ");
424                 }
425                 ++diffCount;
426             }
427         }
428 
429         return diffCount;
430     }
431 
432     /**
433      * Compares the two XMLNodes, both of which must be of type XMLNode.ELEMENT
434      * or XMLNode.ROOT.
435      *
436      * @param node1 the primary XMLNode to comapare against
437      * @param node2 the XMLNode to compare against node1
438      * @return the number of differences found in this document tree
439      */
440     private int compareElementsLooseOrder(final Element node1, final Element node2) {
441         int diffCount = 0;
442 
443         final List used = new LinkedList();
444 
445         for (Iterator i1 = node1.getChildIterator(); i1.hasNext(); ) {
446             XMLNode child1 = (XMLNode) i1.next();
447             // Ignore whitespace
448             // If we find an exact match, continue with the next node in the list
449             if (nodeIsIgnorableText(child1) || foundExactMatch(node2, child1, used)) {
450                 continue;
451             }
452 
453             // Check for the best match and use it to count diffs & complain
454             if (_print) {
455                 diffCount += closestMatchDifference(node2, child1, used);
456             } else {
457                 diffCount++;
458             }
459         }
460 
461         // Complain about all children of node2 that are not used and not ignorable whitespace
462         for (Iterator i2 = node2.getChildIterator(); i2.hasNext(); ) {
463             XMLNode child2 = (XMLNode) i2.next();
464             if (!nodeIsIgnorableText(child2) && !used.contains(child2)) {
465                 if (_print) {
466                     _pw.println("Extra child node: " + child2.getNodeLocation());
467                 }
468                 ++diffCount;
469             }
470         }
471 
472         return diffCount;
473     }
474 
475     /**
476      * Looks for an exact match for the provided target XMLNode. If found,
477      * returns true. Suppresses complaints during search. If an exact match is
478      * found, the match is added to the list of "used" items.
479      *
480      * @param parent The node whose children to search for an exact match
481      * @param target The XMLNode we are trying to match
482      * @param usedList The list of children of node2 that have already matched
483      *        other objects
484      * @return true if an exact match is found for the provided node.
485      */
486     private boolean foundExactMatch(final Element parent, XMLNode target, final List usedList) {
487         // Suppress complaints when we are looking for an exact match.
488         boolean previousPrint = _print;
489 
490         _print = false; // Suppress printing when we are "just looking"
491         boolean found = false;
492         for (Iterator i2 = parent.getChildIterator(); i2.hasNext(); ) {
493             XMLNode child2 = (XMLNode) i2.next();
494             if (!usedList.contains(child2) && compareNodes(target, child2) == 0) {
495                 usedList.add(child2);
496                 found = true;
497                 break;
498             }
499         }
500 
501         // Restore printing
502         _print = previousPrint;
503         return found;
504     }
505 
506     /**
507      * Looks for a close patch to the provided target XMLNode among the children
508      * of the provided parent node.  The difference between the closest match
509      * and the target is returned. If we cannot even find a close match, then
510      * we declare the target missing and return one difference.
511      * <p>
512      * Note:  This method is only called when printing is enabled.
513      *
514      * @param parent The node whose children to search for an exact match
515      * @param target The XMLNode we are trying to match
516      * @param usedList The list of children of node2 that have already matched
517      *        other objects
518      * @return the difference count
519      */
520     private int closestMatchDifference(final Element parent, final XMLNode target,
521                                        final List usedList) {
522         for (Iterator i2 = parent.getChildIterator(); i2.hasNext(); ) {
523             XMLNode child2 = (XMLNode) i2.next();
524             if (!usedList.contains(child2) && hasSameType(target, child2)
525                 && hasSameName(target, child2)) {
526                 usedList.add(child2);
527                 return compareNodes(target, child2);
528             }
529         }
530 
531         _pw.println("Missing child node: " + target.getNodeLocation() + " for "
532                     + target.getNodeLocation());
533         return 1;
534     }
535 
536     /**
537      * Returns true if the given node is a TEXT node that contains only
538      * ignorable whitespace.
539      *
540      * @return true if the given node is a TEXT node that contains only
541      *         ignorable whitespace.
542      */
543     private boolean nodeIsIgnorableText(final XMLNode child) {
544         return (child.getNodeType() == XMLNode.TEXT && compareText(child.getStringValue(), ""));
545     }
546 
547     /**
548      * Returns true if the two Strings are equal, ignoring whitespace
549      * differences that are ignorable.
550      *
551      * @return true if the two Strings are equal, ignoring whitespace
552      *         differences that are ignorable.
553      */
554     private boolean compareText(final String s1, final String s2) {
555         if (s1.equals(s2)) {
556             return true;
557         }
558 
559         // Strings are different; compare token by token to ignore whitespace differences
560         StringTokenizer st1 = new StringTokenizer(s1);
561         StringTokenizer st2 = new StringTokenizer(s2);
562 
563         while (st1.hasMoreTokens() && st2.hasMoreTokens()) {
564             if (!st1.nextToken().equals(st2.nextToken())) {
565                 return false;
566             }
567         }
568 
569         // If the Strings have different numbers of tokens, fail
570         if (st1.hasMoreTokens() || st2.hasMoreTokens()) {
571             return false;
572         }
573 
574         return true;
575     }
576 
577     /**
578      * Compares two strings. Considers null Strings to be the same as an empty
579      * String.
580      *
581      * @param one The first string to compare.
582      * @param two The second string to compare.
583      * @return true if the two strings are equals or are both "null or empty"
584      */
585     private boolean compareTextNullEqualsEmpty(String one, String two) {
586         String text1 = (one == null) ? "" : one;
587         String text2 = (two == null) ? "" : two;
588         return text1.equals(text2);
589     }
590 
591     private boolean hasSameName(final XMLNode node1, final XMLNode node2) {
592         String name1 = node1.getLocalName();
593         String name2 = node2.getLocalName();
594 
595         // ROOT may or may not have null name, so we must check for possible null values
596         if (name1 == null) {
597             return (name2 == null);
598         }
599         return name1.equals(name2);
600     }
601 
602     private boolean hasSameType(final XMLNode node1, final XMLNode node2) {
603         return (node1.getNodeType() == node2.getNodeType());
604     }
605 
606     private void printLocationInfo(final XMLNode node1, final XMLNode node2) {
607         if (_header) {
608             _header = false;
609             _pw.println("--- " + _file1);
610             _pw.println("+++ " + _file2);
611         }
612         _pw.print("@@ -");
613         _pw.print(node1.getNodeLocation());
614         _pw.print(" +");
615         _pw.print(node2.getNodeLocation());
616         _pw.println(" @@");
617     }
618 
619     private void printElementChangeBlock(final Element node1, final Element node2, final String msg) {
620         if (_print) {
621             _pw.print("- ");
622             printElement(node1);
623             _pw.print("+ ");
624             printElement(node2);
625             if (msg != null) {
626                 _pw.println(msg);
627             }
628         }
629     }
630 
631     private void printElement(final Element node) {
632         _pw.print('<' + node.getLocalName());
633 
634         for (Iterator i = node.getAttributeIterator(); i.hasNext(); ) {
635             Attribute attr = (Attribute) i.next();
636             _pw.print(' ');
637             _pw.print(attr.getLocalName());
638             _pw.print("=\"");
639             _pw.print(attr.getStringValue());
640             _pw.print("\"");
641         }
642 
643         _pw.println('>');
644     }
645 
646     /**
647      * Prints the given text. Each line of the text is prefixed with the given
648      * prefix. If <code>text</code> has multiple newlines, the prefix will be
649      * printed on each line.
650      *
651      * @param prefix A prefix to display on each line of output
652      * @param text The text to display
653      */
654     private void printText(final String prefix, String text) {
655         if (text == null) {
656             _pw.println(prefix);
657             return;
658         }
659 
660         int idx = 0;
661         while ((idx = text.indexOf('\n')) >= 0) {
662             _pw.print(prefix);
663             _pw.println(text.substring(0, idx));
664             text = text.substring(idx + 1);
665         }
666         _pw.print(prefix);
667         _pw.println(text);
668     }
669 
670 }