View Javadoc
1   /*
2    * Copyright 2007 Edward Kuns
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5    * in compliance with the License. You may obtain a copy of the License at
6    *
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software distributed under the License
10   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11   * or implied. See the License for the specific language governing permissions and limitations under
12   * the License.
13   *
14   * $Id: XMLDiff.java 0000 2007-01-11 00:00:00Z ekuns $
15   */
16  package org.castor.xmlctf.xmldiff;
17  
18  import java.io.PrintWriter;
19  import java.util.Iterator;
20  import java.util.LinkedList;
21  import java.util.List;
22  import java.util.StringTokenizer;
23  
24  import org.castor.xmlctf.xmldiff.xml.XMLFileReader;
25  import org.castor.xmlctf.xmldiff.xml.nodes.Attribute;
26  import org.castor.xmlctf.xmldiff.xml.nodes.Element;
27  import org.castor.xmlctf.xmldiff.xml.nodes.ParentNode;
28  import org.castor.xmlctf.xmldiff.xml.nodes.Root;
29  import org.castor.xmlctf.xmldiff.xml.nodes.XMLNode;
30  
31  /**
32   * A utility class used to compare two XMLNodes, or XML input files and report the differences
33   * between them.
34   *
35   * @author <a href="mailto:edward.kuns@aspect.com">Edward Kuns</a>
36   * @version $Revision: 0000 $ $Date: 2007-01-11 00:00:00 -0600 (Thu, 11 Jan 2007) $
37   * @since Castor 1.1
38   */
39  public class XMLDiff {
40  
41    /** The namespace of XML Schema. */
42    private static final String XMLSCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
43  
44    /** Filename of the 1st XML item being compared. */
45    private final String _file1;
46    /** Filename of the 2nd XML item being compared. */
47    private final String _file2;
48    /** PrintWriter used for this comparison. */
49    private final PrintWriter _pw = new PrintWriter(System.out, true);
50  
51    /**
52     * A boolean to indicate whether or not child order should be strictly enforced. By default child
53     * order is treated as "all", meaning that as long as a child exists, it doesn't matter which
54     * order the children occur.
55     */
56    private boolean _strictChildOrder = false;
57    /** True if we should print errors to the PrintWriter. */
58    private boolean _print = true;
59    /** True if we have not yet printed the diff header. */
60    private boolean _header = true;
61  
62    /**
63     * Constructs an XMLDiff object that is ready to compare the two provided XML files.
64     *
65     * @param file1 The first XML file for comparison.
66     * @param file2 The second XML file for comparison.
67     */
68    public XMLDiff(final String file1, final String file2) {
69      if (file1 == null) {
70        String err = "The argument 'file1' may not be null.";
71        throw new IllegalArgumentException(err);
72      }
73  
74      if (file2 == null) {
75        String err = "The argument 'file2' may not be null.";
76        throw new IllegalArgumentException(err);
77      }
78  
79      _file1 = file1;
80      _file2 = file2;
81    }
82  
83    /**
84     * Compares the two XML documents located at the given URL locations. Returns 0, if no differences
85     * are found, otherwise returns a positive number indicating the number of differences.
86     * <p>
87     * This is the only public method in this class.
88     *
89     * @return 0, if no differences are found, otherwise a positive number indicating the number of
90     *         differences.
91     * @throws java.io.IOException an this occurs while reading
92     */
93    public int compare() throws java.io.IOException {
94      XMLFileReader reader1 = new XMLFileReader(_file1);
95      XMLNode node1 = reader1.read();
96  
97      XMLFileReader reader2 = new XMLFileReader(_file2);
98      XMLNode node2 = reader2.read();
99  
100     return compareNodes(node1, node2);
101   }
102 
103   /**
104    * Compares the given XMLNodes. Returns 0 if no differences are found, otherwise returns a
105    * positive number indicating the number of differences.
106    *
107    * @param node1 the first XMLNode to compare
108    * @param node2 the second XMLNode to compare
109    * @return 0, if no differences are found, otherwise a positive number indicating the number of
110    *         differences.
111    */
112   private int compareNodes(final XMLNode node1, final XMLNode node2) {
113     // Compare node types
114     if (!hasSameType(node1, node2)) {
115       if (_print) {
116         _pw.println("Types differ: <" + node1.getLocalName() + "> and <" + node2.getLocalName()
117             + "> for" + node1.getNodeLocation());
118       }
119       return 1;
120     }
121 
122     int diffCount = 0;
123 
124     String ns1 = node1.getNamespaceURI();
125     String ns2 = node2.getNamespaceURI();
126     if (!compareTextNullEqualsEmpty(ns1, ns2)) {
127       if (_print) {
128         _pw.println(
129             "Namespaces differ: ('" + ns1 + "' != '" + ns2 + "') for " + node1.getNodeLocation());
130       }
131 
132       ++diffCount;
133     }
134 
135     // Compare local names (if both are null it's OK)
136     String name1 = node1.getLocalName();
137     String name2 = node2.getLocalName();
138 
139     if (name1 == null && name2 != null) {
140       if (_print) {
141         _pw.println("Names differ: null vs. <" + name2 + "> for " + node1.getNodeLocation());
142       }
143       ++diffCount;
144       return diffCount;
145     } else if (name2 == null && name1 != null) {
146       if (_print) {
147         _pw.println("Names differ: <" + name1 + "> vs null for " + node1.getNodeLocation());
148       }
149       ++diffCount;
150       return diffCount;
151     } else if (name1 != null && !name1.equals(name2)) {
152       if (_print) {
153         _pw.println(
154             "Names differ: <" + name1 + "> != <" + name2 + "> for " + node1.getNodeLocation());
155       }
156       ++diffCount;
157       return diffCount;
158     }
159 
160     // Compare node content
161     switch (node1.getNodeType()) {
162       case XMLNode.ROOT:
163         diffCount += compareElementsStrictOrder((Root) node1, (Root) node2);
164         break;
165 
166       case XMLNode.ELEMENT:
167         diffCount += compareElements((Element) node1, (Element) node2);
168         break;
169 
170       case XMLNode.ATTRIBUTE:
171         diffCount += compareStringValues(node1, node2);
172         break;
173 
174       case XMLNode.TEXT:
175         diffCount += compareStringValues(node1, node2);
176         break;
177 
178       case XMLNode.PROCESSING_INSTRUCTION:
179         // We don't care about comparing processing instructions
180         break;
181 
182       default:
183         System.out.println("Unexpected node type in XMLDiff: " + node1.getNodeType());
184         break;
185     }
186 
187     return diffCount;
188   } // -- compare
189 
190   /**
191    * Compares the String values of the provided XML Nodes.
192    * 
193    * @param node1 The first node to be compared
194    * @param node2 The second node to be compared
195    * @return 0 if the String values are the same, 1 otherwise
196    */
197   private int compareStringValues(final XMLNode node1, final XMLNode node2) {
198     if (compareText(node1.getStringValue(), node2.getStringValue())) {
199       return 0;
200     }
201 
202     if (_print) {
203       _pw.println();
204       printLocationInfo(node1, node2);
205       printText("- ", node1.getStringValue());
206       _pw.println();
207       printText("+ ", node2.getStringValue());
208     }
209     return 1;
210   }
211 
212   /**
213    * Checks the attributes of the given nodes to make sure they are identical but does not care
214    * about the attribute order, as per XML 1.0.
215    */
216   private int compareAttributes(final Element node1, final Element node2) {
217     int diffCount = 0;
218 
219     for (Iterator i = node1.getAttributeIterator(); i.hasNext();) {
220       Attribute attr1 = (Attribute) i.next();
221 
222       // Does node2 have this attribute at all?
223       String attValue2 = node2.getAttribute(attr1.getNamespaceURI(), attr1.getLocalName());
224       if (attValue2 == null) {
225         // Is this an attribute that is allowed to be missing sometimes?
226         if (missingattributeIsIgnorable(attr1)) {
227           continue;
228         }
229 
230         // If not, complain
231         printElementChangeBlock(node1, node2,
232             "Attribute '" + attr1.getNodeLocation() + "' does not exist in the second document.");
233         diffCount++;
234         continue;
235       }
236 
237       // If it does, does it have the same value?
238       String attValue1 = attr1.getStringValue();
239       if (!compareTextLikeQName(node1, node2, attValue1, attValue2)) {
240         printElementChangeBlock(node1, node2,
241             "Attribute '" + attr1.getNodeLocation() + "' values are different.");
242         diffCount++;
243       }
244     }
245 
246     // Look for attributes on node 2 that are not on node 1
247     for (Iterator i = node2.getAttributeIterator(); i.hasNext();) {
248       Attribute attr2 = (Attribute) i.next();
249       if (node1.getAttribute(attr2.getNamespaceURI(), attr2.getLocalName()) == null) {
250         // Is this an attribute that is allowed to be missing sometimes?
251         if (missingattributeIsIgnorable(attr2)) {
252           continue;
253         }
254 
255         // If not, complain
256         printElementChangeBlock(node1, node2,
257             "Attribute '" + attr2.getNodeLocation() + "' does not exist in the first document.");
258         diffCount++;
259       }
260     }
261 
262     return diffCount;
263   }
264 
265   private boolean missingattributeIsIgnorable(Attribute attr) {
266     String name = attr.getLocalName();
267     String ns = attr.getNamespaceURI();
268     if (ns == null) {
269       ns = "";
270     }
271 
272     if (name.equals("noNamespaceSchemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
273       return true;
274     }
275     if (name.equals("schemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
276       return true;
277     }
278     return false;
279   }
280 
281   /**
282    * Compare the provided attribute text as if it were a QName.
283    *
284    * @param node1 Node containing attribute 1
285    * @param node2 Node containing attribute 2
286    * @param attValue1 String value of attribute 1
287    * @param attValue2 String value of attribute 2
288    * @return true if the attributes are equal directly, or equal when compared as QNames
289    */
290   private boolean compareTextLikeQName(final XMLNode node1, final XMLNode node2,
291       final String attValue1, final String attValue2) {
292     // If strings are equal, return equal
293     if (compareText(attValue1, attValue2)) {
294       return true;
295     }
296 
297     // If neither attribute value has ":" then return false
298     final int idx1 = attValue1.indexOf(':');
299     final int idx2 = attValue2.indexOf(':');
300     if (idx1 < 0 && idx2 < 0) {
301       return false;
302     }
303 
304     final String prefix1;
305     final String prefix2;
306     final String value1;
307     final String value2;
308 
309     if (idx1 >= 0) {
310       value1 = attValue1.substring(idx1 + 1);
311       prefix1 = attValue1.substring(0, idx1);
312     } else {
313       value1 = attValue1;
314       prefix1 = "";
315     }
316 
317     if (idx2 >= 0) {
318       value2 = attValue2.substring(idx2 + 1);
319       prefix2 = attValue2.substring(0, idx2);
320     } else {
321       value2 = attValue2;
322       prefix2 = "";
323     }
324 
325     // Return true if text value is equal and namesspaces are equal
326     return compareText(value1, value2) && compareTextNullEqualsEmpty(node1.getNamespaceURI(prefix1),
327         node2.getNamespaceURI(prefix2));
328   }
329 
330   /**
331    * Compares the two XMLNodes, both of which must be of type XMLNode.ELEMENT or XMLNode.ROOT.
332    *
333    * @param node1 the primary XMLNode to comapare against
334    * @param node2 the XMLNode to compare against node1
335    * @return the number of differences found in this document tree
336    */
337   private int compareElements(final Element node1, final Element node2) {
338     int diffCount = compareAttributes(node1, node2);
339 
340     if (_strictChildOrder) {
341       diffCount += compareElementsStrictOrder(node1, node2);
342     } else {
343       diffCount += compareElementsLooseOrder(node1, node2);
344     }
345     return diffCount;
346   }
347 
348   /**
349    * Compares the two XMLNodes (not counting attributes) requiring strict child order, both of which
350    * must be of type XMLNode.ELEMENT or XMLNode.ROOT.
351    *
352    * @param node1 the primary XMLNode to comapare against
353    * @param node2 the XMLNode to compare against node1
354    * @return the number of differences found in this document tree
355    */
356   private int compareElementsStrictOrder(final ParentNode node1, final ParentNode node2) {
357     int diffCount = 0;
358 
359     Iterator i1 = node1.getChildIterator();
360     Iterator i2 = node2.getChildIterator();
361 
362     // Skip all ignorable whitespace and compare with strict order
363     if (i1.hasNext() && i2.hasNext()) {
364       XMLNode child1 = (XMLNode) i1.next();
365       XMLNode child2 = (XMLNode) i2.next();
366       while (child1 != null && child2 != null) {
367         if (nodeIsIgnorableText(child1)) {
368           if (!i1.hasNext()) {
369             break;
370           }
371           child1 = (XMLNode) i1.next();
372           continue;
373         }
374         if (nodeIsIgnorableText(child2)) {
375           if (!i2.hasNext()) {
376             break;
377           }
378           child2 = (XMLNode) i2.next();
379           continue;
380         }
381 
382         diffCount += compareNodes(child1, child2);
383 
384         if (!i1.hasNext() || !i2.hasNext()) {
385           break;
386         }
387 
388         child1 = (XMLNode) i1.next();
389         child2 = (XMLNode) i2.next();
390       }
391     }
392 
393     // If we have excess nodes for root1, complain about missing elements
394     while (i1.hasNext()) {
395       XMLNode child1 = (XMLNode) i1.next();
396       if (!nodeIsIgnorableText(child1)) {
397         if (_print) {
398           printLocationInfo(child1, null);
399           _pw.println("- ");
400         }
401         ++diffCount;
402       }
403     }
404 
405     // If we have excess nodes for root2, complain about extra elements
406     while (i2.hasNext()) {
407       XMLNode child2 = (XMLNode) i2.next();
408       if (!nodeIsIgnorableText(child2)) {
409         if (_print) {
410           printLocationInfo(child2, null);
411           _pw.println("- ");
412         }
413         ++diffCount;
414       }
415     }
416 
417     return diffCount;
418   }
419 
420   /**
421    * Compares the two XMLNodes, both of which must be of type XMLNode.ELEMENT or XMLNode.ROOT.
422    *
423    * @param node1 the primary XMLNode to comapare against
424    * @param node2 the XMLNode to compare against node1
425    * @return the number of differences found in this document tree
426    */
427   private int compareElementsLooseOrder(final Element node1, final Element node2) {
428     int diffCount = 0;
429 
430     final List used = new LinkedList();
431 
432     for (Iterator i1 = node1.getChildIterator(); i1.hasNext();) {
433       XMLNode child1 = (XMLNode) i1.next();
434       // Ignore whitespace
435       // If we find an exact match, continue with the next node in the list
436       if (nodeIsIgnorableText(child1) || foundExactMatch(node2, child1, used)) {
437         continue;
438       }
439 
440       // Check for the best match and use it to count diffs & complain
441       if (_print) {
442         diffCount += closestMatchDifference(node2, child1, used);
443       } else {
444         diffCount++;
445       }
446     }
447 
448     // Complain about all children of node2 that are not used and not ignorable whitespace
449     for (Iterator i2 = node2.getChildIterator(); i2.hasNext();) {
450       XMLNode child2 = (XMLNode) i2.next();
451       if (!nodeIsIgnorableText(child2) && !used.contains(child2)) {
452         if (_print) {
453           _pw.println("Extra child node: " + child2.getNodeLocation());
454         }
455         ++diffCount;
456       }
457     }
458 
459     return diffCount;
460   }
461 
462   /**
463    * Looks for an exact match for the provided target XMLNode. If found, returns true. Suppresses
464    * complaints during search. If an exact match is found, the match is added to the list of "used"
465    * items.
466    *
467    * @param parent The node whose children to search for an exact match
468    * @param target The XMLNode we are trying to match
469    * @param usedList The list of children of node2 that have already matched other objects
470    * @return true if an exact match is found for the provided node.
471    */
472   private boolean foundExactMatch(final Element parent, XMLNode target, final List usedList) {
473     // Suppress complaints when we are looking for an exact match.
474     boolean previousPrint = _print;
475 
476     _print = false; // Suppress printing when we are "just looking"
477     boolean found = false;
478     for (Iterator i2 = parent.getChildIterator(); i2.hasNext();) {
479       XMLNode child2 = (XMLNode) i2.next();
480       if (!usedList.contains(child2) && compareNodes(target, child2) == 0) {
481         usedList.add(child2);
482         found = true;
483         break;
484       }
485     }
486 
487     // Restore printing
488     _print = previousPrint;
489     return found;
490   }
491 
492   /**
493    * Looks for a close patch to the provided target XMLNode among the children of the provided
494    * parent node. The difference between the closest match and the target is returned. If we cannot
495    * even find a close match, then we declare the target missing and return one difference.
496    * <p>
497    * Note: This method is only called when printing is enabled.
498    *
499    * @param parent The node whose children to search for an exact match
500    * @param target The XMLNode we are trying to match
501    * @param usedList The list of children of node2 that have already matched other objects
502    * @return the difference count
503    */
504   private int closestMatchDifference(final Element parent, final XMLNode target,
505       final List usedList) {
506     for (Iterator i2 = parent.getChildIterator(); i2.hasNext();) {
507       XMLNode child2 = (XMLNode) i2.next();
508       if (!usedList.contains(child2) && hasSameType(target, child2)
509           && hasSameName(target, child2)) {
510         usedList.add(child2);
511         return compareNodes(target, child2);
512       }
513     }
514 
515     _pw.println(
516         "Missing child node: " + target.getNodeLocation() + " for " + target.getNodeLocation());
517     return 1;
518   }
519 
520   /**
521    * Returns true if the given node is a TEXT node that contains only ignorable whitespace.
522    *
523    * @return true if the given node is a TEXT node that contains only ignorable whitespace.
524    */
525   private boolean nodeIsIgnorableText(final XMLNode child) {
526     return (child.getNodeType() == XMLNode.TEXT && compareText(child.getStringValue(), ""));
527   }
528 
529   /**
530    * Returns true if the two Strings are equal, ignoring whitespace differences that are ignorable.
531    *
532    * @return true if the two Strings are equal, ignoring whitespace differences that are ignorable.
533    */
534   private boolean compareText(final String s1, final String s2) {
535     if (s1.equals(s2)) {
536       return true;
537     }
538 
539     // Strings are different; compare token by token to ignore whitespace differences
540     StringTokenizer st1 = new StringTokenizer(s1);
541     StringTokenizer st2 = new StringTokenizer(s2);
542 
543     while (st1.hasMoreTokens() && st2.hasMoreTokens()) {
544       if (!st1.nextToken().equals(st2.nextToken())) {
545         return false;
546       }
547     }
548 
549     // If the Strings have different numbers of tokens, fail
550     if (st1.hasMoreTokens() || st2.hasMoreTokens()) {
551       return false;
552     }
553 
554     return true;
555   }
556 
557   /**
558    * Compares two strings. Considers null Strings to be the same as an empty String.
559    *
560    * @param one The first string to compare.
561    * @param two The second string to compare.
562    * @return true if the two strings are equals or are both "null or empty"
563    */
564   private boolean compareTextNullEqualsEmpty(String one, String two) {
565     String text1 = (one == null) ? "" : one;
566     String text2 = (two == null) ? "" : two;
567     return text1.equals(text2);
568   }
569 
570   private boolean hasSameName(final XMLNode node1, final XMLNode node2) {
571     String name1 = node1.getLocalName();
572     String name2 = node2.getLocalName();
573 
574     // ROOT may or may not have null name, so we must check for possible null values
575     if (name1 == null) {
576       return (name2 == null);
577     }
578     return name1.equals(name2);
579   }
580 
581   private boolean hasSameType(final XMLNode node1, final XMLNode node2) {
582     return (node1.getNodeType() == node2.getNodeType());
583   }
584 
585   private void printLocationInfo(final XMLNode node1, final XMLNode node2) {
586     if (_header) {
587       _header = false;
588       _pw.println("--- " + _file1);
589       _pw.println("+++ " + _file2);
590     }
591     _pw.print("@@ -");
592     _pw.print(node1.getNodeLocation());
593     _pw.print(" +");
594     _pw.print(node2.getNodeLocation());
595     _pw.println(" @@");
596   }
597 
598   private void printElementChangeBlock(final Element node1, final Element node2, final String msg) {
599     if (_print) {
600       _pw.print("- ");
601       printElement(node1);
602       _pw.print("+ ");
603       printElement(node2);
604       if (msg != null) {
605         _pw.println(msg);
606       }
607     }
608   }
609 
610   private void printElement(final Element node) {
611     _pw.print('<' + node.getLocalName());
612 
613     for (Iterator i = node.getAttributeIterator(); i.hasNext();) {
614       Attribute attr = (Attribute) i.next();
615       _pw.print(' ');
616       _pw.print(attr.getLocalName());
617       _pw.print("=\"");
618       _pw.print(attr.getStringValue());
619       _pw.print("\"");
620     }
621 
622     _pw.println('>');
623   }
624 
625   /**
626    * Prints the given text. Each line of the text is prefixed with the given prefix. If
627    * <code>text</code> has multiple newlines, the prefix will be printed on each line.
628    *
629    * @param prefix A prefix to display on each line of output
630    * @param text The text to display
631    */
632   private void printText(final String prefix, String text) {
633     if (text == null) {
634       _pw.println(prefix);
635       return;
636     }
637 
638     int idx = 0;
639     while ((idx = text.indexOf('\n')) >= 0) {
640       _pw.print(prefix);
641       _pw.println(text.substring(0, idx));
642       text = text.substring(idx + 1);
643     }
644     _pw.print(prefix);
645     _pw.println(text);
646   }
647 
648 }