1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.castor.xmlctf.xmldiff;
19
20 import java.io.PrintWriter;
21 import java.util.Iterator;
22 import java.util.LinkedList;
23 import java.util.List;
24 import java.util.StringTokenizer;
25
26 import org.castor.xmlctf.xmldiff.xml.XMLFileReader;
27 import org.castor.xmlctf.xmldiff.xml.nodes.Attribute;
28 import org.castor.xmlctf.xmldiff.xml.nodes.Element;
29 import org.castor.xmlctf.xmldiff.xml.nodes.ParentNode;
30 import org.castor.xmlctf.xmldiff.xml.nodes.Root;
31 import org.castor.xmlctf.xmldiff.xml.nodes.XMLNode;
32
33
34
35
36
37
38
39
40
41 public class XMLDiff {
42
43
44 private static final String XMLSCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
45
46
47 private final String _file1;
48
49 private final String _file2;
50
51 private final PrintWriter _pw = new PrintWriter(System.out, true);
52
53
54
55
56
57
58 private boolean _strictChildOrder = false;
59
60 private boolean _print = true;
61
62 private boolean _header = true;
63
64
65
66
67
68
69
70
71 public XMLDiff(final String file1, final String file2) {
72 if (file1 == null) {
73 String err = "The argument 'file1' may not be null.";
74 throw new IllegalArgumentException(err);
75 }
76
77 if (file2 == null) {
78 String err = "The argument 'file2' may not be null.";
79 throw new IllegalArgumentException(err);
80 }
81
82 _file1 = file1;
83 _file2 = file2;
84 }
85
86
87
88
89
90
91
92
93
94
95
96
97 public int compare() throws java.io.IOException {
98 XMLFileReader reader1 = new XMLFileReader(_file1);
99 XMLNode node1 = reader1.read();
100
101 XMLFileReader reader2 = new XMLFileReader(_file2);
102 XMLNode node2 = reader2.read();
103
104 return compareNodes(node1, node2);
105 }
106
107
108
109
110
111
112
113
114
115
116 private int compareNodes(final XMLNode node1, final XMLNode node2) {
117
118 if (!hasSameType(node1, node2)) {
119 if (_print) {
120 _pw.println("Types differ: <" + node1.getLocalName() + "> and <"
121 + node2.getLocalName() + "> for" + node1.getNodeLocation());
122 }
123 return 1;
124 }
125
126 int diffCount = 0;
127
128 String ns1 = node1.getNamespaceURI();
129 String ns2 = node2.getNamespaceURI();
130 if (!compareTextNullEqualsEmpty(ns1, ns2)) {
131 if (_print) {
132 _pw.println("Namespaces differ: ('" + ns1 + "' != '" + ns2 + "') for "
133 + node1.getNodeLocation());
134 }
135
136 ++diffCount;
137 }
138
139
140 String name1 = node1.getLocalName();
141 String name2 = node2.getLocalName();
142
143 if (name1 == null && name2 != null) {
144 if (_print) {
145 _pw.println("Names differ: null vs. <" + name2 + "> for "
146 + node1.getNodeLocation());
147 }
148 ++diffCount;
149 return diffCount;
150 } else if (name2 == null && name1 != null) {
151 if (_print) {
152 _pw.println("Names differ: <" + name1 + "> vs null for "
153 + node1.getNodeLocation());
154 }
155 ++diffCount;
156 return diffCount;
157 } else if (name1 != null && !name1.equals(name2)) {
158 if (_print) {
159 _pw.println("Names differ: <" + name1 + "> != <" + name2 + "> for "
160 + node1.getNodeLocation());
161 }
162 ++diffCount;
163 return diffCount;
164 }
165
166
167 switch (node1.getNodeType()) {
168 case XMLNode.ROOT:
169 diffCount += compareElementsStrictOrder((Root)node1, (Root)node2);
170 break;
171
172 case XMLNode.ELEMENT:
173 diffCount += compareElements((Element)node1, (Element)node2);
174 break;
175
176 case XMLNode.ATTRIBUTE:
177 diffCount += compareStringValues(node1, node2);
178 break;
179
180 case XMLNode.TEXT:
181 diffCount += compareStringValues(node1, node2);
182 break;
183
184 case XMLNode.PROCESSING_INSTRUCTION:
185
186 break;
187
188 default:
189 System.out.println("Unexpected node type in XMLDiff: " + node1.getNodeType());
190 break;
191 }
192
193 return diffCount;
194 }
195
196
197
198
199
200
201
202 private int compareStringValues(final XMLNode node1, final XMLNode node2) {
203 if (compareText(node1.getStringValue(), node2.getStringValue())) {
204 return 0;
205 }
206
207 if (_print) {
208 _pw.println();
209 printLocationInfo(node1, node2);
210 printText("- ", node1.getStringValue());
211 _pw.println();
212 printText("+ ", node2.getStringValue());
213 }
214 return 1;
215 }
216
217
218
219
220
221 private int compareAttributes(final Element node1, final Element node2) {
222 int diffCount = 0;
223
224 for (Iterator i = node1.getAttributeIterator(); i.hasNext(); ) {
225 Attribute attr1 = (Attribute) i.next();
226
227
228 String attValue2 = node2.getAttribute(attr1.getNamespaceURI(), attr1.getLocalName());
229 if (attValue2 == null) {
230
231 if (missingattributeIsIgnorable(attr1)) {
232 continue;
233 }
234
235
236 printElementChangeBlock(node1, node2, "Attribute '"
237 + attr1.getNodeLocation()
238 + "' does not exist in the second document.");
239 diffCount++;
240 continue;
241 }
242
243
244 String attValue1 = attr1.getStringValue();
245 if (!compareTextLikeQName(node1, node2, attValue1, attValue2)) {
246 printElementChangeBlock(node1, node2, "Attribute '"
247 + attr1.getNodeLocation()
248 + "' values are different.");
249 diffCount++;
250 }
251 }
252
253
254 for (Iterator i = node2.getAttributeIterator(); i.hasNext(); ) {
255 Attribute attr2 = (Attribute) i.next();
256 if (node1.getAttribute(attr2.getNamespaceURI(), attr2.getLocalName()) == null) {
257
258 if (missingattributeIsIgnorable(attr2)) {
259 continue;
260 }
261
262
263 printElementChangeBlock(node1, node2, "Attribute '"
264 + attr2.getNodeLocation()
265 + "' does not exist in the first document.");
266 diffCount++;
267 }
268 }
269
270 return diffCount;
271 }
272
273 private boolean missingattributeIsIgnorable(Attribute attr) {
274 String name = attr.getLocalName();
275 String ns = attr.getNamespaceURI();
276 if (ns == null) {
277 ns = "";
278 }
279
280 if (name.equals("noNamespaceSchemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
281 return true;
282 }
283 if (name.equals("schemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
284 return true;
285 }
286 return false;
287 }
288
289
290
291
292
293
294
295
296
297
298
299 private boolean compareTextLikeQName(final XMLNode node1, final XMLNode node2,
300 final String attValue1, final String attValue2) {
301
302 if (compareText(attValue1, attValue2)) {
303 return true;
304 }
305
306
307 final int idx1 = attValue1.indexOf(':');
308 final int idx2 = attValue2.indexOf(':');
309 if (idx1 < 0 && idx2 < 0) {
310 return false;
311 }
312
313 final String prefix1;
314 final String prefix2;
315 final String value1;
316 final String value2;
317
318 if (idx1 >= 0) {
319 value1 = attValue1.substring(idx1 + 1);
320 prefix1 = attValue1.substring(0, idx1);
321 } else {
322 value1 = attValue1;
323 prefix1 = "";
324 }
325
326 if (idx2 >= 0) {
327 value2 = attValue2.substring(idx2 + 1);
328 prefix2 = attValue2.substring(0, idx2);
329 } else {
330 value2 = attValue2;
331 prefix2 = "";
332 }
333
334
335 return compareText(value1, value2)
336 && compareTextNullEqualsEmpty(node1.getNamespaceURI(prefix1),
337 node2.getNamespaceURI(prefix2));
338 }
339
340
341
342
343
344
345
346
347
348 private int compareElements(final Element node1, final Element node2) {
349 int diffCount = compareAttributes(node1, node2);
350
351 if (_strictChildOrder) {
352 diffCount += compareElementsStrictOrder(node1, node2);
353 } else {
354 diffCount += compareElementsLooseOrder(node1, node2);
355 }
356 return diffCount;
357 }
358
359
360
361
362
363
364
365
366
367
368 private int compareElementsStrictOrder(final ParentNode node1, final ParentNode node2) {
369 int diffCount = 0;
370
371 Iterator i1 = node1.getChildIterator();
372 Iterator i2 = node2.getChildIterator();
373
374
375 if (i1.hasNext() && i2.hasNext()) {
376 XMLNode child1 = (XMLNode) i1.next();
377 XMLNode child2 = (XMLNode) i2.next();
378 while (child1 != null && child2 != null) {
379 if (nodeIsIgnorableText(child1)) {
380 if (!i1.hasNext()) {
381 break;
382 }
383 child1 = (XMLNode) i1.next();
384 continue;
385 }
386 if (nodeIsIgnorableText(child2)) {
387 if (!i2.hasNext()) {
388 break;
389 }
390 child2 = (XMLNode) i2.next();
391 continue;
392 }
393
394 diffCount += compareNodes(child1, child2);
395
396 if (!i1.hasNext() || !i2.hasNext()) {
397 break;
398 }
399
400 child1 = (XMLNode) i1.next();
401 child2 = (XMLNode) i2.next();
402 }
403 }
404
405
406 while (i1.hasNext()) {
407 XMLNode child1 = (XMLNode) i1.next();
408 if (!nodeIsIgnorableText(child1)) {
409 if (_print) {
410 printLocationInfo(child1, null);
411 _pw.println("- ");
412 }
413 ++diffCount;
414 }
415 }
416
417
418 while (i2.hasNext()) {
419 XMLNode child2 = (XMLNode) i2.next();
420 if (!nodeIsIgnorableText(child2)) {
421 if (_print) {
422 printLocationInfo(child2, null);
423 _pw.println("- ");
424 }
425 ++diffCount;
426 }
427 }
428
429 return diffCount;
430 }
431
432
433
434
435
436
437
438
439
440 private int compareElementsLooseOrder(final Element node1, final Element node2) {
441 int diffCount = 0;
442
443 final List used = new LinkedList();
444
445 for (Iterator i1 = node1.getChildIterator(); i1.hasNext(); ) {
446 XMLNode child1 = (XMLNode) i1.next();
447
448
449 if (nodeIsIgnorableText(child1) || foundExactMatch(node2, child1, used)) {
450 continue;
451 }
452
453
454 if (_print) {
455 diffCount += closestMatchDifference(node2, child1, used);
456 } else {
457 diffCount++;
458 }
459 }
460
461
462 for (Iterator i2 = node2.getChildIterator(); i2.hasNext(); ) {
463 XMLNode child2 = (XMLNode) i2.next();
464 if (!nodeIsIgnorableText(child2) && !used.contains(child2)) {
465 if (_print) {
466 _pw.println("Extra child node: " + child2.getNodeLocation());
467 }
468 ++diffCount;
469 }
470 }
471
472 return diffCount;
473 }
474
475
476
477
478
479
480
481
482
483
484
485
486 private boolean foundExactMatch(final Element parent, XMLNode target, final List usedList) {
487
488 boolean previousPrint = _print;
489
490 _print = false;
491 boolean found = false;
492 for (Iterator i2 = parent.getChildIterator(); i2.hasNext(); ) {
493 XMLNode child2 = (XMLNode) i2.next();
494 if (!usedList.contains(child2) && compareNodes(target, child2) == 0) {
495 usedList.add(child2);
496 found = true;
497 break;
498 }
499 }
500
501
502 _print = previousPrint;
503 return found;
504 }
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520 private int closestMatchDifference(final Element parent, final XMLNode target,
521 final List usedList) {
522 for (Iterator i2 = parent.getChildIterator(); i2.hasNext(); ) {
523 XMLNode child2 = (XMLNode) i2.next();
524 if (!usedList.contains(child2) && hasSameType(target, child2)
525 && hasSameName(target, child2)) {
526 usedList.add(child2);
527 return compareNodes(target, child2);
528 }
529 }
530
531 _pw.println("Missing child node: " + target.getNodeLocation() + " for "
532 + target.getNodeLocation());
533 return 1;
534 }
535
536
537
538
539
540
541
542
543 private boolean nodeIsIgnorableText(final XMLNode child) {
544 return (child.getNodeType() == XMLNode.TEXT && compareText(child.getStringValue(), ""));
545 }
546
547
548
549
550
551
552
553
554 private boolean compareText(final String s1, final String s2) {
555 if (s1.equals(s2)) {
556 return true;
557 }
558
559
560 StringTokenizer st1 = new StringTokenizer(s1);
561 StringTokenizer st2 = new StringTokenizer(s2);
562
563 while (st1.hasMoreTokens() && st2.hasMoreTokens()) {
564 if (!st1.nextToken().equals(st2.nextToken())) {
565 return false;
566 }
567 }
568
569
570 if (st1.hasMoreTokens() || st2.hasMoreTokens()) {
571 return false;
572 }
573
574 return true;
575 }
576
577
578
579
580
581
582
583
584
585 private boolean compareTextNullEqualsEmpty(String one, String two) {
586 String text1 = (one == null) ? "" : one;
587 String text2 = (two == null) ? "" : two;
588 return text1.equals(text2);
589 }
590
591 private boolean hasSameName(final XMLNode node1, final XMLNode node2) {
592 String name1 = node1.getLocalName();
593 String name2 = node2.getLocalName();
594
595
596 if (name1 == null) {
597 return (name2 == null);
598 }
599 return name1.equals(name2);
600 }
601
602 private boolean hasSameType(final XMLNode node1, final XMLNode node2) {
603 return (node1.getNodeType() == node2.getNodeType());
604 }
605
606 private void printLocationInfo(final XMLNode node1, final XMLNode node2) {
607 if (_header) {
608 _header = false;
609 _pw.println("--- " + _file1);
610 _pw.println("+++ " + _file2);
611 }
612 _pw.print("@@ -");
613 _pw.print(node1.getNodeLocation());
614 _pw.print(" +");
615 _pw.print(node2.getNodeLocation());
616 _pw.println(" @@");
617 }
618
619 private void printElementChangeBlock(final Element node1, final Element node2, final String msg) {
620 if (_print) {
621 _pw.print("- ");
622 printElement(node1);
623 _pw.print("+ ");
624 printElement(node2);
625 if (msg != null) {
626 _pw.println(msg);
627 }
628 }
629 }
630
631 private void printElement(final Element node) {
632 _pw.print('<' + node.getLocalName());
633
634 for (Iterator i = node.getAttributeIterator(); i.hasNext(); ) {
635 Attribute attr = (Attribute) i.next();
636 _pw.print(' ');
637 _pw.print(attr.getLocalName());
638 _pw.print("=\"");
639 _pw.print(attr.getStringValue());
640 _pw.print("\"");
641 }
642
643 _pw.println('>');
644 }
645
646
647
648
649
650
651
652
653
654 private void printText(final String prefix, String text) {
655 if (text == null) {
656 _pw.println(prefix);
657 return;
658 }
659
660 int idx = 0;
661 while ((idx = text.indexOf('\n')) >= 0) {
662 _pw.print(prefix);
663 _pw.println(text.substring(0, idx));
664 text = text.substring(idx + 1);
665 }
666 _pw.print(prefix);
667 _pw.println(text);
668 }
669
670 }