1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.castor.xmlctf.xmldiff;
17
18 import java.io.PrintWriter;
19 import java.util.Iterator;
20 import java.util.LinkedList;
21 import java.util.List;
22 import java.util.StringTokenizer;
23
24 import org.castor.xmlctf.xmldiff.xml.XMLFileReader;
25 import org.castor.xmlctf.xmldiff.xml.nodes.Attribute;
26 import org.castor.xmlctf.xmldiff.xml.nodes.Element;
27 import org.castor.xmlctf.xmldiff.xml.nodes.ParentNode;
28 import org.castor.xmlctf.xmldiff.xml.nodes.Root;
29 import org.castor.xmlctf.xmldiff.xml.nodes.XMLNode;
30
31
32
33
34
35
36
37
38
39 public class XMLDiff {
40
41
42 private static final String XMLSCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
43
44
45 private final String _file1;
46
47 private final String _file2;
48
49 private final PrintWriter _pw = new PrintWriter(System.out, true);
50
51
52
53
54
55
56 private boolean _strictChildOrder = false;
57
58 private boolean _print = true;
59
60 private boolean _header = true;
61
62
63
64
65
66
67
68 public XMLDiff(final String file1, final String file2) {
69 if (file1 == null) {
70 String err = "The argument 'file1' may not be null.";
71 throw new IllegalArgumentException(err);
72 }
73
74 if (file2 == null) {
75 String err = "The argument 'file2' may not be null.";
76 throw new IllegalArgumentException(err);
77 }
78
79 _file1 = file1;
80 _file2 = file2;
81 }
82
83
84
85
86
87
88
89
90
91
92
93 public int compare() throws java.io.IOException {
94 XMLFileReader reader1 = new XMLFileReader(_file1);
95 XMLNode node1 = reader1.read();
96
97 XMLFileReader reader2 = new XMLFileReader(_file2);
98 XMLNode node2 = reader2.read();
99
100 return compareNodes(node1, node2);
101 }
102
103
104
105
106
107
108
109
110
111
112 private int compareNodes(final XMLNode node1, final XMLNode node2) {
113
114 if (!hasSameType(node1, node2)) {
115 if (_print) {
116 _pw.println("Types differ: <" + node1.getLocalName() + "> and <" + node2.getLocalName()
117 + "> for" + node1.getNodeLocation());
118 }
119 return 1;
120 }
121
122 int diffCount = 0;
123
124 String ns1 = node1.getNamespaceURI();
125 String ns2 = node2.getNamespaceURI();
126 if (!compareTextNullEqualsEmpty(ns1, ns2)) {
127 if (_print) {
128 _pw.println(
129 "Namespaces differ: ('" + ns1 + "' != '" + ns2 + "') for " + node1.getNodeLocation());
130 }
131
132 ++diffCount;
133 }
134
135
136 String name1 = node1.getLocalName();
137 String name2 = node2.getLocalName();
138
139 if (name1 == null && name2 != null) {
140 if (_print) {
141 _pw.println("Names differ: null vs. <" + name2 + "> for " + node1.getNodeLocation());
142 }
143 ++diffCount;
144 return diffCount;
145 } else if (name2 == null && name1 != null) {
146 if (_print) {
147 _pw.println("Names differ: <" + name1 + "> vs null for " + node1.getNodeLocation());
148 }
149 ++diffCount;
150 return diffCount;
151 } else if (name1 != null && !name1.equals(name2)) {
152 if (_print) {
153 _pw.println(
154 "Names differ: <" + name1 + "> != <" + name2 + "> for " + node1.getNodeLocation());
155 }
156 ++diffCount;
157 return diffCount;
158 }
159
160
161 switch (node1.getNodeType()) {
162 case XMLNode.ROOT:
163 diffCount += compareElementsStrictOrder((Root) node1, (Root) node2);
164 break;
165
166 case XMLNode.ELEMENT:
167 diffCount += compareElements((Element) node1, (Element) node2);
168 break;
169
170 case XMLNode.ATTRIBUTE:
171 diffCount += compareStringValues(node1, node2);
172 break;
173
174 case XMLNode.TEXT:
175 diffCount += compareStringValues(node1, node2);
176 break;
177
178 case XMLNode.PROCESSING_INSTRUCTION:
179
180 break;
181
182 default:
183 System.out.println("Unexpected node type in XMLDiff: " + node1.getNodeType());
184 break;
185 }
186
187 return diffCount;
188 }
189
190
191
192
193
194
195
196
197 private int compareStringValues(final XMLNode node1, final XMLNode node2) {
198 if (compareText(node1.getStringValue(), node2.getStringValue())) {
199 return 0;
200 }
201
202 if (_print) {
203 _pw.println();
204 printLocationInfo(node1, node2);
205 printText("- ", node1.getStringValue());
206 _pw.println();
207 printText("+ ", node2.getStringValue());
208 }
209 return 1;
210 }
211
212
213
214
215
216 private int compareAttributes(final Element node1, final Element node2) {
217 int diffCount = 0;
218
219 for (Iterator i = node1.getAttributeIterator(); i.hasNext();) {
220 Attribute attr1 = (Attribute) i.next();
221
222
223 String attValue2 = node2.getAttribute(attr1.getNamespaceURI(), attr1.getLocalName());
224 if (attValue2 == null) {
225
226 if (missingattributeIsIgnorable(attr1)) {
227 continue;
228 }
229
230
231 printElementChangeBlock(node1, node2,
232 "Attribute '" + attr1.getNodeLocation() + "' does not exist in the second document.");
233 diffCount++;
234 continue;
235 }
236
237
238 String attValue1 = attr1.getStringValue();
239 if (!compareTextLikeQName(node1, node2, attValue1, attValue2)) {
240 printElementChangeBlock(node1, node2,
241 "Attribute '" + attr1.getNodeLocation() + "' values are different.");
242 diffCount++;
243 }
244 }
245
246
247 for (Iterator i = node2.getAttributeIterator(); i.hasNext();) {
248 Attribute attr2 = (Attribute) i.next();
249 if (node1.getAttribute(attr2.getNamespaceURI(), attr2.getLocalName()) == null) {
250
251 if (missingattributeIsIgnorable(attr2)) {
252 continue;
253 }
254
255
256 printElementChangeBlock(node1, node2,
257 "Attribute '" + attr2.getNodeLocation() + "' does not exist in the first document.");
258 diffCount++;
259 }
260 }
261
262 return diffCount;
263 }
264
265 private boolean missingattributeIsIgnorable(Attribute attr) {
266 String name = attr.getLocalName();
267 String ns = attr.getNamespaceURI();
268 if (ns == null) {
269 ns = "";
270 }
271
272 if (name.equals("noNamespaceSchemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
273 return true;
274 }
275 if (name.equals("schemaLocation") && ns.equals(XMLSCHEMA_INSTANCE)) {
276 return true;
277 }
278 return false;
279 }
280
281
282
283
284
285
286
287
288
289
290 private boolean compareTextLikeQName(final XMLNode node1, final XMLNode node2,
291 final String attValue1, final String attValue2) {
292
293 if (compareText(attValue1, attValue2)) {
294 return true;
295 }
296
297
298 final int idx1 = attValue1.indexOf(':');
299 final int idx2 = attValue2.indexOf(':');
300 if (idx1 < 0 && idx2 < 0) {
301 return false;
302 }
303
304 final String prefix1;
305 final String prefix2;
306 final String value1;
307 final String value2;
308
309 if (idx1 >= 0) {
310 value1 = attValue1.substring(idx1 + 1);
311 prefix1 = attValue1.substring(0, idx1);
312 } else {
313 value1 = attValue1;
314 prefix1 = "";
315 }
316
317 if (idx2 >= 0) {
318 value2 = attValue2.substring(idx2 + 1);
319 prefix2 = attValue2.substring(0, idx2);
320 } else {
321 value2 = attValue2;
322 prefix2 = "";
323 }
324
325
326 return compareText(value1, value2) && compareTextNullEqualsEmpty(node1.getNamespaceURI(prefix1),
327 node2.getNamespaceURI(prefix2));
328 }
329
330
331
332
333
334
335
336
337 private int compareElements(final Element node1, final Element node2) {
338 int diffCount = compareAttributes(node1, node2);
339
340 if (_strictChildOrder) {
341 diffCount += compareElementsStrictOrder(node1, node2);
342 } else {
343 diffCount += compareElementsLooseOrder(node1, node2);
344 }
345 return diffCount;
346 }
347
348
349
350
351
352
353
354
355
356 private int compareElementsStrictOrder(final ParentNode node1, final ParentNode node2) {
357 int diffCount = 0;
358
359 Iterator i1 = node1.getChildIterator();
360 Iterator i2 = node2.getChildIterator();
361
362
363 if (i1.hasNext() && i2.hasNext()) {
364 XMLNode child1 = (XMLNode) i1.next();
365 XMLNode child2 = (XMLNode) i2.next();
366 while (child1 != null && child2 != null) {
367 if (nodeIsIgnorableText(child1)) {
368 if (!i1.hasNext()) {
369 break;
370 }
371 child1 = (XMLNode) i1.next();
372 continue;
373 }
374 if (nodeIsIgnorableText(child2)) {
375 if (!i2.hasNext()) {
376 break;
377 }
378 child2 = (XMLNode) i2.next();
379 continue;
380 }
381
382 diffCount += compareNodes(child1, child2);
383
384 if (!i1.hasNext() || !i2.hasNext()) {
385 break;
386 }
387
388 child1 = (XMLNode) i1.next();
389 child2 = (XMLNode) i2.next();
390 }
391 }
392
393
394 while (i1.hasNext()) {
395 XMLNode child1 = (XMLNode) i1.next();
396 if (!nodeIsIgnorableText(child1)) {
397 if (_print) {
398 printLocationInfo(child1, null);
399 _pw.println("- ");
400 }
401 ++diffCount;
402 }
403 }
404
405
406 while (i2.hasNext()) {
407 XMLNode child2 = (XMLNode) i2.next();
408 if (!nodeIsIgnorableText(child2)) {
409 if (_print) {
410 printLocationInfo(child2, null);
411 _pw.println("- ");
412 }
413 ++diffCount;
414 }
415 }
416
417 return diffCount;
418 }
419
420
421
422
423
424
425
426
427 private int compareElementsLooseOrder(final Element node1, final Element node2) {
428 int diffCount = 0;
429
430 final List used = new LinkedList();
431
432 for (Iterator i1 = node1.getChildIterator(); i1.hasNext();) {
433 XMLNode child1 = (XMLNode) i1.next();
434
435
436 if (nodeIsIgnorableText(child1) || foundExactMatch(node2, child1, used)) {
437 continue;
438 }
439
440
441 if (_print) {
442 diffCount += closestMatchDifference(node2, child1, used);
443 } else {
444 diffCount++;
445 }
446 }
447
448
449 for (Iterator i2 = node2.getChildIterator(); i2.hasNext();) {
450 XMLNode child2 = (XMLNode) i2.next();
451 if (!nodeIsIgnorableText(child2) && !used.contains(child2)) {
452 if (_print) {
453 _pw.println("Extra child node: " + child2.getNodeLocation());
454 }
455 ++diffCount;
456 }
457 }
458
459 return diffCount;
460 }
461
462
463
464
465
466
467
468
469
470
471
472 private boolean foundExactMatch(final Element parent, XMLNode target, final List usedList) {
473
474 boolean previousPrint = _print;
475
476 _print = false;
477 boolean found = false;
478 for (Iterator i2 = parent.getChildIterator(); i2.hasNext();) {
479 XMLNode child2 = (XMLNode) i2.next();
480 if (!usedList.contains(child2) && compareNodes(target, child2) == 0) {
481 usedList.add(child2);
482 found = true;
483 break;
484 }
485 }
486
487
488 _print = previousPrint;
489 return found;
490 }
491
492
493
494
495
496
497
498
499
500
501
502
503
504 private int closestMatchDifference(final Element parent, final XMLNode target,
505 final List usedList) {
506 for (Iterator i2 = parent.getChildIterator(); i2.hasNext();) {
507 XMLNode child2 = (XMLNode) i2.next();
508 if (!usedList.contains(child2) && hasSameType(target, child2)
509 && hasSameName(target, child2)) {
510 usedList.add(child2);
511 return compareNodes(target, child2);
512 }
513 }
514
515 _pw.println(
516 "Missing child node: " + target.getNodeLocation() + " for " + target.getNodeLocation());
517 return 1;
518 }
519
520
521
522
523
524
525 private boolean nodeIsIgnorableText(final XMLNode child) {
526 return (child.getNodeType() == XMLNode.TEXT && compareText(child.getStringValue(), ""));
527 }
528
529
530
531
532
533
534 private boolean compareText(final String s1, final String s2) {
535 if (s1.equals(s2)) {
536 return true;
537 }
538
539
540 StringTokenizer st1 = new StringTokenizer(s1);
541 StringTokenizer st2 = new StringTokenizer(s2);
542
543 while (st1.hasMoreTokens() && st2.hasMoreTokens()) {
544 if (!st1.nextToken().equals(st2.nextToken())) {
545 return false;
546 }
547 }
548
549
550 if (st1.hasMoreTokens() || st2.hasMoreTokens()) {
551 return false;
552 }
553
554 return true;
555 }
556
557
558
559
560
561
562
563
564 private boolean compareTextNullEqualsEmpty(String one, String two) {
565 String text1 = (one == null) ? "" : one;
566 String text2 = (two == null) ? "" : two;
567 return text1.equals(text2);
568 }
569
570 private boolean hasSameName(final XMLNode node1, final XMLNode node2) {
571 String name1 = node1.getLocalName();
572 String name2 = node2.getLocalName();
573
574
575 if (name1 == null) {
576 return (name2 == null);
577 }
578 return name1.equals(name2);
579 }
580
581 private boolean hasSameType(final XMLNode node1, final XMLNode node2) {
582 return (node1.getNodeType() == node2.getNodeType());
583 }
584
585 private void printLocationInfo(final XMLNode node1, final XMLNode node2) {
586 if (_header) {
587 _header = false;
588 _pw.println("--- " + _file1);
589 _pw.println("+++ " + _file2);
590 }
591 _pw.print("@@ -");
592 _pw.print(node1.getNodeLocation());
593 _pw.print(" +");
594 _pw.print(node2.getNodeLocation());
595 _pw.println(" @@");
596 }
597
598 private void printElementChangeBlock(final Element node1, final Element node2, final String msg) {
599 if (_print) {
600 _pw.print("- ");
601 printElement(node1);
602 _pw.print("+ ");
603 printElement(node2);
604 if (msg != null) {
605 _pw.println(msg);
606 }
607 }
608 }
609
610 private void printElement(final Element node) {
611 _pw.print('<' + node.getLocalName());
612
613 for (Iterator i = node.getAttributeIterator(); i.hasNext();) {
614 Attribute attr = (Attribute) i.next();
615 _pw.print(' ');
616 _pw.print(attr.getLocalName());
617 _pw.print("=\"");
618 _pw.print(attr.getStringValue());
619 _pw.print("\"");
620 }
621
622 _pw.println('>');
623 }
624
625
626
627
628
629
630
631
632 private void printText(final String prefix, String text) {
633 if (text == null) {
634 _pw.println(prefix);
635 return;
636 }
637
638 int idx = 0;
639 while ((idx = text.indexOf('\n')) >= 0) {
640 _pw.print(prefix);
641 _pw.println(text.substring(0, idx));
642 text = text.substring(idx + 1);
643 }
644 _pw.print(prefix);
645 _pw.println(text);
646 }
647
648 }