View Javadoc
1   /**
2    * Redistribution and use of this software and associated documentation ("Software"), with or
3    * without modification, are permitted provided that the following conditions are met:
4    *
5    * 1. Redistributions of source code must retain copyright statements and notices. Redistributions
6    * must also contain a copy of this document.
7    *
8    * 2. Redistributions in binary form must reproduce the above copyright notice, this list of
9    * conditions and the following disclaimer in the documentation and/or other materials provided with
10   * the distribution.
11   *
12   * 3. The name "Exolab" must not be used to endorse or promote products derived from this Software
13   * without prior written permission of Intalio, Inc. For written permission, please contact
14   * info@exolab.org.
15   *
16   * 4. Products derived from this Software may not be called "Exolab" nor may "Exolab" appear in
17   * their names without prior written permission of Intalio, Inc. Exolab is a registered trademark of
18   * Intalio, Inc.
19   *
20   * 5. Due credit should be given to the Exolab Project (http://www.exolab.org/).
21   *
22   * THIS SOFTWARE IS PROVIDED BY INTALIO, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESSED OR
23   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
24   * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTALIO, INC. OR ITS
25   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
28   * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29   * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30   *
31   * Copyright 2001 (C) Intalio, Inc. All Rights Reserved.
32   *
33   * $Id$
34   */
35  
36  package org.exolab.castor.xml.schema.util;
37  
38  import java.util.Enumeration;
39  import java.util.Stack;
40  import java.util.Vector;
41  
42  import org.exolab.castor.xml.Namespaces;
43  import org.exolab.castor.xml.schema.AttributeDecl;
44  import org.exolab.castor.xml.schema.ComplexType;
45  import org.exolab.castor.xml.schema.ContentType;
46  import org.exolab.castor.xml.schema.ElementDecl;
47  import org.exolab.castor.xml.schema.Group;
48  import org.exolab.castor.xml.schema.Order;
49  import org.exolab.castor.xml.schema.Particle;
50  import org.exolab.castor.xml.schema.Schema;
51  import org.exolab.castor.xml.schema.SchemaException;
52  import org.exolab.castor.xml.schema.Structure;
53  import org.exolab.castor.xml.schema.XMLType;
54  import org.xml.sax.AttributeList;
55  import org.xml.sax.DocumentHandler;
56  import org.xml.sax.Locator;
57  import org.xml.sax.SAXException;
58  import org.xml.sax.SAXParseException;
59  
60  
61  
62  /**
63   * A Utility class which will attempt to create an XML Schema Object Model based on a given XML
64   * instance document.
65   *
66   * @author <a href="mailto:kvisco@intalio.com">Keith Visco</a>
67   * @version $Revision$ $Date: 2006-04-25 15:08:23 -0600 (Tue, 25 Apr 2006) $
68   **/
69  public final class XMLInstance2SchemaHandler implements DocumentHandler, org.xml.sax.ErrorHandler {
70  
71  
72    private static final String XMLNS = "xmlns";
73    private static final String DEFAULT_PREFIX = "xsd";
74    // --------------------/
75    // - Member Variables -/
76    // --------------------/
77  
78    /**
79     * The schema we are creating
80     **/
81    private Schema _schema = null;
82  
83    /**
84     * The stack of element declarations
85     **/
86    private final Stack<StateInfo> _siStack = new Stack<>();
87  
88    private String _nsPrefix = null;
89  
90    private Order _defaultGroupOrder = Order.sequence;
91  
92    // ----------------/
93    // - Constructors -/
94    // ----------------/
95  
96    /**
97     * Creates a new XMLInstance2SchemaHandler
98     *
99     **/
100   public XMLInstance2SchemaHandler() {
101     this(null);
102   } // -- XMLInstance2SchemaHandler
103 
104   /**
105    * Creates a new XMLInstance2SchemaHandler
106    *
107    **/
108   public XMLInstance2SchemaHandler(Schema schema) {
109     super();
110 
111     _schema = schema;
112     // -- create Schema and initialize
113     if (_schema == null) {
114       _schema = new Schema();
115       _schema.addNamespace(DEFAULT_PREFIX, Schema.DEFAULT_SCHEMA_NS);
116       _nsPrefix = DEFAULT_PREFIX;
117     }
118     // -- find or declare namespace prefix
119     else {
120       _nsPrefix = null;
121       Namespaces namespaces = _schema.getNamespaces();
122       Enumeration<String> enumeration = namespaces.getLocalNamespacePrefixes();
123       while (enumeration.hasMoreElements()) {
124         String key = enumeration.nextElement();
125         if (namespaces.getNamespaceURI(key).equals(Schema.DEFAULT_SCHEMA_NS)) {
126           _nsPrefix = key;
127           break;
128         }
129       }
130       if (_nsPrefix == null) {
131         _schema.addNamespace(DEFAULT_PREFIX, Schema.DEFAULT_SCHEMA_NS);
132         _nsPrefix = DEFAULT_PREFIX;
133       }
134     }
135   } // -- XMLInstance2SchemaHandler
136 
137   // -----------/
138   // - Methods -/
139   // -----------/
140 
141   /**
142    * Returns the XML Schema object that is being used by this handler
143    *
144    * @return the XML Schema object that is being used by this handler
145    **/
146   public Schema getSchema() {
147     return _schema;
148   }
149 
150   /**
151    * This method is used to set the default group type. Either "sequence" or "all". The default is
152    * "sequence".
153    *
154    * @param order the default group order to use.
155    **/
156   protected void setDefaultGroupOrder(Order order) {
157     _defaultGroupOrder = order;
158   } // -- setDefaultGroupOrder
159 
160   // ---------------------------------------/
161   // - org.xml.sax.DocumentHandler methods -/
162   // ---------------------------------------/
163 
164   public void characters(char[] ch, int start, int length) throws org.xml.sax.SAXException {
165     if (_siStack.isEmpty())
166       return;
167 
168     StateInfo sInfo = (StateInfo) _siStack.peek();
169 
170     if (sInfo.buffer == null) {
171       sInfo.buffer = new StringBuffer();
172     }
173     sInfo.buffer.append(ch, start, length);
174 
175     if (sInfo.complex) {
176       sInfo.mixed = true;
177     }
178   } // -- characters
179 
180   public void endDocument() throws org.xml.sax.SAXException {
181     // -- do nothing
182 
183   } // -- endDocument
184 
185   public void endElement(String name) throws org.xml.sax.SAXException {
186 
187     // -- strip namespace prefix
188     int idx = name.indexOf(':');
189     if (idx >= 0) {
190       name = name.substring(idx + 1);
191     }
192 
193     StateInfo sInfo = (StateInfo) _siStack.pop();
194 
195     // -- if we don't have a type, it means there are no
196     // -- children and therefore the type is a simpleType or
197     // -- simpleContent
198     if ((sInfo.element.getType() == null) && (sInfo.buffer != null)) {
199 
200       // -- create SimpleType (guess type)
201       String typeName = _nsPrefix + ':' + DatatypeHandler.guessType(sInfo.buffer.toString());
202       sInfo.element.setTypeReference(typeName);
203       // -- simpleContent
204       if (!sInfo.attributes.isEmpty()) {
205         ComplexType cType = new ComplexType(_schema);
206         // -- SHOULD CHANGE THIS TO SIMPLE CONTENT WHEN
207         // -- SCHEMA WRITER BUGS ARE FIXED
208         cType.setContentType(ContentType.mixed);
209         sInfo.element.setType(cType);
210         Group group = new Group();
211         group.setOrder(_defaultGroupOrder);
212         // -- add attributes
213         try {
214           cType.addGroup(group);
215           for (AttributeDecl attDecl : sInfo.attributes) {
216             cType.addAttributeDecl(attDecl);
217           }
218         } catch (SchemaException sx) {
219           throw new SAXException(sx);
220         }
221       }
222     } else {
223       ComplexType cType = (ComplexType) sInfo.element.getType();
224 
225       if (cType == null && !sInfo.attributes.isEmpty()) {
226         cType = new ComplexType(_schema);
227         sInfo.element.setType(cType);
228         Group group = new Group();
229         group.setOrder(_defaultGroupOrder);
230         // -- add attributes
231         try {
232           cType.addGroup(group);
233         } catch (SchemaException sx) {
234           throw new SAXException(sx);
235         }
236       }
237 
238       if (cType != null) {
239         for (AttributeDecl attDecl : sInfo.attributes) {
240           cType.addAttributeDecl(attDecl);
241         }
242       }
243     }
244 
245     // -- put element into parent element or as top-level in schema
246     if (!_siStack.isEmpty()) {
247       StateInfo parentInfo = (StateInfo) _siStack.peek();
248       ComplexType type = (ComplexType) parentInfo.element.getType();
249       Group group = null;
250       if ((type == null) || (type.getParticleCount() == 0)) {
251         if (type == null) {
252           parentInfo.complex = true;
253           type = new ComplexType(_schema);
254           parentInfo.element.setType(type);
255         }
256         group = new Group();
257         group.setOrder(_defaultGroupOrder);
258         try {
259           type.addGroup(group);
260           // -- add element
261           group.addElementDecl(sInfo.element);
262         } catch (SchemaException sx) {
263           throw new SAXException(sx);
264         }
265       } else {
266         group = (Group) type.getParticle(0);
267         // -- check for another element declaration with
268         // -- same name ...
269         ElementDecl element = group.getElementDecl(name);
270         boolean checkGroupType = false;
271         if (element != null) {
272           // -- if complex...merge definition
273           if (sInfo.complex) {
274             try {
275               merge(element, sInfo.element);
276             } catch (SchemaException sx) {
277               throw new SAXException(sx);
278             }
279           }
280           element.setMaxOccurs(Particle.UNBOUNDED);
281           checkGroupType = true;
282         } else {
283           try {
284             group.addElementDecl(sInfo.element);
285           } catch (SchemaException sx) {
286             throw new SAXException(sx);
287           }
288         }
289 
290         // -- change group type if necessary
291         if (checkGroupType && (group.getOrder() == Order.sequence)) {
292           // -- make sure element is last item in group,
293           // -- otherwise we need to switch to all
294           boolean found = false;
295           boolean changeType = false;
296           for (int i = 0; i < group.getParticleCount(); i++) {
297             if (found) {
298               changeType = true;
299               break;
300             }
301             if (element == group.getParticle(i))
302               found = true;
303           }
304           if (changeType) {
305             group.setOrder(Order.all);
306           }
307         }
308       }
309     } else {
310       try {
311         _schema.addElementDecl(sInfo.element);
312 
313         // -- make complexType top-level also
314         // XMLType type = sInfo.element.getType();
315         // if ((type != null) && (type.isComplexType())) {
316         // if (type.getName() == null) {
317         // type.setName(sInfo.element.getName() + "Type");
318         // _schema.addComplexType((ComplexType)type);
319         // }
320         // }
321       } catch (SchemaException sx) {
322         throw new SAXException(sx);
323       }
324     }
325 
326   } // -- endElement
327 
328 
329   public void ignorableWhitespace(char[] ch, int start, int length)
330       throws org.xml.sax.SAXException {
331     // -- do nothing
332 
333   } // -- ignorableWhitespace
334 
335   public void processingInstruction(String target, String data) throws org.xml.sax.SAXException {
336     // -- do nothing
337 
338   } // -- processingInstruction
339 
340   public void setDocumentLocator(final Locator locator) {}
341 
342   public void startDocument() throws org.xml.sax.SAXException {
343     // -- do nothing
344 
345   } // -- startDocument
346 
347 
348   public void startElement(String name, AttributeList atts) throws org.xml.sax.SAXException {
349 
350     // -- strip namespace prefix
351     int idx = name.indexOf(':');
352     if (idx >= 0) {
353       name = name.substring(idx + 1);
354     }
355 
356     StateInfo sInfo = null;
357 
358     boolean topLevel = false;
359     // -- if we are currently in another element
360     // -- definition...flag as complex content
361     if (!_siStack.isEmpty()) {
362       sInfo = (StateInfo) _siStack.peek();
363       sInfo.complex = true;
364     } else {
365       topLevel = true;
366     }
367 
368     // -- create current holder for stateInformation
369     sInfo = new StateInfo();
370     sInfo.topLevel = topLevel;
371     _siStack.push(sInfo);
372 
373     // -- create element definition
374     sInfo.element = new ElementDecl(_schema, name);
375 
376     // -- create attributes
377     for (int i = 0; i < atts.getLength(); i++) {
378 
379       String attName = atts.getName(i);
380 
381       // -- skip namespace declarations
382       if (attName.equals(XMLNS))
383         continue;
384       String prefix = "";
385       idx = attName.indexOf(':');
386       if (idx >= 0) {
387         prefix = attName.substring(0, idx);
388         attName = attName.substring(idx + 1);
389       }
390       if (prefix.equals(XMLNS))
391         continue;
392 
393       AttributeDecl attr = new AttributeDecl(_schema, attName);
394 
395       // -- guess simple type
396       String typeName = _nsPrefix + ':' + DatatypeHandler.guessType(atts.getValue(i));
397 
398       attr.setSimpleTypeReference(typeName);
399 
400       sInfo.attributes.add(attr);
401     }
402 
403   } // -- startElement
404 
405 
406   // ------------------------------------/
407   // - org.xml.sax.ErrorHandler methods -/
408   // ------------------------------------/
409 
410   public void error(SAXParseException exception) throws org.xml.sax.SAXException {
411     throw exception;
412 
413   } // -- error
414 
415   public void fatalError(SAXParseException exception) throws org.xml.sax.SAXException {
416     throw exception;
417 
418   } // -- fatalError
419 
420 
421   public void warning(SAXParseException exception) throws org.xml.sax.SAXException {
422     throw exception;
423 
424   } // -- warning
425 
426   // -------------------------/
427   // - local private methods -/
428   // -------------------------/
429 
430   /**
431    * Merges the two element declarations. The resulting merge is placed in ElementDecl e1.
432    *
433    * @param e1 the main ElementDecl
434    * @param e2 the secondary ElementDecl to merge with e1
435    **/
436   private void merge(ElementDecl e1, ElementDecl e2) throws SchemaException {
437 
438     XMLType e1Type = e1.getType();
439     XMLType e2Type = e2.getType();
440 
441     // -- Make sure types are not null and if so create them
442     if (e1Type == null) {
443       if (e2Type == null)
444         return; // -- nothing to merge
445       if (e2Type.isSimpleType()) {
446         e1.setType(e2Type);
447       } else {
448         ComplexType cType = new ComplexType(_schema);
449         Group group = new Group();
450         group.setOrder(_defaultGroupOrder);
451         cType.addGroup(group);
452         e1.setType(cType);
453         e1Type = cType;
454       }
455     } else if (e2Type == null) {
456       if (e1Type.isSimpleType()) {
457         e2.setType(e1Type);
458       } else {
459         ComplexType cType = new ComplexType(_schema);
460         Group group = new Group();
461         group.setOrder(_defaultGroupOrder);
462         cType.addGroup(group);
463         e2.setType(cType);
464         e2Type = cType;
465       }
466     }
467 
468     // -- both simple types
469     if (e1Type.isSimpleType() && e2Type.isSimpleType()) {
470       if (!e1Type.getName().equals(e2Type.getName())) {
471         String typeName =
472             _nsPrefix + ':' + DatatypeHandler.whichType(e1Type.getName(), e2Type.getName());
473         e1.setType(null);
474         e1.setTypeReference(typeName);
475       }
476       return;
477     }
478     // -- e1 is simple, e2 is complex
479     else if (e1Type.isSimpleType()) {
480       ComplexType cType = new ComplexType(_schema);
481       e1.setType(cType);
482       Group group = new Group();
483       group.setOrder(_defaultGroupOrder);
484       cType.addGroup(group);
485       cType.setContentType(ContentType.mixed);
486       e1Type = cType;
487       // -- do not return here...we need to now treat as both
488       // -- were complex
489     }
490     // -- e2 is simple, e1 is complex
491     else if (e2Type.isSimpleType()) {
492       ComplexType cType = new ComplexType(_schema);
493       e2.setType(cType);
494       Group group = new Group();
495       group.setOrder(_defaultGroupOrder);
496       cType.addGroup(group);
497       cType.setContentType(ContentType.mixed);
498       e2Type = cType;
499       // -- do not return here...we need to now treat as both
500       // -- were complex
501     }
502 
503     // -- both complex types
504     ComplexType cType1 = (ComplexType) e1Type;
505     ComplexType cType2 = (ComplexType) e2Type;
506 
507     // -- loop through all element/attribute declarations
508     // -- of e2 and add them to e1 if they do not already exist
509     // -- and mark them as optional
510 
511     Group e1Group = (Group) cType1.getParticle(0);
512     if (e1Group == null) {
513       e1Group = new Group();
514       e1Group.setOrder(_defaultGroupOrder);
515       cType1.addGroup(e1Group);
516 
517     }
518     Group e2Group = (Group) cType2.getParticle(0);
519     if (e2Group == null) {
520       e2Group = new Group();
521       e2Group.setOrder(_defaultGroupOrder);
522       cType2.addGroup(e2Group);
523 
524     }
525 
526     Enumeration enumeration = e2Group.enumerate();
527     while (enumeration.hasMoreElements()) {
528       Particle particle = (Particle) enumeration.nextElement();
529       if (particle.getStructureType() == Structure.ELEMENT) {
530         ElementDecl element = (ElementDecl) particle;
531         ElementDecl main = e1Group.getElementDecl(element.getName());
532         if (main == null) {
533           e1Group.addElementDecl(element);
534           element.setMinOccurs(0);
535         } else {
536           merge(main, element);
537         }
538       }
539     }
540     // -- add all attributes from type2
541     enumeration = cType2.getAttributeDecls();
542 
543     while (enumeration.hasMoreElements()) {
544       // -- check for attribute with same name
545       AttributeDecl attNew = (AttributeDecl) enumeration.nextElement();
546 
547       String attName = attNew.getName();
548       AttributeDecl attPrev = cType1.getAttributeDecl(attName);
549       if (attPrev == null) {
550         attNew.setUse(AttributeDecl.USE_OPTIONAL);
551         cType1.addAttributeDecl(attNew);
552       } else {
553         String type1 = attPrev.getSimpleType().getName();
554         String type2 = attNew.getSimpleType().getName();
555         if (!type1.equals(type2)) {
556           String typeName = _nsPrefix + ':' + DatatypeHandler.whichType(type1, type2);
557           attPrev.setSimpleTypeReference(typeName);
558         }
559       }
560     }
561 
562     // -- loop through all element/attribute declarations
563     // -- of e1 and if they do not exist in e2, simply
564     // -- mark them as optional
565     enumeration = e1Group.enumerate();
566     while (enumeration.hasMoreElements()) {
567       Particle particle = (Particle) enumeration.nextElement();
568       if (particle.getStructureType() == Structure.ELEMENT) {
569         ElementDecl element = (ElementDecl) particle;
570         if (e2Group.getElementDecl(element.getName()) == null) {
571           element.setMinOccurs(0);
572         }
573       }
574     }
575 
576 
577   } // -- merge
578 
579   /**
580    * Inner-class to hold state
581    **/
582   class StateInfo {
583     Namespaces namespaces = null;
584     ElementDecl element = null;
585     final Vector<AttributeDecl> attributes = new Vector<>();
586     StringBuffer buffer = null;
587     boolean mixed = false;
588     boolean complex = false;
589     boolean topLevel = false;
590   } // -- StateInfo
591 
592 } // --
593 
594