View Javadoc
1   /**
2    * Redistribution and use of this software and associated documentation
3    * ("Software"), with or without modification, are permitted provided
4    * that the following conditions are met:
5    *
6    * 1. Redistributions of source code must retain copyright
7    *    statements and notices.  Redistributions must also contain a
8    *    copy of this document.
9    *
10   * 2. Redistributions in binary form must reproduce the
11   *    above copyright notice, this list of conditions and the
12   *    following disclaimer in the documentation and/or other
13   *    materials provided with the distribution.
14   *
15   * 3. The name "Exolab" must not be used to endorse or promote
16   *    products derived from this Software without prior written
17   *    permission of Intalio, Inc.  For written permission,
18   *    please contact info@exolab.org.
19   *
20   * 4. Products derived from this Software may not be called "Exolab"
21   *    nor may "Exolab" appear in their names without prior written
22   *    permission of Intalio, Inc. Exolab is a registered
23   *    trademark of Intalio, Inc.
24   *
25   * 5. Due credit should be given to the Exolab Project
26   *    (http://www.exolab.org/).
27   *
28   * THIS SOFTWARE IS PROVIDED BY INTALIO, INC. AND CONTRIBUTORS
29   * ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
30   * NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
31   * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
32   * INTALIO, INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
33   * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
34   * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
35   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
37   * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
39   * OF THE POSSIBILITY OF SUCH DAMAGE.
40   *
41   * Copyright 2001 (C) Intalio, Inc. All Rights Reserved.
42   *
43   * $Id$
44   */
45  
46  package org.exolab.castor.xml.schema.util;
47  
48  import java.util.Enumeration;
49  import java.util.Stack;
50  import java.util.Vector;
51  
52  import org.exolab.castor.xml.Namespaces;
53  import org.exolab.castor.xml.schema.AttributeDecl;
54  import org.exolab.castor.xml.schema.ComplexType;
55  import org.exolab.castor.xml.schema.ContentType;
56  import org.exolab.castor.xml.schema.ElementDecl;
57  import org.exolab.castor.xml.schema.Group;
58  import org.exolab.castor.xml.schema.Order;
59  import org.exolab.castor.xml.schema.Particle;
60  import org.exolab.castor.xml.schema.Schema;
61  import org.exolab.castor.xml.schema.SchemaException;
62  import org.exolab.castor.xml.schema.Structure;
63  import org.exolab.castor.xml.schema.XMLType;
64  import org.xml.sax.AttributeList;
65  import org.xml.sax.DocumentHandler;
66  import org.xml.sax.Locator;
67  import org.xml.sax.SAXException;
68  import org.xml.sax.SAXParseException;
69  
70  
71  
72  /**
73   * A Utility class which will attempt to create an XML Schema
74   * Object Model based on a given XML instance document.
75   *
76   * @author <a href="mailto:kvisco@intalio.com">Keith Visco</a>
77   * @version $Revision$ $Date: 2006-04-25 15:08:23 -0600 (Tue, 25 Apr 2006) $ 
78  **/
79  public final class XMLInstance2SchemaHandler
80      implements DocumentHandler, org.xml.sax.ErrorHandler
81  {
82  
83  
84      private static final String XMLNS          = "xmlns";
85      private static final String DEFAULT_PREFIX = "xsd";
86        //--------------------/
87       //- Member Variables -/
88      //--------------------/
89      
90      /**
91       * The schema we are creating
92      **/
93      private Schema _schema = null;
94  
95      /**
96       * The stack of element declarations
97      **/
98      private Stack _siStack = null;
99      
100     private String _nsPrefix = null;
101     
102     private Order  _defaultGroupOrder = Order.sequence;
103     
104       //----------------/
105      //- Constructors -/
106     //----------------/
107 
108     /**
109      * Creates a new XMLInstance2SchemaHandler
110      *
111     **/
112     public XMLInstance2SchemaHandler() {
113         this(null);
114     } //-- XMLInstance2SchemaHandler
115 
116     /**
117      * Creates a new XMLInstance2SchemaHandler
118      *
119     **/
120     public XMLInstance2SchemaHandler(Schema schema) {
121         super();
122         
123         _siStack   = new Stack();
124         
125         _schema = schema;
126         //-- create Schema and initialize
127         if (_schema == null) {
128             _schema = new Schema();
129             _schema.addNamespace(DEFAULT_PREFIX, Schema.DEFAULT_SCHEMA_NS);
130             _nsPrefix = DEFAULT_PREFIX;
131         }
132         //-- find or declare namespace prefix
133         else {
134             _nsPrefix = null;
135             Namespaces namespaces = _schema.getNamespaces();
136             Enumeration enumeration = namespaces.getLocalNamespacePrefixes();
137             while (enumeration.hasMoreElements()) {
138                 String key = (String) enumeration.nextElement();
139                 if (namespaces.getNamespaceURI(key).equals(Schema.DEFAULT_SCHEMA_NS)) {
140                     _nsPrefix = key;
141                     break;
142                 }
143             }
144             if (_nsPrefix == null) {
145                 _schema.addNamespace(DEFAULT_PREFIX, Schema.DEFAULT_SCHEMA_NS);
146                 _nsPrefix = DEFAULT_PREFIX;
147             }
148         }
149     } //-- XMLInstance2SchemaHandler
150 
151       //-----------/
152      //- Methods -/
153     //-----------/
154     
155     /**
156      * Returns the XML Schema object that is being used by this handler
157      *
158      * @return the XML Schema object that is being used by this handler
159     **/
160     public Schema getSchema() {
161         return _schema;
162     }
163     
164     /**
165      * This method is used to set the default group type. Either
166      * "sequence" or "all". The default is "sequence".
167      *
168      * @param order the default group order to use.
169     **/
170     protected void setDefaultGroupOrder(Order order) {
171         _defaultGroupOrder = order;
172     } //-- setDefaultGroupOrder
173       
174     //---------------------------------------/
175     //- org.xml.sax.DocumentHandler methods -/
176     //---------------------------------------/
177     
178     public void characters(char[] ch, int start, int length) 
179         throws org.xml.sax.SAXException
180     {
181         if (_siStack.isEmpty()) return;
182         
183         StateInfo sInfo = (StateInfo)_siStack.peek();
184         
185         if (sInfo.buffer == null) {
186             sInfo.buffer = new StringBuffer();
187         }
188         sInfo.buffer.append(ch, start, length);
189         
190         if (sInfo.complex) {
191             sInfo.mixed = true;    
192         }
193     } //-- characters
194     
195     public void endDocument()
196         throws org.xml.sax.SAXException
197     {
198         //-- do nothing
199         
200     } //-- endDocument
201     
202     public void endElement(String name) 
203         throws org.xml.sax.SAXException
204     {
205         
206         //-- strip namespace prefix
207         int idx = name.indexOf(':');
208         if (idx >= 0) {
209             name = name.substring(idx+1);
210         }
211         
212         StateInfo sInfo = (StateInfo) _siStack.pop();
213         
214         //-- if we don't have a type, it means there are no
215         //-- children and therefore the type is a simpleType or
216         //-- simpleContent
217         if ((sInfo.element.getType() == null) && (sInfo.buffer != null)) {
218             
219             //-- create SimpleType (guess type)
220             String typeName = _nsPrefix + ':' + 
221                 DatatypeHandler.guessType(sInfo.buffer.toString());
222             sInfo.element.setTypeReference(typeName);
223             //-- simpleContent
224             if (sInfo.attributes.size() > 0) {
225                 ComplexType cType = new ComplexType(_schema);
226                 //-- SHOULD CHANGE THIS TO SIMPLE CONTENT WHEN
227                 //-- SCHEMA WRITER BUGS ARE FIXED
228                 cType.setContentType(ContentType.mixed);
229                 sInfo.element.setType(cType);
230                 Group group = new Group();
231                 group.setOrder(_defaultGroupOrder);
232                 //-- add attributes
233                 try {
234                     cType.addGroup(group);
235                     for (int i = 0; i < sInfo.attributes.size(); i++) {
236                         AttributeDecl attDecl = 
237                             (AttributeDecl)sInfo.attributes.elementAt(i);
238                         cType.addAttributeDecl(attDecl);
239                     }
240                 }
241                 catch(SchemaException sx) {
242                     throw new SAXException(sx);
243                 }
244             }
245         }
246         else {
247             ComplexType cType = (ComplexType)sInfo.element.getType();
248             
249             if ((cType == null) && (sInfo.attributes.size() > 0)) {
250                 cType = new ComplexType(_schema);
251                 sInfo.element.setType(cType);
252                 Group group = new Group();
253                 group.setOrder(_defaultGroupOrder);
254                 //-- add attributes
255                 try {
256                     cType.addGroup(group);
257                 }
258                 catch(SchemaException sx) {
259                     throw new SAXException(sx);
260                 }
261             }
262             
263             if (cType != null) {
264                 for (int i = 0; i < sInfo.attributes.size(); i++) {
265                     AttributeDecl attDecl = 
266                         (AttributeDecl)sInfo.attributes.elementAt(i);
267                     cType.addAttributeDecl(attDecl);
268                 }
269             }
270         }
271         
272         //-- put element into parent element or as top-level in schema
273         if (!_siStack.isEmpty()) {
274             StateInfo parentInfo = (StateInfo)_siStack.peek();
275             ComplexType type = (ComplexType) parentInfo.element.getType();
276             Group group = null;
277             if ((type == null) || (type.getParticleCount() == 0)) {
278                 if (type == null) {
279                     parentInfo.complex = true;
280                     type = new ComplexType(_schema);
281                     parentInfo.element.setType(type);
282                 }
283                 group = new Group();
284                 group.setOrder(_defaultGroupOrder);
285                 try {
286                     type.addGroup(group);
287                     //-- add element
288                     group.addElementDecl(sInfo.element);
289                 }
290                 catch(SchemaException sx) {
291                     throw new SAXException(sx);
292                 }
293             }
294             else {
295                 group = (Group) type.getParticle(0);
296                 //-- check for another element declaration with
297                 //-- same name ...
298                 ElementDecl element = group.getElementDecl(name);
299                 boolean checkGroupType = false;
300                 if (element != null) {
301                     //-- if complex...merge definition
302                     if (sInfo.complex) {
303                         try {
304                             merge(element, sInfo.element);
305                         }
306                         catch(SchemaException sx) {
307                             throw new SAXException(sx);
308                         }
309                     }
310                     element.setMaxOccurs(Particle.UNBOUNDED);
311                     checkGroupType = true;
312                 }
313                 else {
314                     try {
315                         group.addElementDecl(sInfo.element);
316                     }
317                     catch(SchemaException sx) {
318                         throw new SAXException(sx);
319                     }
320                 }
321                 
322                 //-- change group type if necessary
323                 if (checkGroupType && (group.getOrder() == Order.sequence)) {
324                     //-- make sure element is last item in group,
325                     //-- otherwise we need to switch to all
326                     boolean found = false;
327                     boolean changeType = false;
328                     for (int i = 0; i < group.getParticleCount(); i++) {
329                         if (found) {
330                             changeType = true;
331                             break;
332                         }
333                         if (element == group.getParticle(i)) found = true;
334                     }
335                     if (changeType) {
336                         group.setOrder(Order.all);
337                     }
338                 }
339             }
340         }
341         else {
342             try {
343                 _schema.addElementDecl(sInfo.element);
344                 
345                 //-- make complexType top-level also
346                 //XMLType type = sInfo.element.getType();
347                 //if ((type != null) && (type.isComplexType())) {
348                 //    if (type.getName() == null) {
349                 //        type.setName(sInfo.element.getName() + "Type");
350                 //        _schema.addComplexType((ComplexType)type);
351                 //    }
352                 //}
353             }
354             catch(SchemaException sx) {
355                 throw new SAXException(sx);
356             }
357         }
358         
359     } //-- endElement
360 
361 
362     public void ignorableWhitespace(char[] ch, int start, int length) 
363         throws org.xml.sax.SAXException
364     {
365         //-- do nothing
366         
367     } //-- ignorableWhitespace
368 
369     public void processingInstruction(String target, String data) 
370         throws org.xml.sax.SAXException
371     {
372         //-- do nothing
373 
374     } //-- processingInstruction
375     
376     public void setDocumentLocator(final Locator locator) { }
377     
378     public void startDocument()
379         throws org.xml.sax.SAXException
380     {
381         //-- do nothing
382         
383     } //-- startDocument
384 
385     
386     public void startElement(String name, AttributeList atts) 
387         throws org.xml.sax.SAXException
388     {
389         
390         //-- strip namespace prefix
391         int idx = name.indexOf(':');
392         if (idx >= 0) {
393             name = name.substring(idx+1);
394         }
395 
396         StateInfo sInfo = null;
397         
398         boolean topLevel = false;
399         //-- if we are currently in another element 
400         //-- definition...flag as complex content
401         if (!_siStack.isEmpty()) {
402             sInfo = (StateInfo)_siStack.peek();
403             sInfo.complex = true;
404         }
405         else {
406             topLevel = true;
407         }
408         
409         //-- create current holder for stateInformation
410         sInfo = new StateInfo();
411         sInfo.topLevel = topLevel;
412         _siStack.push(sInfo);
413         
414         //-- create element definition
415         sInfo.element = new ElementDecl(_schema, name);
416         
417         //-- create attributes
418         for (int i = 0; i < atts.getLength(); i++) {
419             
420             String attName = atts.getName(i);
421             
422             //-- skip namespace declarations
423             if (attName.equals(XMLNS)) continue;
424             String prefix = "";
425             idx = attName.indexOf(':');
426             if (idx >= 0) {
427                 prefix = attName.substring(0, idx);
428                 attName = attName.substring(idx+1);
429             }
430             if (prefix.equals(XMLNS)) continue;
431             
432             AttributeDecl attr = new AttributeDecl(_schema, attName);
433             
434             //-- guess simple type
435             String typeName = _nsPrefix + ':' + 
436                 DatatypeHandler.guessType(atts.getValue(i));
437                 
438             attr.setSimpleTypeReference(typeName);
439             
440             sInfo.attributes.addElement(attr);
441         }
442         
443     } //-- startElement
444     
445 
446     //------------------------------------/
447     //- org.xml.sax.ErrorHandler methods -/
448     //------------------------------------/
449     
450     public void error(SAXParseException exception)
451         throws org.xml.sax.SAXException
452     {
453         throw exception;
454         
455     } //-- error
456     
457     public void fatalError(SAXParseException exception)
458         throws org.xml.sax.SAXException
459     {
460         throw exception;
461         
462     } //-- fatalError
463     
464     
465     public void warning(SAXParseException exception)
466         throws org.xml.sax.SAXException
467     {
468         throw exception;
469         
470     } //-- warning
471     
472     //-------------------------/
473     //- local private methods -/
474     //-------------------------/
475     
476     /**
477      * Merges the two element declarations. The resulting
478      * merge is placed in ElementDecl e1.
479      *
480      * @param e1 the main ElementDecl 
481      * @param e2 the secondary ElementDecl to merge with e1
482     **/
483     private void merge(ElementDecl e1, ElementDecl e2) 
484         throws SchemaException
485     {
486         
487         XMLType e1Type = e1.getType();
488         XMLType e2Type = e2.getType();
489          
490         //-- Make sure types are not null and if so create them
491         if (e1Type == null) {
492             if (e2Type == null) return; //-- nothing to merge
493 			if (e2Type.isSimpleType()) {
494 			    e1.setType(e2Type);
495 			}
496 			else {
497 			    ComplexType cType = new ComplexType(_schema);
498 			    Group group = new Group();
499 			    group.setOrder(_defaultGroupOrder);
500 			    cType.addGroup(group);
501 			    e1.setType(cType);
502 			    e1Type = cType;
503 			}
504         }
505         else if (e2Type == null) {
506             if (e1Type.isSimpleType()) {
507                 e2.setType(e1Type);
508             }
509             else {
510                 ComplexType cType = new ComplexType(_schema);
511                 Group group = new Group();
512                 group.setOrder(_defaultGroupOrder);
513                 cType.addGroup(group);
514                 e2.setType(cType);
515                 e2Type = cType;
516             }
517         }
518         
519         //-- both simple types
520         if (e1Type.isSimpleType() && e2Type.isSimpleType()) {
521             if (!e1Type.getName().equals(e2Type.getName())) {
522                 String typeName = _nsPrefix + ':' +
523                     DatatypeHandler.whichType(e1Type.getName(),
524                         e2Type.getName());
525                 e1.setType(null);
526                 e1.setTypeReference(typeName);
527             }
528             return;
529         }
530         //-- e1 is simple, e2 is complex
531         else if (e1Type.isSimpleType()) {
532             ComplexType cType = new ComplexType(_schema);
533             e1.setType(cType);
534             Group group = new Group();
535             group.setOrder(_defaultGroupOrder);
536             cType.addGroup(group);
537             cType.setContentType(ContentType.mixed);
538             e1Type = cType;
539             //-- do not return here...we need to now treat as both
540             //-- were complex
541         }
542         //-- e2 is simple, e1 is complex
543         else if (e2Type.isSimpleType()) {
544             ComplexType cType = new ComplexType(_schema);
545             e2.setType(cType);
546             Group group = new Group();
547             group.setOrder(_defaultGroupOrder);
548             cType.addGroup(group);
549             cType.setContentType(ContentType.mixed);
550             e2Type = cType;
551             //-- do not return here...we need to now treat as both
552             //-- were complex
553         }
554         
555         //-- both complex types
556         ComplexType cType1 = (ComplexType)e1Type;
557         ComplexType cType2 = (ComplexType)e2Type;
558         
559         //-- loop through all element/attribute declarations
560         //-- of e2 and add them to e1 if they do not already exist
561         //-- and mark them as optional
562         
563         Group e1Group = (Group) cType1.getParticle(0);
564         if (e1Group == null) {
565             e1Group = new Group();
566             e1Group.setOrder(_defaultGroupOrder);
567             cType1.addGroup(e1Group);
568             
569         }
570         Group e2Group = (Group) cType2.getParticle(0);
571         if (e2Group == null) {
572             e2Group = new Group();
573             e2Group.setOrder(_defaultGroupOrder);
574             cType2.addGroup(e2Group);
575             
576         }
577         
578         Enumeration enumeration = e2Group.enumerate();
579         while (enumeration.hasMoreElements()) {
580             Particle particle = (Particle)enumeration.nextElement();
581             if (particle.getStructureType() == Structure.ELEMENT) {
582                 ElementDecl element = (ElementDecl)particle;
583                 ElementDecl main = e1Group.getElementDecl(element.getName());
584                 if (main == null) {
585                     e1Group.addElementDecl(element);
586                     element.setMinOccurs(0);
587                 }
588                 else {
589                     merge(main, element);
590                 }
591             }
592         }
593         //-- add all attributes from type2
594         enumeration = cType2.getAttributeDecls();
595         
596         while (enumeration.hasMoreElements()) {
597             //-- check for attribute with same name
598             AttributeDecl attNew =  (AttributeDecl)enumeration.nextElement();
599                     
600             String attName = attNew.getName();
601             AttributeDecl attPrev = cType1.getAttributeDecl(attName);
602             if (attPrev == null) {
603                 attNew.setUse(AttributeDecl.USE_OPTIONAL);
604                 cType1.addAttributeDecl(attNew);
605             }
606             else {
607                 String type1 = attPrev.getSimpleType().getName();
608                 String type2 = attNew.getSimpleType().getName();
609                 if (!type1.equals(type2)) {
610                     String typeName = _nsPrefix + ':' + 
611                         DatatypeHandler.whichType(type1, type2);
612                     attPrev.setSimpleTypeReference(typeName);                        }
613             }
614         }
615         
616         //-- loop through all element/attribute declarations
617         //-- of e1 and if they do not exist in e2, simply
618         //-- mark them as optional
619         enumeration = e1Group.enumerate();
620         while (enumeration.hasMoreElements()) {
621             Particle particle = (Particle)enumeration.nextElement();
622             if (particle.getStructureType() == Structure.ELEMENT) {
623                 ElementDecl element = (ElementDecl)particle;
624                 if (e2Group.getElementDecl(element.getName()) == null) {
625                     element.setMinOccurs(0);
626                 }
627             }
628         }
629         
630         
631     } //-- merge
632     
633     /**
634      * Inner-class to hold state
635     **/
636     class StateInfo {
637         Namespaces   namespaces   = null;
638         ElementDecl  element      = null;
639         Vector       attributes   = null;
640         StringBuffer buffer       = null;
641         boolean      mixed        = false;
642         boolean      complex      = false;
643         boolean      topLevel     = false;
644         
645         public StateInfo() {
646             super();
647             attributes = new Vector();
648         }
649         
650     } //-- StateInfo
651     
652 } //--
653 
654